深度卷积生成对抗网络 (DCGAN) 是一种生成模型,它使用深度卷积神经网络来生成新的数据样本。以下是有关 DCGAN 的一些要点:
架构:
DCGAN由生成器和鉴别器网络组成。
生成器负责从随机噪声中生成真实的数据样本。
鉴别器试图区分真实数据样本和生成器生成的数据样本。
卷积层:
DCGAN 在生成器和鉴别器中使用卷积层来捕获数据中的空间层次结构和模式。
卷积层有助于学习局部特征,对于图像相关任务至关重要。
批量归一化:
批量归一化通常用于生成器和鉴别器中,以稳定和加速训练。
它对每一层的输入进行归一化,有助于缓解梯度消失等问题。
激活函数:
生成器的中间层通常使用 ReLU(修正线性单元)激活函数。
生成器的输出层通常使用 tanh 激活函数来生成介于 -1 和 1 之间的像素值。
生成器输入:
生成器的输入通常是随机噪声(通常从正态分布中采样)。
生成器学会将这种噪声转换为真实的数据样本。
鉴别器输出:
鉴别器的输出是一个概率,指示输入是真实数据样本的可能性。
sigmoid 激活函数通常用于鉴别器的输出层,以生成介于 0 和 1 之间的值。
损失函数:
生成器旨在降低鉴别器正确识别生成样本的概率(最小化 log(1 - D(G(z))),其中 G(z) 是生成的样本)。实践中常改为最大化 log(D(G(z))),即把生成样本的标签当作"真实"来计算损失,以避免训练初期梯度过小。
鉴别器旨在正确分类真实样本和生成的样本(最大化真实样本的 log(D(x)) 与生成样本的 log(1 - D(G(z))) 之和,等价于最小化相应的二元交叉熵损失)。
训练流程:
DCGAN 通过极小极大博弈进行训练,其中生成器和鉴别器交替迭代训练。
训练过程涉及更新两个网络的权重以提高其性能。
可视化:
在训练过程中,DCGAN产生越来越逼真的数据样本,生成器学习生成多样化和高质量的输出。
应用:
DCGAN广泛用于图像生成任务,包括生成逼真的人脸、物体和场景。
它们还被应用于图像到图像转换和风格迁移等任务。
import matplotlib.pyplot as plt
import numpy as np
import pickle as pkl
import os
# 设置环境变量以避免 OpenMP 问题
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
import torch
from torchvision import datasets
from torchvision import transforms
# Load the SVHN training split as tensors and wrap it in a shuffling DataLoader.
transform = transforms.ToTensor()
svhn_train = datasets.SVHN(
    root='data/',
    split='train',
    download=True,  # ask torchvision to fetch the files into root if needed
    transform=transform,
)

batch_size = 128
num_workers = 0  # no worker subprocesses; batches are loaded in the main process
train_loader = torch.utils.data.DataLoader(
    svhn_train,
    batch_size,
    shuffle=True,
    num_workers=num_workers,
)
# Visualize data: draw the first `plot_size` images of one training batch,
# each titled with its label, then report the value range of the first image.
dataiter = iter(train_loader)
images, labels = next(dataiter)

plot_size = 20
fig = plt.figure(figsize=(25, 4))
for idx in range(plot_size):
    # Two rows of plot_size // 2 tiles, with tick marks suppressed.
    ax = fig.add_subplot(2, plot_size // 2, idx + 1, xticks=[], yticks=[])
    # Move axis 0 to the end so the image is laid out the way imshow expects.
    ax.imshow(np.transpose(images[idx], (1, 2, 0)))
    ax.set_title(str(labels[idx].item()))

# `img` is reused further down to demonstrate the rescaling helper.
img = images[0]
print('Min:', img.min())
print('Max:', img.max())
plt.show()
# helper scale function
def scale(x, feature_range=(-1, 1)):
    """Linearly map values from the unit interval onto ``feature_range``.

    The mapping is ``x * (high - low) + low``, so 0 maps to ``low`` and 1 maps
    to ``high``. Inputs outside [0, 1] are not clipped; they are extrapolated
    by the same affine transform.

    Args:
        x: A number, NumPy array or tensor supporting ``*`` and ``+``.
        feature_range: ``(low, high)`` pair giving the target range.

    Returns:
        The rescaled value, of the same kind as ``x``.
    """
    # Named low/high rather than min/max so the builtins are not shadowed.
    low, high = feature_range
    return x * (high - low) + low
# scaled range: rescale the sample image and print its new extremes
scaled_img = scale(img)
for caption, value in (
    ('Scaled min: ', scaled_img.min()),
    ('Scaled max: ', scaled_img.max()),
):
    print(caption, value)
# 定义模型
import torch.nn as nn
import torch.nn.functional as F
# helper conv function
def conv(in_channels, out_channels, kernel_size, stride=2, padding=1, batch_norm=True):
    """Build a bias-free 2-D convolution, optionally followed by batch norm.

    Args:
        in_channels: Number of channels in the input feature map.
        out_channels: Number of channels produced by the convolution.
        kernel_size: Size of the convolution kernel.
        stride: Convolution stride (default 2).
        padding: Padding added to the input borders (default 1).
        batch_norm: When true, append ``nn.BatchNorm2d(out_channels)``.

    Returns:
        An ``nn.Sequential`` holding the convolution and, if requested, the
        batch-normalization layer, in that order.
    """
    modules = [
        nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding, bias=False)
    ]
    if batch_norm:
        modules.append(nn.BatchNorm2d(out_channels))
    return nn.Sequential(*modules)
class Discriminator(nn.Module):
    """Convolutional network that maps an image batch to one raw score per image.

    Three ``conv`` stages with widths ``conv_dim``, ``2 * conv_dim`` and
    ``4 * conv_dim`` (the first without batch norm) are each followed by a
    leaky ReLU with slope 0.2, then a single linear layer. No sigmoid is
    applied in ``forward``; the output is an unnormalized score.

    The flattening step hard-codes a 4x4 final feature map, which matches
    32x32 inputs when ``conv`` keeps its default stride 2 / padding 1 with
    kernel size 4.
    """

    def __init__(self, conv_dim=32):
        """Create the layers; ``conv_dim`` is the width of the first conv stage."""
        super().__init__()
        self.conv_dim = conv_dim

        # 3-channel input; only the first stage skips batch normalization.
        self.conv1 = conv(3, conv_dim, 4, batch_norm=False)
        self.conv2 = conv(conv_dim, conv_dim * 2, 4)
        self.conv3 = conv(conv_dim * 2, conv_dim * 4, 4)

        # (conv_dim * 4) channels x 4 x 4 spatial positions -> one output.
        self.fc = nn.Linear(conv_dim * 4 * 4 * 4, 1)

    def forward(self, x):
        """Return a ``(batch, 1)`` tensor of scores for the image batch ``x``."""
        features = x
        for stage in (self.conv1, self.conv2, self.conv3):
            features = F.leaky_relu(stage(features), 0.2)
        flat = features.view(-1, self.conv_dim * 4 * 4 * 4)
        return self.fc(flat)
def deconv(in_channels, out_channels, kernel_size, stride=2, padding=1, batch_norm=True):
    """Build a bias-free transposed convolution, optionally followed by batch norm.

    Args:
        in_channels: Number of channels in the input feature map.
        out_channels: Number of channels produced by the layer.
        kernel_size: Size of the transposed-convolution kernel.
        stride: Stride of the transposed convolution (default 2).
        padding: Padding argument passed to ``nn.ConvTranspose2d`` (default 1).
        batch_norm: When true, append ``nn.BatchNorm2d(out_channels)``.

    Returns:
        An ``nn.Sequential`` holding the transposed convolution and, if
        requested, the batch-normalization layer, in that order.
    """
    modules = [
        nn.ConvTranspose2d(
            in_channels, out_channels, kernel_size, stride, padding, bias=False
        )
    ]
    if batch_norm:
        modules.append(nn.BatchNorm2d(out_channels))
    return nn.Sequential(*modules)
class Generator(nn.Module):
    """Network that turns latent vectors into 3-channel images in [-1, 1].

    A linear layer expands each latent vector to ``4 * conv_dim`` feature maps
    of size 4x4. Two ``deconv`` stages with ReLU follow, then a final
    ``deconv`` stage without batch norm whose output goes through ``tanh``.
    """

    def __init__(self, z_size, conv_dim=32):
        """Create the layers.

        Args:
            z_size: Length of the latent input vector.
            conv_dim: Width of the last hidden transposed-conv stage; earlier
                stages use multiples of it.
        """
        super().__init__()
        self.conv_dim = conv_dim

        # Project the latent vector to (conv_dim * 4) x 4 x 4 values.
        self.fc = nn.Linear(z_size, conv_dim * 4 * 4 * 4)

        self.t_conv1 = deconv(conv_dim * 4, conv_dim * 2, 4)
        self.t_conv2 = deconv(conv_dim * 2, conv_dim, 4)
        # Output stage: 3 channels, no batch normalization.
        self.t_conv3 = deconv(conv_dim, 3, 4, batch_norm=False)

    def forward(self, x):
        """Generate an image batch from the latent batch ``x``."""
        feature_map = self.fc(x).view(-1, self.conv_dim * 4, 4, 4)
        for stage in (self.t_conv1, self.t_conv2):
            feature_map = F.relu(stage(feature_map))
        # tanh bounds the output to [-1, 1].
        return F.tanh(self.t_conv3(feature_map))
# Model hyperparameters.
conv_dim = 32
z_size = 100

# Instantiate both networks and print their layer summaries, separated by a
# blank line.
D = Discriminator(conv_dim)
G = Generator(z_size=z_size, conv_dim=conv_dim)
print(D, G, sep='\n\n')

# Move the models to the GPU when CUDA is available; the flag is read again by
# the loss helpers and the training loop.
train_on_gpu = torch.cuda.is_available()
if not train_on_gpu:
    print('Training on CPU.')
else:
    G.cuda()
    D.cuda()
    print('GPU available for training. Models moved to GPU')
def real_loss(D_out, smooth=False):
    """Binary cross-entropy (with logits) of ``D_out`` against "real" labels.

    Args:
        D_out: Raw discriminator scores (logits), e.g. shape ``(batch, 1)``
            or ``(batch,)``.
        smooth: When true, use 0.9 instead of 1.0 as the target
            (one-sided label smoothing).

    Returns:
        A scalar tensor holding the mean loss over the batch.
    """
    # Flatten explicitly instead of squeeze(): squeeze() turns a batch of one
    # into a 0-d tensor, which no longer matches the label shape and makes
    # BCEWithLogitsLoss raise.
    logits = D_out.reshape(-1)

    # Build the labels from the logits so they always share device and dtype
    # with the discriminator output, without consulting a global GPU flag.
    target_value = 0.9 if smooth else 1.0
    labels = torch.full_like(logits, target_value)

    # binary cross entropy with logits loss
    criterion = nn.BCEWithLogitsLoss()
    return criterion(logits, labels)
def fake_loss(D_out):
    """Binary cross-entropy (with logits) of ``D_out`` against "fake" labels.

    Args:
        D_out: Raw discriminator scores (logits), e.g. shape ``(batch, 1)``
            or ``(batch,)``.

    Returns:
        A scalar tensor holding the mean loss over the batch.
    """
    # Flatten explicitly instead of squeeze(): squeeze() turns a batch of one
    # into a 0-d tensor, which no longer matches the label shape and makes
    # BCEWithLogitsLoss raise.
    logits = D_out.reshape(-1)

    # fake labels = 0, created on the same device/dtype as the logits so no
    # global GPU flag is needed.
    labels = torch.zeros_like(logits)

    criterion = nn.BCEWithLogitsLoss()
    return criterion(logits, labels)
import torch.optim as optim
# Adam hyperparameters shared by both optimizers.
lr = 0.0002
beta1 = 0.5
beta2 = 0.999

# One optimizer per network so the discriminator and the generator can be
# stepped independently.
d_optimizer = optim.Adam(D.parameters(), lr=lr, betas=[beta1, beta2])
g_optimizer = optim.Adam(G.parameters(), lr=lr, betas=[beta1, beta2])
import pickle as pkl
# Training configuration.
num_epochs = 50
samples = []       # one batch of generated images per epoch (CPU tensors)
losses = []        # (d_loss, g_loss) pairs, recorded every `print_every` batches
print_every = 300

# Fixed latent vectors, reused after every epoch so the saved samples show how
# the generator's output for the same inputs evolves.
sample_size = 16
fixed_z = np.random.uniform(-1, 1, size=(sample_size, z_size))
fixed_z = torch.from_numpy(fixed_z).float()
if train_on_gpu:
    # Moved once here rather than on every epoch.
    fixed_z = fixed_z.cuda()

for epoch in range(num_epochs):
    for batch_i, (real_images, _) in enumerate(train_loader):
        # The last batch of an epoch may be smaller than the loader's batch size.
        batch_size = real_images.size(0)

        # Rescale the images with the default range of scale(), i.e. (-1, 1).
        real_images = scale(real_images)
        if train_on_gpu:
            real_images = real_images.cuda()

        # ---- Discriminator step: real images -> "real", generated -> "fake".
        d_optimizer.zero_grad()

        D_real = D(real_images)
        d_real_loss = real_loss(D_real)

        z = np.random.uniform(-1, 1, size=(batch_size, z_size))
        z = torch.from_numpy(z).float()
        if train_on_gpu:
            z = z.cuda()
        fake_images = G(z)

        # detach(): this step only updates D, so there is no need to
        # backpropagate through the generator.
        D_fake = D(fake_images.detach())
        d_fake_loss = fake_loss(D_fake)

        d_loss = d_real_loss + d_fake_loss
        d_loss.backward()
        d_optimizer.step()

        # ---- Generator step: fresh fakes, scored against "real" labels.
        g_optimizer.zero_grad()

        z = np.random.uniform(-1, 1, size=(batch_size, z_size))
        z = torch.from_numpy(z).float()
        if train_on_gpu:
            z = z.cuda()
        fake_images = G(z)

        D_fake = D(fake_images)
        g_loss = real_loss(D_fake)  # flipped labels: G wants D to say "real"
        g_loss.backward()
        g_optimizer.step()

        if batch_i % print_every == 0:
            losses.append((d_loss.item(), g_loss.item()))
            print('Epoch [{:5d}/{:5d}] | d_loss: {:6.4f} | g_loss: {:6.4f}'.format(
                epoch + 1, num_epochs, d_loss.item(), g_loss.item()))

    # After each epoch: generate from the fixed latent vectors in eval mode.
    G.eval()
    # no_grad + cpu(): keep only the pixel values. Storing the raw output
    # would retain its autograd graph and GPU memory for every epoch, and
    # would pickle device-bound tensors.
    with torch.no_grad():
        samples_z = G(fixed_z)
    samples.append(samples_z.cpu())
    G.train()

# Persist the per-epoch samples for later inspection.
with open('train_samples.pkl', 'wb') as f:
    pkl.dump(samples, f)
# Plot the recorded discriminator and generator losses on one set of axes.
fig, ax = plt.subplots()
losses = np.array(losses)  # rows are records, columns are (d_loss, g_loss)
for column, curve_name in enumerate(('Discriminator', 'Generator')):
    ax.plot(losses.T[column], label=curve_name, alpha=0.5)
ax.set_title("Training Losses")
ax.legend()
plt.show()
def view_samples(epoch, samples):
    """Draw the images stored at ``samples[epoch]`` on a 2 x 8 grid.

    Args:
        epoch: Index into ``samples`` (negative indices count from the end).
        samples: Sequence of image batches; each image is a tensor that is
            transposed with axes ``(1, 2, 0)`` before display and is expected
            to hold values in [-1, 1].

    Only as many images as there are grid cells (16) are drawn. The figure is
    created but not shown; call ``plt.show()`` afterwards when running as a
    script.
    """
    _, axes = plt.subplots(figsize=(16, 4), nrows=2, ncols=8, sharey=True, sharex=True)
    for ax, img in zip(axes.flatten(), samples[epoch]):
        img = img.detach().cpu().numpy()
        img = np.transpose(img, (1, 2, 0))
        # Map [-1, 1] to displayable 8-bit values in [0, 255].
        img = ((img + 1) * 255 / 2).astype(np.uint8)
        ax.xaxis.set_visible(False)
        ax.yaxis.set_visible(False)
        # The transposed array is displayed as-is, so images of any height
        # and width work, not only 32x32.
        ax.imshow(img)
# Display the samples generated after the final epoch. view_samples only
# builds the figure, so show it explicitly like the other plots in this file.
_ = view_samples(-1, samples)
plt.show()