GANs have one very big problem: training is highly unstable, and the trained model may well generate something nonsensical. As the paper puts it:
GANs have been known to be unstable to train, often resulting in generators that produce nonsensical outputs.
The contributions of this paper:
We propose and evaluate a set of constraints on the architectural topology of Convolutional GANs that make them stable to train in most settings. We name this class of architectures Deep Convolutional GANs (DCGAN).
We use the trained discriminators for image classification tasks, showing competitive performance with other unsupervised algorithms.
We visualize the filters learnt by GANs and empirically show that specific filters have learned to draw specific objects
We show that the generators have interesting vector arithmetic properties allowing for easy manipulation of many semantic qualities of generated samples.
Historically, attempts to scale up GANs by using CNNs to model images have been largely unsuccessful.
This is what motivated the authors of LAPGAN to develop their alternative, iterative-upscaling approach.
The DCGAN authors also ran into many difficulties while trying to improve GANs with CNN architectures.
However, after extensive model exploration they identified a family of architectures that trains stably across a range of datasets and allows training higher-resolution, deeper generative models.
The output of the code below is actually not very good.
The reason is that I set EPOCH to 1; with a larger value the results are much better.
I set it to 1 simply because training is painfully slow.
Moreover, all ten digit classes are thrown together and trained on jointly, with no separation by class, so it is no wonder the Tsinghua group proposed the Triple-GAN idea!
Asking the network to find common features across so many different classes mixed together is bound to work badly.
The code below is still worth studying, though: just filter the dataset down to a single class and it turns into something reasonable (admittedly an inefficient way to go about it). Later in this post I rewrite it as a DCGAN with a single generation target; a minimal sketch of the dataset-filtering idea follows right below.
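Before the full script, here is a minimal sketch (my own addition, not part of the original code) of how one might restrict MNIST to a single digit using torch.utils.data.Subset; the second script in this post achieves the same thing by subclassing torchvision.datasets.MNIST, and target_digit here is just an illustrative name.
import torchvision
from torch.utils.data import Subset, DataLoader

# Sketch: keep only one digit class before training a single-target DCGAN.
target_digit = 1  # illustrative choice; any digit 0-9 works

mnist = torchvision.datasets.MNIST(
    root='./mnist/',
    train=True,
    transform=torchvision.transforms.ToTensor(),
    download=True,
)

# indices of the samples whose label equals target_digit
indices = (mnist.targets == target_digit).nonzero(as_tuple=True)[0]
single_digit_data = Subset(mnist, indices.tolist())

loader = DataLoader(single_digit_data, batch_size=50, shuffle=True)
print(len(single_digit_data))  # roughly 6000 training images per digit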
import torch
import torch.nn as nn
import torchvision
import torch.utils.data as Data
import matplotlib.pyplot as plt
# Hyper Parameters
EPOCH = 1 # number of full passes over the training set
BATCH_SIZE = 50
LR = 0.0002 # learning rate
DOWNLOAD_MNIST = True # skipped automatically if the data is already downloaded
len_Z = 100 # number of channels of the random input (latent vector) fed to the Generator
g_hidden_channal = 64
d_hidden_channal = 64
image_channal = 1 # MNIST images are grayscale, so a single channel
# MNIST handwritten digits
train_data = torchvision.datasets.MNIST(
    root='./mnist/', # where to save / load the dataset
    train=True, # this is training data
    transform=torchvision.transforms.ToTensor(), # converts PIL.Image or numpy.ndarray to
    # torch.FloatTensor (C x H x W) and scales pixel values into [0.0, 1.0]
    download=DOWNLOAD_MNIST, # download if not present, otherwise reuse the local copy
)
test_data = torchvision.datasets.MNIST(  # test split (loaded here but not used below)
root='./mnist/',
train=False
)
# The loader yields BATCH_SIZE images at a time; each image is 28*28
train_loader = Data.DataLoader(
    dataset=train_data,
    batch_size=BATCH_SIZE,
    shuffle=True # shuffle the training data
)
class Generator(nn.Module):
def __init__(self, len_Z, hidden_channal, output_channal):
super(Generator, self).__init__()
self.layer1 = nn.Sequential(
nn.ConvTranspose2d(
in_channels=len_Z,
out_channels=hidden_channal * 4,
kernel_size=4,
),
nn.BatchNorm2d(hidden_channal * 4),
nn.ReLU()
)
# [BATCH, hidden_channal * 4 , 4, 4]
self.layer2 = nn.Sequential(
nn.ConvTranspose2d(
in_channels=hidden_channal * 4,
out_channels=hidden_channal * 2,
                kernel_size=3, # kernel_size=3 here makes the final output come out at 28x28
stride=2,
padding=1
),
nn.BatchNorm2d(hidden_channal * 2),
nn.ReLU()
)
#
self.layer3 = nn.Sequential(
nn.ConvTranspose2d(
in_channels=hidden_channal * 2,
out_channels=hidden_channal,
kernel_size=4,
stride=2,
padding=1
),
nn.BatchNorm2d(hidden_channal),
nn.ReLU()
)
self.layer4 = nn.Sequential(
nn.ConvTranspose2d(
in_channels=hidden_channal,
out_channels=output_channal,
kernel_size=4,
stride=2,
padding=1
),
nn.Tanh()
)
def forward(self, x):
# [50, 100, 1, 1]
out = self.layer1(x)
# [50, 256, 4, 4]
# print(out.shape)
out = self.layer2(out)
# [50, 128, 7, 7]
# print(out.shape)
out = self.layer3(out)
# [50, 64, 14, 14]
# print(out.shape)
out = self.layer4(out)
# print(out.shape)
# [50, 1, 28, 28]
return out
# # Test Generator
# G = Generator(len_Z, g_hidden_channal, image_channal)
# data = torch.randn((BATCH_SIZE, len_Z, 1, 1))
# print(G(data).shape)
class Discriminator(nn.Module):
def __init__(self, input_channal, hidden_channal):
super(Discriminator, self).__init__()
self.layer1 = nn.Sequential(
nn.Conv2d(
in_channels=input_channal,
out_channels=hidden_channal,
kernel_size=4,
stride=2,
padding=1
),
nn.BatchNorm2d(hidden_channal),
nn.LeakyReLU(0.2)
)
self.layer2 = nn.Sequential(
nn.Conv2d(
in_channels=hidden_channal,
out_channels=hidden_channal * 2,
kernel_size=4,
stride=2,
padding=1
),
nn.BatchNorm2d(hidden_channal * 2),
nn.LeakyReLU(0.2)
)
self.layer3 = nn.Sequential(
nn.Conv2d(
in_channels=hidden_channal * 2,
out_channels=hidden_channal * 4,
kernel_size=3,
stride=2,
padding=1
),
nn.BatchNorm2d(hidden_channal * 4),
nn.LeakyReLU(0.2)
)
self.layer4 = nn.Sequential(
nn.Conv2d(
in_channels=hidden_channal * 4,
out_channels=1,
kernel_size=4,
stride=1,
padding=0
),
nn.Sigmoid()
)
# [BATCH, 1, 1, 1]
def forward(self, x):
# print(x.shape)
out = self.layer1(x)
# print(out.shape)
out = self.layer2(out)
# print(out.shape)
out = self.layer3(out)
# print(out.shape)
out = self.layer4(out)
return out
# # Test Discriminator
# D = Discriminator(1, d_hidden_channal)
# data = torch.randn((BATCH_SIZE, 1, 28, 28))
# print(D(data).shape)
G = Generator(len_Z, g_hidden_channal, image_channal)
D = Discriminator(image_channal, d_hidden_channal)
# loss & optimizer
criterion = nn.BCELoss()
optimD = torch.optim.Adam(D.parameters(), lr=LR)
optimG = torch.optim.Adam(G.parameters(), lr=LR)
label_Real = torch.ones(BATCH_SIZE)   # targets for real images
label_Fake = torch.zeros(BATCH_SIZE)  # targets for generated (fake) images
for epoch in range(EPOCH):
    for step, (images, imagesLabel) in enumerate(train_loader):
        # ---- update the discriminator ----
        G_ideas = torch.randn((BATCH_SIZE, len_Z, 1, 1))   # random latent vectors
        G_paintings = G(G_ideas)                           # fake images
        p_real = torch.squeeze(D(images))                  # D's predictions on real images
        p_fake = torch.squeeze(D(G_paintings.detach()))    # detach so D's loss does not update G
        errD_real = criterion(p_real, label_Real)
        errD_fake = criterion(p_fake, label_Fake)
        errD = errD_real + errD_fake
        optimD.zero_grad()
        errD.backward()
        optimD.step()
        # ---- update the generator ----
        p_fake = torch.squeeze(D(G_paintings))             # re-score the fakes with the updated D
        errG = criterion(p_fake, label_Real)               # G wants D to call its fakes real
        optimG.zero_grad()
        errG.backward()
        optimG.step()
        if (step + 1) % 100 == 0:
            picture = torch.squeeze(G_paintings[0]).detach().numpy()
            plt.imshow(picture, cmap=plt.cm.gray_r)
            plt.show()
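Two details from the DCGAN paper that the script above skips, plus a friendlier way to dump samples (these are my own suggestions, not part of the original code): the paper scales inputs to the range of the Tanh activation, [-1, 1], whereas ToTensor() alone gives [0, 1], and it trains with Adam using beta1 = 0.5. A rough sketch:
import torch
import torchvision
import torchvision.transforms as transforms

# 1) Scale the real images to [-1, 1] so they match the generator's Tanh output.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),  # (x - 0.5) / 0.5 maps [0, 1] to [-1, 1]
])

# 2) Adam with beta1 = 0.5, as recommended in the paper (LR as above):
# optimD = torch.optim.Adam(D.parameters(), lr=LR, betas=(0.5, 0.999))
# optimG = torch.optim.Adam(G.parameters(), lr=LR, betas=(0.5, 0.999))

# 3) Save a grid of generated samples to disk instead of calling plt.show()
#    every 100 steps; 'fake' stands in for a batch of generator outputs.
fake = torch.tanh(torch.randn(64, 1, 28, 28))  # placeholder batch in [-1, 1]
torchvision.utils.save_image(fake, 'samples.png', nrow=8, normalize=True)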
Building on another article I wrote, here is the version that targets a single digit class.
The evolution of the generator during training:
import torch
import torch.nn as nn
import torchvision
import torch.utils.data as Data
import matplotlib.pyplot as plt
# Hyper Parameters
EPOCH = 10 # number of full passes over the training set
BATCH_SIZE = 50
LR = 0.0002 # learning rate
DOWNLOAD_MNIST = False # skipped automatically if the data is already downloaded
len_Z = 100 # number of channels of the random input (latent vector) fed to the Generator
g_hidden_channal = 64
d_hidden_channal = 64
image_channal = 1 # MNIST images are grayscale, so a single channel
# MNIST handwritten digits, restricted to a single target digit
class myMNIST(torchvision.datasets.MNIST):
    def __init__(self, root, train=True, transform=None, target_transform=None, download=False, targetNum=None):
        super(myMNIST, self).__init__(
            root,
            train=train,
            transform=transform,
            target_transform=target_transform,
            download=download)
        if targetNum is not None:
            # keep only the images of the requested digit
            # (recent torchvision stores them in self.data / self.targets)
            mask = self.targets == targetNum
            self.data = self.data[mask]
            self.targets = self.targets[mask]
            # drop the tail so every batch contains exactly BATCH_SIZE images
            keep = (self.data.shape[0] // BATCH_SIZE) * BATCH_SIZE
            self.data = self.data[:keep]
            self.targets = self.targets[:keep]

    def __len__(self):
        # length after filtering
        return self.data.shape[0]
train_data = myMNIST(
    root='./mnist/', # where to save / load the dataset
    train=True, # this is training data
    transform=torchvision.transforms.ToTensor(), # converts PIL.Image or numpy.ndarray to
    # torch.FloatTensor (C x H x W) and scales pixel values into [0.0, 1.0]
    download=DOWNLOAD_MNIST, # download if not present, otherwise reuse the local copy
    targetNum=1 # keep only images of the digit 1
)
print(len(train_data))
# print(train_data.shape)
# The loader yields BATCH_SIZE images at a time; each image is 28*28
train_loader = Data.DataLoader(
    dataset=train_data,
    batch_size=BATCH_SIZE,
    shuffle=True # shuffle the training data
)
class Generator(nn.Module):
def __init__(self, len_Z, hidden_channal, output_channal):
super(Generator, self).__init__()
self.layer1 = nn.Sequential(
nn.ConvTranspose2d(
in_channels=len_Z,
out_channels=hidden_channal * 4,
kernel_size=4,
),
nn.BatchNorm2d(hidden_channal * 4),
nn.ReLU()
)
# [BATCH, hidden_channal * 4 , 4, 4]
self.layer2 = nn.Sequential(
nn.ConvTranspose2d(
in_channels=hidden_channal * 4,
out_channels=hidden_channal * 2,
                kernel_size=3, # kernel_size=3 here makes the final output come out at 28x28
stride=2,
padding=1
),
nn.BatchNorm2d(hidden_channal * 2),
nn.ReLU()
)
#
self.layer3 = nn.Sequential(
nn.ConvTranspose2d(
in_channels=hidden_channal * 2,
out_channels=hidden_channal,
kernel_size=4,
stride=2,
padding=1
),
nn.BatchNorm2d(hidden_channal),
nn.ReLU()
)
self.layer4 = nn.Sequential(
nn.ConvTranspose2d(
in_channels=hidden_channal,
out_channels=output_channal,
kernel_size=4,
stride=2,
padding=1
),
nn.Tanh()
)
def forward(self, x):
# [50, 100, 1, 1]
out = self.layer1(x)
# [50, 256, 4, 4]
# print(out.shape)
out = self.layer2(out)
# [50, 128, 7, 7]
# print(out.shape)
out = self.layer3(out)
# [50, 64, 14, 14]
# print(out.shape)
out = self.layer4(out)
# print(out.shape)
# [50, 1, 28, 28]
return out
# # Test Generator
# G = Generator(len_Z, g_hidden_channal, image_channal)
# data = torch.randn((BATCH_SIZE, len_Z, 1, 1))
# print(G(data).shape)
class Discriminator(nn.Module):
def __init__(self, input_channal, hidden_channal):
super(Discriminator, self).__init__()
self.layer1 = nn.Sequential(
nn.Conv2d(
in_channels=input_channal,
out_channels=hidden_channal,
kernel_size=4,
stride=2,
padding=1
),
nn.BatchNorm2d(hidden_channal),
nn.LeakyReLU(0.2)
)
self.layer2 = nn.Sequential(
nn.Conv2d(
in_channels=hidden_channal,
out_channels=hidden_channal * 2,
kernel_size=4,
stride=2,
padding=1
),
nn.BatchNorm2d(hidden_channal * 2),
nn.LeakyReLU(0.2)
)
self.layer3 = nn.Sequential(
nn.Conv2d(
in_channels=hidden_channal * 2,
out_channels=hidden_channal * 4,
kernel_size=3,
stride=2,
padding=1
),
nn.BatchNorm2d(hidden_channal * 4),
nn.LeakyReLU(0.2)
)
self.layer4 = nn.Sequential(
nn.Conv2d(
in_channels=hidden_channal * 4,
out_channels=1,
kernel_size=4,
stride=1,
padding=0
),
nn.Sigmoid()
)
# [BATCH, 1, 1, 1]
def forward(self, x):
# print(x.shape)
out = self.layer1(x)
# print(out.shape)
out = self.layer2(out)
# print(out.shape)
out = self.layer3(out)
# print(out.shape)
out = self.layer4(out)
return out
# # Test Discriminator
# D = Discriminator(1, d_hidden_channal)
# data = torch.randn((BATCH_SIZE, 1, 28, 28))
# print(D(data).shape)
G = Generator(len_Z, g_hidden_channal, image_channal)
D = Discriminator(image_channal, d_hidden_channal)
# loss & optimizer
criterion = nn.BCELoss()
optimD = torch.optim.Adam(D.parameters(), lr=LR)
optimG = torch.optim.Adam(G.parameters(), lr=LR)
label_Real = torch.ones(BATCH_SIZE)   # targets for real images
label_Fake = torch.zeros(BATCH_SIZE)  # targets for generated (fake) images
for epoch in range(EPOCH):
    for step, (images, imagesLabel) in enumerate(train_loader):
        # ---- update the discriminator ----
        G_ideas = torch.randn((BATCH_SIZE, len_Z, 1, 1))   # random latent vectors
        G_paintings = G(G_ideas)                           # fake images
        p_real = torch.squeeze(D(images))                  # D's predictions on real images
        p_fake = torch.squeeze(D(G_paintings.detach()))    # detach so D's loss does not update G
        errD_real = criterion(p_real, label_Real)
        errD_fake = criterion(p_fake, label_Fake)
        errD = errD_real + errD_fake
        optimD.zero_grad()
        errD.backward()
        optimD.step()
        # ---- update the generator ----
        p_fake = torch.squeeze(D(G_paintings))             # re-score the fakes with the updated D
        errG = criterion(p_fake, label_Real)               # G wants D to call its fakes real
        optimG.zero_grad()
        errG.backward()
        optimG.step()
    # show one generated sample at the end of each epoch
    picture = torch.squeeze(G_paintings[0]).detach().numpy()
    plt.imshow(picture, cmap=plt.cm.gray_r)
    plt.show()
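Once training finishes, the generator can be saved and sampled from later. The continuation below is my own sketch: it assumes the G, Generator, len_Z, g_hidden_channal and image_channal defined in the script above, the file name dcgan_G.pt is arbitrary, and the interpolation at the end is just a rough illustration of the latent-space structure behind the "vector arithmetic" property quoted from the paper earlier.
# Save the trained generator and reload it for sampling.
torch.save(G.state_dict(), 'dcgan_G.pt')  # arbitrary file name

G2 = Generator(len_Z, g_hidden_channal, image_channal)
G2.load_state_dict(torch.load('dcgan_G.pt'))
G2.eval()

with torch.no_grad():
    z0 = torch.randn(1, len_Z, 1, 1)  # two random latent vectors
    z1 = torch.randn(1, len_Z, 1, 1)
    # walk from z0 to z1; smooth transitions between the decoded images suggest
    # the generator is using the latent space rather than memorizing samples
    steps = torch.linspace(0, 1, 8).view(-1, 1, 1, 1)
    z_path = z0 + steps * (z1 - z0)   # [8, len_Z, 1, 1]
    samples = G2(z_path)              # [8, 1, 28, 28]

for i in range(samples.shape[0]):
    plt.subplot(1, samples.shape[0], i + 1)
    plt.imshow(torch.squeeze(samples[i]).numpy(), cmap=plt.cm.gray_r)
    plt.axis('off')
plt.show()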