本文参考官方博客以及李宏毅老师讲解。
另参考https://blog.csdn.net/sunqiande88/article/details/80219842
关于其中转置卷积和卷积的问题,会另外开一篇。
理论和GAN理论类似,只是在两个模型上做了改进,
论文中详细说明了改进方法。如下:
1.将pooling层替换成带strides的卷积层。
2.消除顶层卷积特征中的全连接层,为了实现更深的网络。顶层特征指的是生成器的输入,以及判别器的输出。
3.使用BatchNorm。直接对所有层使用BatchNorm会导致震荡和不稳定,所以在生成器的输出层和判别器的输入层不使用BatchNorm。
4.生成器中除输出层使用tanh激活外,其余层都使用ReLU激活。判别器的所有层都使用LeakyReLU激活,这对高分辨率建模尤其有效。
https://blog.csdn.net/yfy1127yfy/article/details/88822125
导入包:
import argparse
import os
from random import randint

import torch
import torch.nn as nn
import torchvision
import torchvision.utils as vutils
参数:
def _build_parser():
    """Create the command-line parser holding the DCGAN training options."""
    cli = argparse.ArgumentParser()

    # Data / batch geometry.
    cli.add_argument('--batchSize', type=int, default=64)
    cli.add_argument('--imageSize', type=int, default=96)

    # Model widths: latent size plus base feature-map counts for G and D.
    cli.add_argument('--nz', type=int, default=100, help='size of the latent z vector')
    cli.add_argument('--ngf', type=int, default=64)
    cli.add_argument('--ndf', type=int, default=64)

    # Optimisation schedule (Adam hyper-parameters).
    cli.add_argument('--epoch', type=int, default=25, help='number of epochs to train for')
    cli.add_argument('--lr', type=float, default=0.0002, help='learning rate, default=0.0002')
    cli.add_argument('--beta1', type=float, default=0.5, help='beta1 for adam. default=0.5')

    # Input and output locations.
    cli.add_argument('--data_path', default='./face_data/', help='folder to train data')
    cli.add_argument('--outf', default='./imgs/', help='folder to output images and model checkpoints')
    return cli


# Parse the process arguments once; the rest of the script reads them from `opt`.
parser = _build_parser()
opt = parser.parse_args()
数据处理:
# Pick the GPU when CUDA is usable, otherwise fall back to the CPU,
# and report which case applies.
cuda_ok = torch.cuda.is_available()
device = torch.device("cuda" if cuda_ok else "cpu")
print(cuda_ok)

# Image loading and preprocessing: resize to the fixed 96x96 resolution the
# networks are built for, convert to a tensor, then normalize each of the three
# channels with mean 0.5 and std 0.5.
_tv = torchvision.transforms
transforms = _tv.Compose([
    _tv.Resize([96, 96]),
    _tv.ToTensor(),
    _tv.Normalize((0.5,) * 3, (0.5,) * 3),
])

# ImageFolder yields (image, class_index) pairs read from opt.data_path.
dataset = torchvision.datasets.ImageFolder(opt.data_path, transform=transforms)

# Shuffled loader; drop_last=True discards a trailing incomplete batch so that
# every batch holds exactly opt.batchSize samples.
dataloader = torch.utils.data.DataLoader(
    dataset,
    batch_size=opt.batchSize,
    shuffle=True,
    drop_last=True,
)
关于torch.utils.data.DataLoader()中的drop_last参数:
如果数据集大小不能被批大小整除,将其设置为True会丢弃最后一个不完整的批次;如果为False,则最后一个批次会比其他批次小(默认值:False)。这里建议设为True,保证每个批次的大小都等于batchSize。
模型:
class NetG(nn.Module):
    """DCGAN generator: turns a latent tensor into a 3 x 96 x 96 image.

    Args:
        ngf: base number of generator feature maps; the hidden layers use
            ngf*8, ngf*4, ngf*2 and ngf channels in that order.
        nz: number of channels of the latent input, expected as N x nz x 1 x 1.
    """

    def __init__(self, ngf, nz):
        super().__init__()
        # Spatial size grows 1 -> 4 -> 8 -> 16 -> 32 across the four hidden stages.
        self.layer1 = self._up_block(nz, ngf * 8, kernel=4, stride=1, pad=0)
        self.layer2 = self._up_block(ngf * 8, ngf * 4, kernel=4, stride=2, pad=1)
        self.layer3 = self._up_block(ngf * 4, ngf * 2, kernel=4, stride=2, pad=1)
        self.layer4 = self._up_block(ngf * 2, ngf, kernel=4, stride=2, pad=1)
        # Output stage: 32 -> 96 with kernel 5 / stride 3, no BatchNorm, tanh
        # activation so pixel values lie in [-1, 1].
        self.layer5 = nn.Sequential(
            nn.ConvTranspose2d(ngf, 3, 5, 3, 1, bias=False),
            nn.Tanh(),
        )

    @staticmethod
    def _up_block(in_ch, out_ch, kernel, stride, pad):
        """Return a transposed-conv -> BatchNorm -> ReLU stage."""
        return nn.Sequential(
            nn.ConvTranspose2d(in_ch, out_ch, kernel_size=kernel, stride=stride,
                               padding=pad, bias=False),
            nn.BatchNorm2d(out_ch),
            nn.ReLU(inplace=True),
        )

    def forward(self, x):
        """Run the five stages in order and return the generated image batch."""
        stages = (self.layer1, self.layer2, self.layer3, self.layer4, self.layer5)
        for stage in stages:
            x = stage(x)
        return x
# Discriminator network D
class NetD(nn.Module):
    """DCGAN discriminator: scores a 3 x 96 x 96 image with a value in [0, 1].

    Args:
        ndf: base number of discriminator feature maps; the hidden layers use
            ndf, ndf*2, ndf*4 and ndf*8 channels in that order.
    """

    def __init__(self, ndf):
        super().__init__()
        # Spatial size shrinks 96 -> 32 -> 16 -> 8 -> 4 across the four hidden stages.
        # NOTE(review): the first stage applies BatchNorm to the input layer,
        # which the DCGAN guidelines advise against; confirm this is intended.
        self.layer1 = self._down_block(3, ndf, kernel=5, stride=3, pad=1)
        self.layer2 = self._down_block(ndf, ndf * 2, kernel=4, stride=2, pad=1)
        self.layer3 = self._down_block(ndf * 2, ndf * 4, kernel=4, stride=2, pad=1)
        self.layer4 = self._down_block(ndf * 4, ndf * 8, kernel=4, stride=2, pad=1)
        # Output stage: a 4x4 convolution collapses the map to a single value,
        # squashed to a probability by the sigmoid.
        self.layer5 = nn.Sequential(
            nn.Conv2d(ndf * 8, 1, 4, 1, 0, bias=False),
            nn.Sigmoid(),
        )

    @staticmethod
    def _down_block(in_ch, out_ch, kernel, stride, pad):
        """Return a strided conv -> BatchNorm -> LeakyReLU(0.2) stage."""
        return nn.Sequential(
            nn.Conv2d(in_ch, out_ch, kernel_size=kernel, stride=stride,
                      padding=pad, bias=False),
            nn.BatchNorm2d(out_ch),
            nn.LeakyReLU(0.2, inplace=True),
        )

    def forward(self, x):
        """Run the five stages in order; the result has shape N x 1 x 1 x 1."""
        stages = (self.layer1, self.layer2, self.layer3, self.layer4, self.layer5)
        for stage in stages:
            x = stage(x)
        return x
训练前的设置:
# Build both networks and move them to the selected device.
netG = NetG(opt.ngf, opt.nz).to(device)
netD = NetD(opt.ndf).to(device)

# Binary cross-entropy between D's sigmoid score and the real/fake target.
criterion = nn.BCELoss()

# One Adam optimizer per network; both share the same learning rate and betas.
adam_settings = dict(lr=opt.lr, betas=(opt.beta1, 0.999))
optimizerG = torch.optim.Adam(netG.parameters(), **adam_settings)
optimizerD = torch.optim.Adam(netD.parameters(), **adam_settings)

# Float buffer with one entry per sample of a batch (contents uninitialized),
# intended to hold the BCE target values.
label = torch.empty(opt.batchSize, dtype=torch.float32)

# Target values for real and generated images.
real_label, fake_label = 1, 0
训练:
# Training loop: alternate one discriminator update and one generator update
# per batch, log both losses, and write a sample grid plus both checkpoints
# at the end of every epoch.

# Create the output folder up front so the first save does not fail.
os.makedirs(opt.outf, exist_ok=True)

# With drop_last=True a dataset smaller than one batch yields no batches at
# all; fail with a clear message instead of a NameError at save time.
if len(dataloader) == 0:
    raise RuntimeError(
        'dataloader produced no batches; check --data_path and --batchSize')

for epoch in range(1, opt.epoch + 1):
    for i, (imgs, _) in enumerate(dataloader):
        imgs = imgs.to(device)
        # Size everything from the actual batch so a smaller final batch
        # (drop_last=False) would also work.
        batch_size = imgs.size(0)
        real_target = torch.full((batch_size,), float(real_label), device=device)
        fake_target = torch.full((batch_size,), float(fake_label), device=device)

        # ---- Update D while G is held fixed ----
        optimizerD.zero_grad()

        # D should score real images as real_label. netD returns N x 1 x 1 x 1;
        # flatten to (N,) because BCELoss requires input and target of equal size.
        output = netD(imgs).view(-1)
        errD_real = criterion(output, real_target)
        errD_real.backward()

        # D should score generated images as fake_label.
        noise = torch.randn(batch_size, opt.nz, 1, 1, device=device)
        fake = netG(noise)
        # detach() keeps this backward pass from reaching G, which is not
        # being updated in this step.
        output = netD(fake.detach()).view(-1)
        errD_fake = criterion(output, fake_target)
        errD_fake.backward()

        errD = errD_fake + errD_real
        optimizerD.step()

        # ---- Update G while D is held fixed ----
        optimizerG.zero_grad()
        # G is rewarded when D scores its images as real_label.
        output = netD(fake).view(-1)
        errG = criterion(output, real_target)
        errG.backward()
        optimizerG.step()

        print('[%d/%d][%d/%d] Loss_D: %.3f Loss_G %.3f'
              % (epoch, opt.epoch, i, len(dataloader), errD.item(), errG.item()))

    # End of epoch: save the last generated batch as an image grid and
    # checkpoint both networks.
    vutils.save_image(fake.detach(),
                      os.path.join(opt.outf, 'fake_samples_epoch_%03d.png' % epoch),
                      normalize=True)
    torch.save(netG.state_dict(), os.path.join(opt.outf, 'netG_%03d.pth' % epoch))
    torch.save(netD.state_dict(), os.path.join(opt.outf, 'netD_%03d.pth' % epoch))