This note collects all the experimental details: the G/D architectures, the learning objectives, how the output layer is handled (whether the last layer is a conv or an activation), the losses and their implementation code, the hyperparameters (especially the loss weights), and the training code.
The most basic deep convolutional GAN (DCGAN)
Paper: Unsupervised Representation Learning with Deep Convolutional Generative Adversarial Networks
Git: https://github.com/pytorch/examples/tree/master/dcgan
# Weight initialization
# custom weights initialization called on netG and netD
def weights_init(m):
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        m.weight.data.normal_(0.0, 0.02)
    elif classname.find('BatchNorm') != -1:
        m.weight.data.normal_(1.0, 0.02)
        m.bias.data.fill_(0)
# Generator output layer: nn.Tanh()
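The generator itself is not copied into this note; below is a minimal sketch following the layout of the official DCGAN example (nz = noise dimension, ngf = generator feature width, nc = image channels, all taken from the script options). The final nn.Tanh() maps outputs to [-1, 1].
class Generator(nn.Module):
    def __init__(self, ngpu):
        super(Generator, self).__init__()
        self.ngpu = ngpu
        self.main = nn.Sequential(
            # input is Z of shape (nz) x 1 x 1
            nn.ConvTranspose2d(nz, ngf * 8, 4, 1, 0, bias=False),
            nn.BatchNorm2d(ngf * 8),
            nn.ReLU(True),
            # state size. (ngf*8) x 4 x 4
            nn.ConvTranspose2d(ngf * 8, ngf * 4, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ngf * 4),
            nn.ReLU(True),
            # state size. (ngf*4) x 8 x 8
            nn.ConvTranspose2d(ngf * 4, ngf * 2, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ngf * 2),
            nn.ReLU(True),
            # state size. (ngf*2) x 16 x 16
            nn.ConvTranspose2d(ngf * 2, ngf, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ngf),
            nn.ReLU(True),
            # state size. (ngf) x 32 x 32
            nn.ConvTranspose2d(ngf, nc, 4, 2, 1, bias=False),
            nn.Tanh()
            # state size. (nc) x 64 x 64
        )

    def forward(self, input):
        if input.is_cuda and self.ngpu > 1:
            output = nn.parallel.data_parallel(self.main, input, range(self.ngpu))
        else:
            output = self.main(input)
        return output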
# Discriminator
class Discriminator(nn.Module):
    def __init__(self, ngpu):
        super(Discriminator, self).__init__()
        self.ngpu = ngpu
        self.main = nn.Sequential(
            # input is (nc) x 64 x 64
            nn.Conv2d(nc, ndf, 4, 2, 1, bias=False),
            nn.LeakyReLU(0.2, inplace=True),
            # state size. (ndf) x 32 x 32
            nn.Conv2d(ndf, ndf * 2, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ndf * 2),
            nn.LeakyReLU(0.2, inplace=True),
            # state size. (ndf*2) x 16 x 16
            nn.Conv2d(ndf * 2, ndf * 4, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ndf * 4),
            nn.LeakyReLU(0.2, inplace=True),
            # state size. (ndf*4) x 8 x 8
            nn.Conv2d(ndf * 4, ndf * 8, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ndf * 8),
            nn.LeakyReLU(0.2, inplace=True),
            # state size. (ndf*8) x 4 x 4
            nn.Conv2d(ndf * 8, 1, 4, 1, 0, bias=False),
            nn.Sigmoid()
        )

    def forward(self, input):
        if input.is_cuda and self.ngpu > 1:
            output = nn.parallel.data_parallel(self.main, input, range(self.ngpu))
        else:
            output = self.main(input)
        return output.view(-1, 1).squeeze(1)
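A hedged setup sketch for what the training loop below assumes: constructing netG/netD, applying weights_init, and picking a device (ngpu comes from the script options in the official example).
ngpu = int(opt.ngpu)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
netG = Generator(ngpu).to(device)
netG.apply(weights_init)
netD = Discriminator(ngpu).to(device)
netD.apply(weights_init)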
# Training code
criterion = nn.BCELoss()
fixed_noise = torch.randn(opt.batchSize, nz, 1, 1, device=device)
real_label = 1
fake_label = 0

# setup optimizer
optimizerD = optim.Adam(netD.parameters(), lr=opt.lr, betas=(opt.beta1, 0.999))
optimizerG = optim.Adam(netG.parameters(), lr=opt.lr, betas=(opt.beta1, 0.999))

for epoch in range(opt.niter):
    for i, data in enumerate(dataloader, 0):
        ############################
        # (1) Update D network: maximize log(D(x)) + log(1 - D(G(z)))
        ###########################
        # train with real
        netD.zero_grad()
        real_cpu = data[0].to(device)
        batch_size = real_cpu.size(0)
        # BCELoss expects float targets, so build the label tensor with dtype=torch.float
        label = torch.full((batch_size,), real_label, dtype=torch.float, device=device)
        output = netD(real_cpu)
        errD_real = criterion(output, label)
        errD_real.backward()
        D_x = output.mean().item()

        # train with fake
        noise = torch.randn(batch_size, nz, 1, 1, device=device)
        fake = netG(noise)
        label.fill_(fake_label)
        output = netD(fake.detach())
        errD_fake = criterion(output, label)
        errD_fake.backward()
        D_G_z1 = output.mean().item()
        errD = errD_real + errD_fake
        optimizerD.step()

        ############################
        # (2) Update G network: maximize log(D(G(z)))
        ###########################
        netG.zero_grad()
        label.fill_(real_label)  # fake labels are real for generator cost
        output = netD(fake)
        errG = criterion(output, label)
        errG.backward()
        D_G_z2 = output.mean().item()
        optimizerG.step()
# P.S.: the DCGAN in the GAN-MNIST practice code seems a bit nicer.
Loss: standard GAN loss (binary cross-entropy, nn.BCELoss)
Hyperparameters:
LeakyReLU slope: nn.LeakyReLU(0.2, inplace=True)
batchSize = 64, imageSize = 64
optimizerD = optim.Adam(netD.parameters(), lr=0.0002, betas=(0.5, 0.999))
optimizerG = optim.Adam(netG.parameters(), lr=0.0002, betas=(0.5, 0.999))
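For completeness, a hedged sketch of where the opt values referenced above come from; the flag names and defaults follow the official DCGAN example, but this is a trimmed illustration rather than the full argument list.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--batchSize', type=int, default=64)
parser.add_argument('--imageSize', type=int, default=64)
parser.add_argument('--nz', type=int, default=100, help='size of the latent z vector')
parser.add_argument('--ngf', type=int, default=64)
parser.add_argument('--ndf', type=int, default=64)
parser.add_argument('--niter', type=int, default=25, help='number of training epochs')
parser.add_argument('--lr', type=float, default=0.0002, help='Adam learning rate')
parser.add_argument('--beta1', type=float, default=0.5, help='Adam beta1')
parser.add_argument('--ngpu', type=int, default=1)
opt = parser.parse_args()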
Multi-style transfer network (MSG-Net)
git: https://github.com/zhanghang1989/PyTorch-Multi-Style-Transfer
Paper: Multi-style Generative Network for Real-time Transfer
# How the VGG16 feature extractor is loaded
def init_vgg16(model_folder):
    """load the vgg16 model feature"""
    if not os.path.exists(os.path.join(model_folder, 'vgg16.weight')):
        if not os.path.exists(os.path.join(model_folder, 'vgg16.t7')):
            os.system(
                'wget http://cs.stanford.edu/people/jcjohns/fast-neural-style/models/vgg16.t7 -O ' + os.path.join(model_folder, 'vgg16.t7'))
        # load_lua comes from the legacy torch.utils.serialization module (removed in recent PyTorch)
        vgglua = load_lua(os.path.join(model_folder, 'vgg16.t7'))
        vgg = Vgg16()
        for (src, dst) in zip(vgglua.parameters()[0], vgg.parameters()):
            dst.data[:] = src
        torch.save(vgg.state_dict(), os.path.join(model_folder, 'vgg16.weight'))
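Since load_lua relies on the removed Lua-Torch serialization path, here is an alternative sketch (not the repo's method): fill the slim Vgg16 module defined below from torchvision's pretrained VGG16, whose conv layers appear in the same order.
import torch.nn as nn
from torchvision import models

def init_vgg16_from_torchvision(vgg):
    # copy conv weights/biases from torchvision's pretrained VGG16 into the slim Vgg16 module
    pretrained = models.vgg16(pretrained=True).features
    src_convs = [m for m in pretrained if isinstance(m, nn.Conv2d)]
    dst_convs = [m for m in vgg.modules() if isinstance(m, nn.Conv2d)]
    for src, dst in zip(src_convs, dst_convs):
        dst.weight.data.copy_(src.weight.data)
        dst.bias.data.copy_(src.bias.data)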
# VGG16 network definition
class Vgg16(torch.nn.Module):
    def __init__(self):
        super(Vgg16, self).__init__()
        self.conv1_1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1)
        self.conv1_2 = nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1)

        self.conv2_1 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)
        self.conv2_2 = nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1)

        self.conv3_1 = nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1)
        self.conv3_2 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1)
        self.conv3_3 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1)

        self.conv4_1 = nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1)
        self.conv4_2 = nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1)
        self.conv4_3 = nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1)

        self.conv5_1 = nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1)
        self.conv5_2 = nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1)
        self.conv5_3 = nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1)

    def forward(self, X):
        h = F.relu(self.conv1_1(X))
        h = F.relu(self.conv1_2(h))
        relu1_2 = h
        h = F.max_pool2d(h, kernel_size=2, stride=2)

        h = F.relu(self.conv2_1(h))
        h = F.relu(self.conv2_2(h))
        relu2_2 = h
        h = F.max_pool2d(h, kernel_size=2, stride=2)

        h = F.relu(self.conv3_1(h))
        h = F.relu(self.conv3_2(h))
        h = F.relu(self.conv3_3(h))
        relu3_3 = h
        h = F.max_pool2d(h, kernel_size=2, stride=2)

        h = F.relu(self.conv4_1(h))
        h = F.relu(self.conv4_2(h))
        h = F.relu(self.conv4_3(h))
        relu4_3 = h
        return [relu1_2, relu2_2, relu3_3, relu4_3]
# Usage
vgg = Vgg16()
utils.init_vgg16(args.vgg_model_dir)
vgg.load_state_dict(torch.load(os.path.join(args.vgg_model_dir, "vgg16.weight")))
vgg.cuda()
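The returned feature list feeds the content and style losses. A hedged sketch of the usual formulation (Gram matrices + MSE); the choice of relu2_2 as the content layer and the weight values here are assumptions, so check the repo for the exact settings.
import torch

mse_loss = torch.nn.MSELoss()

def gram_matrix(y):
    # batched Gram matrix of feature maps, normalized by the layer size
    b, ch, h, w = y.size()
    features = y.view(b, ch, w * h)
    return features.bmm(features.transpose(1, 2)) / (ch * h * w)

def perceptual_losses(feat_out, feat_content, feat_style,
                      content_weight=1.0, style_weight=5.0):
    # feat_* are the [relu1_2, relu2_2, relu3_3, relu4_3] lists returned by Vgg16,
    # all computed on batches of the same size
    content_loss = content_weight * mse_loss(feat_out[1], feat_content[1])
    style_loss = 0.
    for f_o, f_s in zip(feat_out, feat_style):
        style_loss = style_loss + style_weight * mse_loss(gram_matrix(f_o), gram_matrix(f_s))
    return content_loss, style_loss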
pix2pix
git: https://github.com/sunshineatnoon/Paper-Implementations/tree/master/pix2pix
git: https://github.com/mrzhu-cool/pix2pix-pytorch (a somewhat more elaborate implementation)
Paper: Image-to-Image Translation with Conditional Adversarial Networks
# 1. Generator: U-Net (a simplified sketch follows after the discriminator below)
# 2. Output layer: nn.Tanh()
# 3. Discriminator (PatchGAN-style; outputs a 30 x 30 patch map)
class Discriminator(nn.Module):
    def __init__(self, input_nc, output_nc, ndf):
        super(Discriminator, self).__init__()
        # D sees the input and output images concatenated along the channel dimension
        # 256 x 256
        self.layer1 = nn.Sequential(nn.Conv2d(input_nc + output_nc, ndf, kernel_size=4, stride=2, padding=1),
                                    nn.LeakyReLU(0.2, inplace=True))
        # 128 x 128
        self.layer2 = nn.Sequential(nn.Conv2d(ndf, ndf * 2, kernel_size=4, stride=2, padding=1),
                                    nn.BatchNorm2d(ndf * 2),
                                    nn.LeakyReLU(0.2, inplace=True))
        # 64 x 64
        self.layer3 = nn.Sequential(nn.Conv2d(ndf * 2, ndf * 4, kernel_size=4, stride=2, padding=1),
                                    nn.BatchNorm2d(ndf * 4),
                                    nn.LeakyReLU(0.2, inplace=True))
        # 32 x 32
        self.layer4 = nn.Sequential(nn.Conv2d(ndf * 4, ndf * 8, kernel_size=4, stride=1, padding=1),
                                    nn.BatchNorm2d(ndf * 8),
                                    nn.LeakyReLU(0.2, inplace=True))
        # 31 x 31
        self.layer5 = nn.Sequential(nn.Conv2d(ndf * 8, 1, kernel_size=4, stride=1, padding=1),
                                    nn.Sigmoid())
        # 30 x 30 patch output

    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = self.layer5(out)
        return out
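The U-Net generator mentioned in item 1 above, as a simplified sketch (not the exact 8-level network from the repos): an encoder-decoder whose decoder concatenates the matching encoder features (skip connections), with nn.Tanh() as the final activation.
import torch
import torch.nn as nn

class UNetGenerator(nn.Module):
    def __init__(self, input_nc=3, output_nc=3, ngf=64):
        super(UNetGenerator, self).__init__()
        # encoder: 256 -> 128 -> 64 -> 32
        self.down1 = nn.Sequential(nn.Conv2d(input_nc, ngf, 4, 2, 1),
                                   nn.LeakyReLU(0.2, inplace=True))
        self.down2 = nn.Sequential(nn.Conv2d(ngf, ngf * 2, 4, 2, 1),
                                   nn.BatchNorm2d(ngf * 2),
                                   nn.LeakyReLU(0.2, inplace=True))
        self.down3 = nn.Sequential(nn.Conv2d(ngf * 2, ngf * 4, 4, 2, 1),
                                   nn.BatchNorm2d(ngf * 4),
                                   nn.LeakyReLU(0.2, inplace=True))
        # decoder: 32 -> 64 -> 128 -> 256, skip connections via channel concat
        self.up3 = nn.Sequential(nn.ConvTranspose2d(ngf * 4, ngf * 2, 4, 2, 1),
                                 nn.BatchNorm2d(ngf * 2),
                                 nn.ReLU(inplace=True))
        self.up2 = nn.Sequential(nn.ConvTranspose2d(ngf * 4, ngf, 4, 2, 1),
                                 nn.BatchNorm2d(ngf),
                                 nn.ReLU(inplace=True))
        self.up1 = nn.Sequential(nn.ConvTranspose2d(ngf * 2, output_nc, 4, 2, 1),
                                 nn.Tanh())

    def forward(self, x):
        d1 = self.down1(x)                      # (ngf)   x 128 x 128
        d2 = self.down2(d1)                     # (ngf*2) x 64  x 64
        d3 = self.down3(d2)                     # (ngf*4) x 32  x 32
        u3 = self.up3(d3)                       # (ngf*2) x 64  x 64
        u2 = self.up2(torch.cat([u3, d2], 1))   # (ngf)   x 128 x 128
        return self.up1(torch.cat([u2, d1], 1)) # (output_nc) x 256 x 256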
# Training code
# custom weights initialization called on netG and netD
def weights_init(m):
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        m.weight.data.normal_(0.0, 0.02)
        m.bias.data.fill_(0)
    elif classname.find('BatchNorm') != -1:
        m.weight.data.normal_(1.0, 0.02)
        m.bias.data.fill_(0)
########### LOSS & OPTIMIZER ##########
criterion = nn.BCELoss()
criterionL1 = nn.L1Loss()
optimizerD = torch.optim.Adam(netD.parameters(),lr=opt.lr, betas=(opt.beta1, 0.999))
optimizerG = torch.optim.Adam(netG.parameters(),lr=opt.lr, betas=(opt.beta1, 0.999))
real_label = 1
fake_label = 0
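The loop below writes into pre-allocated buffers real_A, real_B, and label. A hedged sketch of their setup in the legacy Variable style this code uses; opt.input_nc, opt.output_nc, and opt.cuda are assumed option names.
from torch.autograd import Variable

# placeholder sizes only: the buffers are resize_()'d to match each batch inside the loop
real_A = Variable(torch.FloatTensor(opt.batchSize, opt.input_nc, 256, 256))
real_B = Variable(torch.FloatTensor(opt.batchSize, opt.output_nc, 256, 256))
label = Variable(torch.FloatTensor(opt.batchSize))
if opt.cuda:
    real_A, real_B, label = real_A.cuda(), real_B.cuda(), label.cuda()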
########### Training ###########
netD.train()
netG.train()
for epoch in range(1, opt.niter + 1):
    for i, image in enumerate(train_loader):
        ########### fDx ###########
        netD.zero_grad()
        if opt.which_direction == 'AtoB':
            imgA = image[1]
            imgB = image[0]
        else:
            imgA = image[0]
            imgB = image[1]

        # train with real data
        real_A.data.resize_(imgA.size()).copy_(imgA)
        real_B.data.resize_(imgB.size()).copy_(imgB)
        real_AB = torch.cat((real_A, real_B), 1)
        output = netD(real_AB)
        label.data.resize_(output.size())
        label.data.fill_(real_label)
        errD_real = criterion(output, label)
        errD_real.backward()

        # train with fake
        fake_B = netG(real_A)
        label.data.fill_(fake_label)
        fake_AB = torch.cat((real_A, fake_B), 1)
        output = netD(fake_AB.detach())
        errD_fake = criterion(output, label)
        errD_fake.backward()
        errD = (errD_fake + errD_real) / 2
        optimizerD.step()

        ########### fGx ###########
        netG.zero_grad()
        label.data.fill_(real_label)
        output = netD(fake_AB)
        errGAN = criterion(output, label)
        errL1 = criterionL1(fake_B, real_B)
        errG = errGAN + opt.lamb * errL1
        errG.backward()
        optimizerG.step()
Hyperparameters
torch.optim.Adam(netD.parameters(),lr=0.0002, betas=(0.5, 0.999))
errG = errGAN + 100*errL1 (the L1 reconstruction loss is weighted 100 against the GAN loss; some other implementations use 10 for this weight)