背景:
深度学习对数据量的需求明显高于传统机器学习方法。当真实数据量较少、难以支撑网络收敛时,需要通过数据增强方法生成更多可用于训练的图片。传统数据增强方法通过对图像进行随机裁剪、翻转等变换来提升输入图像的丰富度,但其本质上只是在原图上做“线性变换”,无法产生原图中没有的内容。在这种条件下,GAN(Generative Adversarial Networks,生成对抗网络)能够在有限数据条件下生成更加丰富的数据资源,提升网络训练的有效性。
DCGAN:使用卷积神经网络替代GAN 中的多层感知机(MLP)并对网络做微调处理,显著提升了图像生成的质量。
论文链接:Unsupervised Representation Learning with Deep Convolutional Generative Adversarial Networks(Radford 等,2015,arXiv:1511.06434,https://arxiv.org/abs/1511.06434)
DCGAN设计技巧
基于Pytorch的DCGAN数据增强方法:
生成器(Generator):
class NetG(nn.Module):
    """DCGAN generator: upsamples a latent vector into a 96x96 image.

    The final activation is ``Tanh``, so generated pixel values lie in
    ``[-1, 1]``.

    Args:
        ngf: Base number of generator feature maps; the hidden layers use
            ``ngf * 8``, ``ngf * 4``, ``ngf * 2`` and ``ngf`` channels.
        nz: Number of channels of the latent input.
        nc: Number of channels of the generated image (default 3, RGB).
    """

    def __init__(self, ngf, nz, nc=3):
        super().__init__()
        # layer1: (nz) x 1 x 1 -> (ngf*8) x 4 x 4
        self.layer1 = nn.Sequential(
            nn.ConvTranspose2d(nz, ngf * 8, kernel_size=4, stride=1, padding=0, bias=False),
            nn.BatchNorm2d(ngf * 8),
            nn.ReLU(inplace=True),
        )
        # layer2: -> (ngf*4) x 8 x 8
        self.layer2 = nn.Sequential(
            nn.ConvTranspose2d(ngf * 8, ngf * 4, kernel_size=4, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(ngf * 4),
            nn.ReLU(inplace=True),
        )
        # layer3: -> (ngf*2) x 16 x 16
        self.layer3 = nn.Sequential(
            nn.ConvTranspose2d(ngf * 4, ngf * 2, kernel_size=4, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(ngf * 2),
            nn.ReLU(inplace=True),
        )
        # layer4: -> (ngf) x 32 x 32
        self.layer4 = nn.Sequential(
            nn.ConvTranspose2d(ngf * 2, ngf, kernel_size=4, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(ngf),
            nn.ReLU(inplace=True),
        )
        # layer5: -> (nc) x 96 x 96; kernel 5 / stride 3 / padding 1 triples
        # the 32x32 map: (32 - 1) * 3 - 2 * 1 + 5 = 96.
        self.layer5 = nn.Sequential(
            nn.ConvTranspose2d(ngf, nc, kernel_size=5, stride=3, padding=1, bias=False),
            nn.Tanh(),
        )

    def forward(self, x):
        """Generate images from latent input.

        Args:
            x: Latent tensor of shape ``(N, nz, 1, 1)``.

        Returns:
            Tensor of shape ``(N, nc, 96, 96)`` with values in ``[-1, 1]``.
        """
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = self.layer5(out)
        return out
判别器(Discriminator):(生成器的逆过程)
# Discriminator network D.
class NetD(nn.Module):
    """DCGAN discriminator: scores a 96x96 image as real (1) or fake (0).

    The final activation is ``Sigmoid``, so each score lies in ``[0, 1]``.

    Args:
        ndf: Base number of discriminator feature maps; the hidden layers
            use ``ndf``, ``ndf * 2``, ``ndf * 4`` and ``ndf * 8`` channels.
        nc: Number of channels of the input image (default 3, RGB).
    """

    def __init__(self, ndf, nc=3):
        super().__init__()
        # layer1: (nc) x 96 x 96 -> (ndf) x 32 x 32
        # (kernel 5 / stride 3 / padding 1: (96 + 2 - 5) // 3 + 1 = 32).
        self.layer1 = nn.Sequential(
            nn.Conv2d(nc, ndf, kernel_size=5, stride=3, padding=1, bias=False),
            nn.BatchNorm2d(ndf),
            nn.LeakyReLU(0.2, inplace=True),
        )
        # layer2: -> (ndf*2) x 16 x 16
        self.layer2 = nn.Sequential(
            nn.Conv2d(ndf, ndf * 2, kernel_size=4, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(ndf * 2),
            nn.LeakyReLU(0.2, inplace=True),
        )
        # layer3: -> (ndf*4) x 8 x 8
        self.layer3 = nn.Sequential(
            nn.Conv2d(ndf * 2, ndf * 4, kernel_size=4, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(ndf * 4),
            nn.LeakyReLU(0.2, inplace=True),
        )
        # layer4: -> (ndf*8) x 4 x 4
        self.layer4 = nn.Sequential(
            nn.Conv2d(ndf * 4, ndf * 8, kernel_size=4, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(ndf * 8),
            nn.LeakyReLU(0.2, inplace=True),
        )
        # layer5: 4x4 valid convolution collapses the map to one score.
        self.layer5 = nn.Sequential(
            nn.Conv2d(ndf * 8, 1, kernel_size=4, stride=1, padding=0, bias=False),
            nn.Sigmoid(),
        )

    def forward(self, x):
        """Score a batch of images.

        Args:
            x: Image tensor of shape ``(N, nc, 96, 96)``.

        Returns:
            Tensor of shape ``(N, 1, 1, 1)`` holding values in ``[0, 1]``.
            Loss targets must match this shape (or the output must be
            flattened by the caller).
        """
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = self.layer5(out)
        return out
损失函数(Loss function)、优化器与单步训练流程:
# Binary cross-entropy on the discriminator's sigmoid output, with one Adam
# optimizer per network.
criterion = nn.BCELoss()
optimizerG = torch.optim.Adam(netG.parameters(), lr=opt.lr, betas=(opt.beta1, 0.999))
optimizerD = torch.optim.Adam(netD.parameters(), lr=opt.lr, betas=(opt.beta1, 0.999))

# Alternate one discriminator update and one generator update per batch.
for epoch in range(1, opt.epoch + 1):
    for i, (imgs, _) in enumerate(dataloader):
        # ---- Update D (G is not updated in this phase) ----
        optimizerD.zero_grad()

        # Push D towards scoring real images as real_label.
        imgs = imgs.to(device)
        output = netD(imgs)
        # Build the target from `output` so its shape, dtype and device
        # always match; BCELoss requires input and target of equal shape.
        errD_real = criterion(output, torch.full_like(output, real_label))
        errD_real.backward()

        # Push D towards scoring generated images as fake_label.
        noise = torch.randn(imgs.size(0), opt.nz, 1, 1, device=device)
        fake = netG(noise)
        # detach() keeps this loss from back-propagating into G.
        output = netD(fake.detach())
        errD_fake = criterion(output, torch.full_like(output, fake_label))
        errD_fake.backward()
        errD = errD_fake + errD_real
        optimizerD.step()

        # ---- Update G (only optimizerG steps in this phase) ----
        optimizerG.zero_grad()
        # G is rewarded when D scores its images as real_label.
        output = netD(fake)
        errG = criterion(output, torch.full_like(output, real_label))
        errG.backward()
        optimizerG.step()
模型: model.py
import torch.nn as nn
# Generator network G.
class NetG(nn.Module):
    """DCGAN generator: upsamples a latent vector into a 96x96 image.

    The final activation is ``Tanh``, so generated pixel values lie in
    ``[-1, 1]``.

    Args:
        ngf: Base number of generator feature maps; the hidden layers use
            ``ngf * 8``, ``ngf * 4``, ``ngf * 2`` and ``ngf`` channels.
        nz: Number of channels of the latent input.
        nc: Number of channels of the generated image (default 3, RGB).
    """

    def __init__(self, ngf, nz, nc=3):
        super().__init__()
        # layer1: (nz) x 1 x 1 -> (ngf*8) x 4 x 4
        self.layer1 = nn.Sequential(
            nn.ConvTranspose2d(nz, ngf * 8, kernel_size=4, stride=1, padding=0, bias=False),
            nn.BatchNorm2d(ngf * 8),
            nn.ReLU(inplace=True),
        )
        # layer2: -> (ngf*4) x 8 x 8
        self.layer2 = nn.Sequential(
            nn.ConvTranspose2d(ngf * 8, ngf * 4, kernel_size=4, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(ngf * 4),
            nn.ReLU(inplace=True),
        )
        # layer3: -> (ngf*2) x 16 x 16
        self.layer3 = nn.Sequential(
            nn.ConvTranspose2d(ngf * 4, ngf * 2, kernel_size=4, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(ngf * 2),
            nn.ReLU(inplace=True),
        )
        # layer4: -> (ngf) x 32 x 32
        self.layer4 = nn.Sequential(
            nn.ConvTranspose2d(ngf * 2, ngf, kernel_size=4, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(ngf),
            nn.ReLU(inplace=True),
        )
        # layer5: -> (nc) x 96 x 96; kernel 5 / stride 3 / padding 1 triples
        # the 32x32 map: (32 - 1) * 3 - 2 * 1 + 5 = 96.
        self.layer5 = nn.Sequential(
            nn.ConvTranspose2d(ngf, nc, kernel_size=5, stride=3, padding=1, bias=False),
            nn.Tanh(),
        )

    def forward(self, x):
        """Generate images from latent input.

        Args:
            x: Latent tensor of shape ``(N, nz, 1, 1)``.

        Returns:
            Tensor of shape ``(N, nc, 96, 96)`` with values in ``[-1, 1]``.
        """
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = self.layer5(out)
        return out
# Discriminator network D.
class NetD(nn.Module):
    """DCGAN discriminator: scores a 96x96 image as real (1) or fake (0).

    The final activation is ``Sigmoid``, so each score lies in ``[0, 1]``.

    Args:
        ndf: Base number of discriminator feature maps; the hidden layers
            use ``ndf``, ``ndf * 2``, ``ndf * 4`` and ``ndf * 8`` channels.
        nc: Number of channels of the input image (default 3, RGB).
    """

    def __init__(self, ndf, nc=3):
        super().__init__()
        # layer1: (nc) x 96 x 96 -> (ndf) x 32 x 32
        # (kernel 5 / stride 3 / padding 1: (96 + 2 - 5) // 3 + 1 = 32).
        self.layer1 = nn.Sequential(
            nn.Conv2d(nc, ndf, kernel_size=5, stride=3, padding=1, bias=False),
            nn.BatchNorm2d(ndf),
            nn.LeakyReLU(0.2, inplace=True),
        )
        # layer2: -> (ndf*2) x 16 x 16
        self.layer2 = nn.Sequential(
            nn.Conv2d(ndf, ndf * 2, kernel_size=4, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(ndf * 2),
            nn.LeakyReLU(0.2, inplace=True),
        )
        # layer3: -> (ndf*4) x 8 x 8
        self.layer3 = nn.Sequential(
            nn.Conv2d(ndf * 2, ndf * 4, kernel_size=4, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(ndf * 4),
            nn.LeakyReLU(0.2, inplace=True),
        )
        # layer4: -> (ndf*8) x 4 x 4
        self.layer4 = nn.Sequential(
            nn.Conv2d(ndf * 4, ndf * 8, kernel_size=4, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(ndf * 8),
            nn.LeakyReLU(0.2, inplace=True),
        )
        # layer5: 4x4 valid convolution collapses the map to one score.
        self.layer5 = nn.Sequential(
            nn.Conv2d(ndf * 8, 1, kernel_size=4, stride=1, padding=0, bias=False),
            nn.Sigmoid(),
        )

    def forward(self, x):
        """Score a batch of images.

        Args:
            x: Image tensor of shape ``(N, nc, 96, 96)``.

        Returns:
            Tensor of shape ``(N, 1, 1, 1)`` holding values in ``[0, 1]``.
            Loss targets must match this shape (or the output must be
            flattened by the caller).
        """
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = self.layer5(out)
        return out
训练:train.py
import os
import argparse
import torch
import torchvision
import torchvision.utils as vutils
from torchvision import transforms
from torchvision.transforms import ToPILImage
import torch.nn as nn
from random import randint
from model import NetD, NetG
from PIL import Image
from utils import tensor_to_PIL
# ---------------------------------------------------------------------------
# DCGAN training script: parses options, builds the data pipeline and both
# networks, then alternates discriminator / generator updates. Every 1000
# epochs it writes the most recent generated batch and both checkpoints.
# ---------------------------------------------------------------------------
parser = argparse.ArgumentParser()
parser.add_argument('--batchSize', type=int, default=32)
parser.add_argument('--imageSize', type=int, default=96)
parser.add_argument('--nz', type=int, default=100, help='size of the latent z vector')
parser.add_argument('--ngf', type=int, default=64)
parser.add_argument('--ndf', type=int, default=64)
parser.add_argument('--epoch', type=int, default=40000, help='number of epochs to train for')
parser.add_argument('--lr', type=float, default=0.0002, help='learning rate, default=0.0002')
parser.add_argument('--beta1', type=float, default=0.5, help='beta1 for adam. default=0.5')
parser.add_argument('--data_path', default='data/', help='folder to train data')
parser.add_argument('--outf', default='imgs/', help='folder to output images and model checkpoints')
# The GPU index used to be hard-coded to 3; the default keeps that behavior.
parser.add_argument('--gpu', type=int, default=3, help='index of the CUDA device to use, default=3')
opt = parser.parse_args()

# Use the requested GPU when CUDA is available, otherwise fall back to CPU.
device = torch.device('cuda:%d' % opt.gpu if torch.cuda.is_available() else 'cpu')

# Image loading and preprocessing.
# - Resize replaces the removed `Scale` transform; with an int it only fixes
#   the shorter side, so CenterCrop is needed to get square images.
# - NOTE: NetG/NetD are built for 96x96 images, so --imageSize should stay 96.
# - Normalize maps pixels to [-1, 1], matching the generator's Tanh output.
# The pipeline is named `preprocess` so it does not shadow the imported
# `transforms` module.
preprocess = torchvision.transforms.Compose([
    torchvision.transforms.Resize(opt.imageSize),
    torchvision.transforms.CenterCrop(opt.imageSize),
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])
dataset = torchvision.datasets.ImageFolder(opt.data_path, transform=preprocess)
dataloader = torch.utils.data.DataLoader(
    dataset=dataset,
    batch_size=opt.batchSize,
    shuffle=True,
    drop_last=True,
)
# With drop_last=True a dataset smaller than one batch yields no batches at
# all; fail early instead of looping over empty epochs.
if len(dataloader) == 0:
    raise ValueError('dataset at %r has fewer images than --batchSize=%d'
                     % (opt.data_path, opt.batchSize))

netG = NetG(opt.ngf, opt.nz).to(device)
netD = NetD(opt.ndf).to(device)

criterion = nn.BCELoss()
optimizerG = torch.optim.Adam(netG.parameters(), lr=opt.lr, betas=(opt.beta1, 0.999))
optimizerD = torch.optim.Adam(netD.parameters(), lr=opt.lr, betas=(opt.beta1, 0.999))

real_label = 1
fake_label = 0

# Sample images go to <outf>/epoch<j>/; make sure the output root exists
# before the first checkpoint is written.
os.makedirs(opt.outf, exist_ok=True)
save_path = os.path.join(opt.outf, 'epoch{:s}')
j = 0  # running index of the sample folder

for epoch in range(1, opt.epoch + 1):
    for i, (imgs, _) in enumerate(dataloader):
        # ---- Update D (G is not updated in this phase) ----
        optimizerD.zero_grad()

        # Push D towards scoring real images as real_label.
        imgs = imgs.to(device)
        output = netD(imgs)
        # Build the target from `output` so its shape, dtype and device
        # always match; BCELoss requires input and target of equal shape.
        errD_real = criterion(output, torch.full_like(output, real_label))
        errD_real.backward()

        # Push D towards scoring generated images as fake_label.
        noise = torch.randn(imgs.size(0), opt.nz, 1, 1, device=device)
        fake = netG(noise)
        # detach() keeps this loss from back-propagating into G.
        output = netD(fake.detach())
        errD_fake = criterion(output, torch.full_like(output, fake_label))
        errD_fake.backward()
        errD = errD_fake + errD_real
        optimizerD.step()

        # ---- Update G (only optimizerG steps in this phase) ----
        optimizerG.zero_grad()
        # G is rewarded when D scores its images as real_label.
        output = netD(fake)
        errG = criterion(output, torch.full_like(output, real_label))
        errG.backward()
        optimizerG.step()

        print('[%d/%d][%d/%d] Loss_D: %.3f Loss_G %.3f'
              % (epoch, opt.epoch, i, len(dataloader), errD.item(), errG.item()))

    if epoch % 1000 == 0:
        # Dump the last generated batch of this epoch, one PNG per sample.
        epoch_dir = save_path.format(str(j))
        os.makedirs(epoch_dir, exist_ok=True)
        for idx, sample in enumerate(fake.detach()):
            vutils.save_image(sample,
                              os.path.join(epoch_dir, '%d.png' % idx),
                              normalize=True)
        torch.save(netG.state_dict(), os.path.join(opt.outf, 'netG_%03d.pth' % epoch))
        torch.save(netD.state_dict(), os.path.join(opt.outf, 'netD_%03d.pth' % epoch))
        j = j + 1
参考博客:Pytorch版DCGAN图像生成技术