Deep Learning: Generating Anime Avatars

Results

The generated results are shown below.

Implementation Steps

1. Crawl Inuyasha images from the web.

2. Crop the face regions from the crawled images (a face-cropping sketch follows this list).

3. Build the network and train it.
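
For step 2, a common approach (not shown in the original post, so treat it as an assumption) is to run an anime-face cascade detector over the crawled images and save the detected crops. Below is a minimal sketch; the lbpcascade_animeface.xml cascade file (downloaded separately) and the Inuyasha/raw and Inuyasha/data/0 directories are assumed names you would adjust.

import glob
import os
import cv2

# assumed paths: raw crawled images in, face crops out
RAW_DIR = 'Inuyasha/raw'
OUT_DIR = 'Inuyasha/data/0'
os.makedirs(OUT_DIR, exist_ok=True)

# assumed cascade file: lbpcascade_animeface.xml (not part of the original post)
cascade = cv2.CascadeClassifier('lbpcascade_animeface.xml')

count = 0
for path in glob.glob(os.path.join(RAW_DIR, '*')):
    img = cv2.imread(path)
    if img is None:
        continue
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    gray = cv2.equalizeHist(gray)
    faces = cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5, minSize=(48, 48))
    for (x, y, w, h) in faces:
        face = cv2.resize(img[y:y + h, x:x + w], (96, 96))  # match the 96x96 GAN input
        cv2.imwrite(os.path.join(OUT_DIR, '{}.jpg'.format(count)), face)
        count += 1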

Most of the code follows existing blog posts; only the hyperparameter tuning and training schedule differ.

When I trained with the original post's settings, the discriminator quickly became too strong, and everything the generator produced was judged fake at a glance. I therefore gate the updates: the discriminator is trained only when the average score of fake images is above 0.5 or the average score of real images is below 0.5, and the generator is trained when the fake-image score drops below 0.3.
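
Condensed, that schedule looks like the sketch below. Here d_real and d_fake stand for the discriminator's mean scores on the current batch, and the two update_* calls are hypothetical placeholders for the optimizer steps in the full listing further down.

# d_real / d_fake: mean discriminator scores on the current batch (probabilities in [0, 1])
if d_fake > 0.5 or d_real < 0.5:   # D is still being fooled, so keep training it
    update_discriminator()
if d_fake < 0.3:                   # G is still easily caught, so keep training it
    update_generator()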

Also note that Google Images returns noticeably higher-quality pictures than Baidu, so crawl Google if you can (see the sketch below).
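
If you do not already have a crawler, one convenient option (not the one used in the original post, so treat it as an assumption) is the third-party icrawler package; the keyword and output directory below are examples.

# minimal crawling sketch using icrawler's GoogleImageCrawler (an assumption)
from icrawler.builtin import GoogleImageCrawler

crawler = GoogleImageCrawler(storage={'root_dir': 'Inuyasha/raw'})
crawler.crawl(keyword='犬夜叉', max_num=1000)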

The full training code follows.

import glob
import os
import cv2
# import visiual  # local visualization helper, unused in this script
import torchvision
import torch
from torch import nn
from torch.utils.data import Dataset
from torch.autograd import Variable
import torch.utils.data as Data
from torch.utils.data import DataLoader, sampler
from torchvision.datasets import MNIST
import PIL.Image as Image
import numpy as np
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec

plt.rcParams['figure.figsize'] = (100.0, 80.0)  # default figure size for plotting
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'


class MyDataset(Dataset):
    def __init__(self, root_path):
        self.path = root_path
        self.img_path = os.listdir(self.path)  # list all image file names

    def __len__(self):
        return len(self.img_path)

    def __getitem__(self, index):
        img_name = self.img_path[index]
        img_item_path = os.path.join(self.path, img_name)
        img = Image.open(img_item_path)
        # img = img.resize((500, 500))  # resize to a fixed size if needed
        img = np.array(img)
        if img.shape[2] == 4:
            img = img[:, :, :3]  # drop the alpha channel of RGBA images
        label = 1  # every sample is a real face, so the label is always 1
        return img, label

def show_images(images):  # plotting helper: tile a batch of images into a grid
    images = np.reshape(images, [images.shape[0], -1])
    sqrtn = int(np.ceil(np.sqrt(images.shape[0])))
    sqrtimg = int(np.ceil(np.sqrt(images.shape[1])))

    fig = plt.figure(figsize=(sqrtn, sqrtn))
    gs = gridspec.GridSpec(sqrtn, sqrtn)
    gs.update(wspace=0.05, hspace=0.05)

    for i, img in enumerate(images):
        ax = plt.subplot(gs[i])
        plt.axis('off')
        ax.set_xticklabels([])
        ax.set_yticklabels([])
        ax.set_aspect('equal')
        # plt.imshow(img.reshape([sqrtimg, sqrtimg]))  # only valid for square grayscale images
    plt.savefig('./pic/data.png')
    plt.close()
    return


def preprocess_img(x):
    x = transforms.ToTensor()(x)
    return (x - 0.5) / 0.5  # map [0, 1] to [-1, 1]


def deprocess_img(x):
    # return (x + 1.0) / 2.0  # map [-1, 1] back to [0, 1]
    return x

class ChunkSampler(sampler.Sampler):  # samples elements sequentially from an offset
    """Samples elements sequentially from some offset.
    Arguments:
        num_samples: # of desired datapoints
        start: offset where we should start selecting from
    """

    def __init__(self, num_samples, start=0):
        self.num_samples = num_samples
        self.start = start

    def __iter__(self):
        return iter(range(self.start, self.start + self.num_samples))

    def __len__(self):
        return self.num_samples



# Discriminator network
class NetD(nn.Module):
    # opt carries the hyperparameters; opt.ndf is the number of discriminator feature maps
    def __init__(self, opt):
        super(NetD, self).__init__()
        ndf = opt.ndf
        self.main = nn.Sequential(
            # input: 3 x 96 x 96 image (real sample or generator output)
            nn.Conv2d(3, ndf, kernel_size=5, stride=3, padding=1, bias=False),  # strided convolution: downsampling / encoding
            nn.LeakyReLU(0.2, inplace=True),
            # output: ndf x 32 x 32

            nn.Conv2d(ndf, ndf * 2, kernel_size=4, stride=2, padding=1, bias=False),  # halves the feature-map size
            nn.BatchNorm2d(ndf * 2),
            nn.LeakyReLU(0.2, inplace=True),
            # output: (ndf*2) x 16 x 16

            nn.Conv2d(ndf * 2, ndf * 4, kernel_size=4, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(ndf * 4),
            nn.LeakyReLU(0.2, inplace=True),
            # output: (ndf*4) x 8 x 8

            nn.Conv2d(ndf * 4, ndf * 8, kernel_size=4, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(ndf * 8),
            nn.LeakyReLU(0.2, inplace=True),
            # output: (ndf*8) x 4 x 4

            nn.Conv2d(ndf * 8, 1, kernel_size=4, stride=1, padding=0, bias=False),  # encode to a single scalar score
            nn.Sigmoid()  # squash to [0, 1]: the discriminator is a binary real/fake classifier
        )

    def forward(self, input):
        return self.main(input).view(-1)  # flatten to one score per sample

# hyperparameter container
class OPT():
    def __init__(self):
        self.ndf = 64  # number of discriminator feature maps
        self.ngf = 64  # number of generator feature maps
        self.nz = 100  # dimension of the input noise vector

# Generator network
class NetG(nn.Module):

    def __init__(self, opt):
        super(NetG, self).__init__()
        ngf = opt.ngf  # number of generator feature maps

        # main generator body
        self.main = nn.Sequential(
            # input: an nz-dimensional noise vector, treated as an nz x 1 x 1 feature map
            nn.ConvTranspose2d(opt.nz, ngf * 8, kernel_size=4, stride=1, padding=0, bias=False),  # transposed convolution: upsampling
            nn.BatchNorm2d(ngf * 8),
            nn.ReLU(True),
            # output shape: (ngf*8) x 4 x 4

            nn.ConvTranspose2d(ngf * 8, ngf * 4, kernel_size=4, stride=2, padding=1, bias=False),  # keep upsampling, doubling the spatial size
            nn.BatchNorm2d(ngf * 4),
            nn.ReLU(True),
            # output shape: (ngf*4) x 8 x 8

            nn.ConvTranspose2d(ngf * 4, ngf * 2, kernel_size=4, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(ngf * 2),
            nn.ReLU(True),
            # output shape: (ngf*2) x 16 x 16

            nn.ConvTranspose2d(ngf * 2, ngf, kernel_size=4, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(ngf),
            nn.ReLU(True),
            # output shape: ngf x 32 x 32

            nn.ConvTranspose2d(ngf, 3, kernel_size=5, stride=3, padding=1, bias=False),
            nn.Tanh()  # Tanh keeps outputs in [-1, 1]
            # output shape: 3 x 96 x 96, the decoded feature maps form the generated image
        )

    def forward(self, input):
        return self.main(input)


# the discriminator already ends with Sigmoid, so plain BCELoss is the matching criterion
# (BCEWithLogitsLoss would apply a second sigmoid on top of the probabilities)
bce_loss = nn.BCELoss()


def discriminator_loss(logits_real, logits_fake):  # discriminator loss
    size = logits_real.shape[0]
    true_labels = Variable(torch.ones(size, 1)).float().cuda()
    false_labels = Variable(torch.zeros(size, 1)).float().cuda()
    loss = bce_loss(logits_real, true_labels) + bce_loss(logits_fake, false_labels)
    return loss


def generator_loss(logits_fake):  # generator loss
    size = logits_fake.shape[0]
    true_labels = Variable(torch.ones(size, 1)).float().cuda()
    loss = bce_loss(logits_fake, true_labels)
    return loss


# use Adam for both networks: learning rate 3e-5, beta1 = 0.5, beta2 = 0.999
def get_optimizer(net, g=True):
    # the generator (g=True) and discriminator currently share the same settings,
    # but keeping two branches makes it easy to tune them separately
    if g == True:
        optimizer = torch.optim.Adam(net.parameters(), lr=3e-5, betas=(0.5, 0.999))
    else:
        optimizer = torch.optim.Adam(net.parameters(), lr=3e-5, betas=(0.5, 0.999))
    return optimizer



def train_a_gan(D_net, G_net, D_optimizer, G_optimizer, discriminator_loss, generator_loss, show_every=10,
                noise_size=96, num_epochs=8000):
    iter_count = 0

    for epoch in range(num_epochs):
        g_loss =[]
        d_loss = []
        g_error, d_total_error = 0,0
        for i, (x, _) in enumerate(train_data):
            # x = x.permute(0,3,1,2)
            x = x.type(torch.cuda.FloatTensor)
            bs = x.shape[0]
            # discriminator pass
            logits_real = D_net(x).unsqueeze(1)  # discriminator scores on real images
            sample_noise = (torch.rand((bs, opt.nz, 1, 1)) - 0.5) / 0.5  # uniform noise in [-1, 1]
            g_fake_seed = Variable(sample_noise).cuda()

            fake_images = G_net(g_fake_seed)  # generated (fake) images
            logits_fake = D_net(fake_images).unsqueeze(1)  # discriminator scores on fake images
            # update D only while it is not already winning:
            # the real-image score is still below 0.5 or the fake-image score is still above 0.5
            if float(torch.mean(logits_real).cpu()) < 0.5 or float(torch.mean(logits_fake).cpu()) > 0.5:
                d_total_error = discriminator_loss(logits_real, logits_fake)  # discriminator loss
                D_optimizer.zero_grad()
                d_total_error.backward()
                D_optimizer.step()  # optimize the discriminator
                d_loss.append(float(d_total_error.cpu()))
            if np.random.rand() < 0.011:  # occasionally log the current scores
                print('logits_real {:.2f}, logits_fake {:.2f}'.format(float(torch.mean(logits_real).cpu()),
                                                                      float(torch.mean(logits_fake).cpu())))

            # generator pass
            g_fake_seed = Variable(sample_noise).cuda()
            fake_images = G_net(g_fake_seed)  # regenerate fake images with the current G

            gen_logits_fake = D_net(fake_images).unsqueeze(1)
            # update G every fifth batch, as long as D still rates real images above 0.3
            if i % 5 == 0 and float(torch.mean(logits_real).cpu()) > 0.3:
                g_error = generator_loss(gen_logits_fake)  # generator loss
                G_optimizer.zero_grad()
                g_error.backward()
                G_optimizer.step()  # optimize the generator
                g_loss.append(float(g_error.cpu()))
            if epoch % 50 == 0:
                # checkpoint both networks every 50 epochs
                torch.save(G_net, 'params_epoch/2/' + 'G_net' + str(epoch) + '.pkl')
                torch.save(D_net, 'params_epoch/2/' + 'N_net' + str(epoch) + '.pkl')
            if epoch % 5 == 0:
                # save one generated face every 5 epochs: de-normalize from [-1, 1] to [0, 255]
                fake_face = (((fake_images[np.random.randint(0, fake_images.shape[0]), :, :, :] * 0.5 + 0.5) * 255).type(torch.int16)).cpu().permute(1, 2, 0).numpy()
                fake_face = fake_face.astype(np.uint8)
                # the tensor is RGB while OpenCV expects BGR, so convert before writing
                cv2.imwrite('result/2/' + str(epoch) + '.jpg', cv2.cvtColor(fake_face, cv2.COLOR_RGB2BGR))
            iter_count += 1
        print('epoch {} Gloss {} , Dloss_D {}'.format(epoch, np.mean(g_loss), np.mean(d_loss)))




img_transforms = transforms.Compose([
        transforms.RandomHorizontalFlip(),  # data augmentation: random horizontal flip
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))  # map to [-1, 1]
        # transforms.Grayscale(num_output_channels=1)
    ])

NUM_TRAIN = 50000
NUM_VAL = 5000

NOISE_DIM = 96
batch_size = 8
# build the dataset (augmentation is applied via img_transforms)
# train_set = MyDataset('Inuyasha/data/0/')
train_set = torchvision.datasets.ImageFolder('Inuyasha/data',transform=img_transforms)
train_data = DataLoader(train_set, batch_size=batch_size)
# train_data = DataLoader(train_set, batch_size=batch_size, sampler=ChunkSampler(NUM_TRAIN, 0))
# val_set = MyDataset('Inuyasha/data')
# val_data = DataLoader(val_set, batch_size=batch_size)

# imgs = deprocess_img(train_data.__iter__().next()[0].view(batch_size, -1)).numpy().squeeze()  # preview a batch of training images
# show_images(imgs)
# experiment6/params_epoch/1/G_net0.pkl
opt = OPT()
# resume from saved checkpoints; to train from scratch, comment these two lines
# and use the fresh networks below instead
D = torch.load('params_epoch/1/N_net0.pkl')
G = torch.load('params_epoch/1/G_net0.pkl')
# D = NetD(opt=opt).cuda()
# G = NetG(opt).cuda()

D_optim = get_optimizer(D,False)
G_optim = get_optimizer(G)
# vis = visiual.Visualizer()
train_a_gan(D, G, D_optim, G_optim, discriminator_loss, generator_loss)
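
Once checkpoints exist, a short snippet like the following can sample a grid of new faces. This is a sketch: it must run with NetG defined (for example at the bottom of this script), and the checkpoint path is only an example.

# sample a 4x4 grid of faces from a saved generator checkpoint (path is an example)
G_trained = torch.load('params_epoch/2/G_net500.pkl').cuda().eval()
with torch.no_grad():
    noise = ((torch.rand(16, opt.nz, 1, 1) - 0.5) / 0.5).cuda()  # same noise distribution as training
    samples = G_trained(noise)  # 16 x 3 x 96 x 96, values in [-1, 1]
    torchvision.utils.save_image(samples, 'result/sample_grid.png', nrow=4, normalize=True)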
