Implementing Generative Adversarial Networks with MindSpore (2)

In the previous article of this series, MindSpore实现GAN (1), we covered the key points of implementing a simple GAN with MindSpore, mainly defining a WithLossCell to connect the loss to the network and customizing a TrainOneStepCell. With these basic building blocks, DCGAN and CGAN can be implemented quite easily; other, more complex GANs only require redefining the WithLossCell.
Now let's walk through using DCGAN to generate MNIST handwritten digits. (Strictly speaking, with the structures defined before, this article isn't necessary at all: you could simply port code from another framework, since the overall flow is the same as in part 1. But I'm writing it anyway.)

A Brief Overview of DCGAN

DCGAN is an improvement on the vanilla GAN. Its main contribution is bringing convolutional neural networks into the GAN architecture: both the generator and the discriminator use CNNs in place of the original multi-layer perceptrons. The main changes are:

  1. Replace pooling layers with convolutions (and with transposed convolutions for upsampling in the generator), and remove the fully connected layers
  2. Add BatchNorm to both the generator and the discriminator
  3. Use Tanh for the generator's output layer and ReLU for its other layers
  4. Use LeakyReLU in every layer of the discriminator

For more detail, read the original DCGAN paper; there are also plenty of explanatory articles online. It is a classic architecture and practically the entry point to generative adversarial networks.
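
To make this concrete, here is a minimal sketch of the two recurring building-block patterns in MindSpore (the channel sizes are placeholders, not the ones used later in this article):

from mindspore import nn

# Generator block: transposed convolution for upsampling, then BatchNorm and ReLU.
gen_block = nn.SequentialCell([
    nn.Conv2dTranspose(128, 64, 5, 2),
    nn.BatchNorm2d(64),
    nn.ReLU(),
])

# Discriminator block: strided convolution for downsampling, then BatchNorm and LeakyReLU.
dis_block = nn.SequentialCell([
    nn.Conv2d(64, 128, 5, 2),
    nn.BatchNorm2d(128),
    nn.LeakyReLU(),
])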

Designing the Modules

First, collect the reusable code such as the WithLossCell classes into a single file so that it never has to be rewritten; here I put it in cells.py. All of this was covered in the previous article, so I won't go through it again.

from mindspore import nn
import mindspore.ops.operations as P
import mindspore.ops.functional as F
import mindspore.ops.composite as C
from mindspore.parallel._utils import (_get_device_num, _get_gradients_mean,
                                       _get_parallel_mode)
from mindspore.context import ParallelMode
from mindspore.nn.wrap.grad_reducer import DistributedGradReducer


class SigmoidCrossEntropyWithLogits(nn.loss.loss._Loss):
    def __init__(self):
        super().__init__()
        self.cross_entropy = P.SigmoidCrossEntropyWithLogits()

    def construct(self, data, label):
        x = self.cross_entropy(data, label)
        return self.get_loss(x)


class GenWithLossCell(nn.Cell):
    def __init__(self, netG, netD, loss_fn, auto_prefix=True):
        super(GenWithLossCell, self).__init__(auto_prefix=auto_prefix)
        self.netG = netG
        self.netD = netD
        self.loss_fn = loss_fn

    def construct(self, latent_code):
        fake_data = self.netG(latent_code)
        fake_out = self.netD(fake_data)
        loss_G = self.loss_fn(fake_out, F.ones_like(fake_out))

        return loss_G


class DisWithLossCell(nn.Cell):
    def __init__(self, netG, netD, loss_fn, auto_prefix=True):
        super(DisWithLossCell, self).__init__(auto_prefix=auto_prefix)
        self.netG = netG
        self.netD = netD
        self.loss_fn = loss_fn

    def construct(self, real_data, latent_code):
        fake_data = self.netG(latent_code)
        real_out = self.netD(real_data)
        real_loss = self.loss_fn(real_out, F.ones_like(real_out))
        fake_out = self.netD(fake_data)
        fake_loss = self.loss_fn(fake_out, F.zeros_like(fake_out))
        loss_D = real_loss + fake_loss

        return loss_D


class TrainOneStepCell(nn.Cell):
    """Performs one training step: one update of the discriminator, then one of the generator."""
    def __init__(
        self,
        netG: GenWithLossCell,
        netD: DisWithLossCell,
        optimizerG: nn.Optimizer,
        optimizerD: nn.Optimizer,
        sens=1.0,
        auto_prefix=True,
    ):
        super(TrainOneStepCell, self).__init__(auto_prefix=auto_prefix)
        self.netG = netG
        self.netG.set_grad()
        self.netG.add_flags(defer_inline=True)

        self.netD = netD
        self.netD.set_grad()
        self.netD.add_flags(defer_inline=True)

        self.weights_G = optimizerG.parameters
        self.optimizerG = optimizerG
        self.weights_D = optimizerD.parameters
        self.optimizerD = optimizerD

        self.grad = C.GradOperation(get_by_list=True, sens_param=True)

        self.sens = sens
        self.reducer_flag = False
        self.grad_reducer_G = F.identity
        self.grad_reducer_D = F.identity
        self.parallel_mode = _get_parallel_mode()
        if self.parallel_mode in (ParallelMode.DATA_PARALLEL,
                                  ParallelMode.HYBRID_PARALLEL):
            self.reducer_flag = True
        if self.reducer_flag:
            mean = _get_gradients_mean()
            degree = _get_device_num()
            self.grad_reducer_G = DistributedGradReducer(
                self.weights_G, mean, degree)
            self.grad_reducer_D = DistributedGradReducer(
                self.weights_D, mean, degree)

    def trainD(self, real_data, latent_code, loss, loss_net, grad, optimizer,
               weights, grad_reducer):
        # sens acts as the initial gradient (sensitivity) fed into backpropagation
        sens = P.Fill()(P.DType()(loss), P.Shape()(loss), self.sens)
        grads = grad(loss_net, weights)(real_data, latent_code, sens)
        grads = grad_reducer(grads)
        return F.depend(loss, optimizer(grads))

    def trainG(self, latent_code, loss, loss_net, grad, optimizer, weights,
               grad_reducer):
        sens = P.Fill()(P.DType()(loss), P.Shape()(loss), self.sens)
        grads = grad(loss_net, weights)(latent_code, sens)
        grads = grad_reducer(grads)
        return F.depend(loss, optimizer(grads))

    def construct(self, real_data, latent_code):
        loss_D = self.netD(real_data, latent_code)
        loss_G = self.netG(latent_code)
        d_out = self.trainD(real_data, latent_code, loss_D, self.netD, self.grad,
                            self.optimizerD, self.weights_D,
                            self.grad_reducer_D)
        g_out = self.trainG(latent_code, loss_G, self.netG, self.grad,
                            self.optimizerG, self.weights_G,
                            self.grad_reducer_G)

        return d_out, g_out

Generator Architecture

Here I directly borrow the DCGAN architecture from the TensorFlow tutorial (mostly because I didn't want to redesign the network or re-tune hyperparameters). Since the generator's input is still a noise vector, we first need a Reshape cell that turns the vector into a feature map with height and width. Add the following to cells.py:

class Reshape(nn.Cell):
    def __init__(self, shape, auto_prefix=True):
        super().__init__(auto_prefix=auto_prefix)
        self.shape = shape
        self.reshape = P.Reshape()

    def construct(self, x):
        return self.reshape(x, self.shape)
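
Reshape simply wraps P.Reshape so that it can sit inside a SequentialCell. A quick sanity check, assuming the 128 * 7 * 7 Dense output used by the generator below:

import numpy as np
from mindspore import Tensor

reshape = Reshape((-1, 128, 7, 7))
x = Tensor(np.zeros((2, 128 * 7 * 7), dtype=np.float32))
print(reshape(x).shape)  # expected: (2, 128, 7, 7)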

Next comes the network itself. A Dense layer followed by Reshape first turns the latent vector into a 7x7 feature map, and then transposed convolutions (Conv2dTranspose) upsample it step by step to a 28x28 output.

class Generator(nn.Cell):
    """定义生成器结构"""

    def __init__(self, latent_size, auto_prefix=True):
        super(Generator, self).__init__(auto_prefix=auto_prefix)
        self.network = nn.SequentialCell()

        self.network.append(nn.Dense(latent_size, 128 * 7 * 7, has_bias=False))
        self.network.append(Reshape((-1, 128, 7, 7)))
        self.network.append(nn.BatchNorm2d(128))
        self.network.append(nn.ReLU())

        self.network.append(nn.Conv2dTranspose(128, 64, 5, 1))
        self.network.append(nn.BatchNorm2d(64))
        self.network.append(nn.ReLU())

        self.network.append(nn.Conv2dTranspose(64, 32, 5, 2))
        self.network.append(nn.BatchNorm2d(32))
        self.network.append(nn.ReLU())

        self.network.append(nn.Conv2dTranspose(32, 1, 5, 2))
        self.network.append(nn.Tanh())

    def construct(self, x):
        return self.network(x)
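
To confirm that the upsampling path really ends at 28x28, here is a quick shape check (just a sketch that pushes a random latent batch through the generator):

import numpy as np
from mindspore import Tensor

netG = Generator(latent_size=100)
z = Tensor(np.random.normal(size=(4, 100)).astype(np.float32))
print(netG(z).shape)  # expected: (4, 1, 28, 28)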

Discriminator Architecture

Following the paper, the discriminator downsamples with strided convolutions, and every layer except the output uses LeakyReLU as its activation function:

class Discriminator(nn.Cell):
    '''Discriminator network.'''

    def __init__(self, auto_prefix=True):
        super().__init__(auto_prefix=auto_prefix)
        self.network = nn.SequentialCell()

        self.network.append(nn.Conv2d(1, 32, 5, 2))
        self.network.append(nn.BatchNorm2d(32))
        self.network.append(nn.LeakyReLU())

        self.network.append(nn.Conv2d(32, 64, 5, 2))
        self.network.append(nn.BatchNorm2d(64))
        self.network.append(nn.LeakyReLU())

        self.network.append(nn.Flatten())
        self.network.append(nn.Dense(64 * 7 * 7, 1))

    def construct(self, x):
        return self.network(x)
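
And the matching check for the discriminator: a batch of 28x28 single-channel images should yield one logit per sample (again just a sketch):

import numpy as np
from mindspore import Tensor

netD = Discriminator()
imgs = Tensor(np.random.normal(size=(4, 1, 28, 28)).astype(np.float32))
print(netD(imgs).shape)  # expected: (4, 1)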

That is all for the network definitions; next comes the training part.

Training

The MNIST dataset is created with MindSpore's built-in interface. To take advantage of MindSpore's data-sinking feature and speed up computation, the dataset gets some extra processing. The main changes are: 1. the input images are normalized; 2. a map operation adds a random-noise column (the generator's input) to the dataset. I chose not to create the random noise at training time so that it can be sunk to the device together with the MNIST data, since data sinking requires wrapping the Dataset in a DatasetHelper. (My guess is that this is faster, but I haven't benchmarked it against generating the noise at training time; feel free to try.)

from mindspore import nn
import mindspore.dataset as ds
import mindspore.dataset.vision.c_transforms as CV
from mindspore.train.dataset_helper import DatasetHelper, connect_network_with_dataset
from mindspore import Tensor
from mindspore.common import dtype as mstype
from mindspore import context
import os
import numpy as np
from cells import SigmoidCrossEntropyWithLogits, GenWithLossCell, DisWithLossCell, TrainOneStepCell, Reshape
import matplotlib.pyplot as plt
import time

def create_dataset(data_path,
                   latent_size,
                   batch_size,
                   repeat_size=1,
                   num_parallel_workers=1):
    mnist_ds = ds.MnistDataset(data_path)
    hwc2chw_op = CV.HWC2CHW()

    mnist_ds = mnist_ds.map(
        input_columns="image",
        operations=lambda x: ((x - 127.5) / 127.5).astype("float32"),
        num_parallel_workers=num_parallel_workers,
    )
    mnist_ds = mnist_ds.map(
        input_columns="image",
        operations=hwc2chw_op,
        num_parallel_workers=num_parallel_workers,
    )
    mnist_ds = mnist_ds.map(
        input_columns="image",
        operations=lambda x: (
            x,
            np.random.normal(size=(latent_size)).astype("float32"),
        ),
        output_columns=["image", "latent_code"],
        column_order=["image", "latent_code"],
        num_parallel_workers=num_parallel_workers,
    )
    buffer_size = 10000
    mnist_ds = mnist_ds.shuffle(buffer_size=buffer_size)
    mnist_ds = mnist_ds.batch(batch_size, drop_remainder=True)
    mnist_ds = mnist_ds.repeat(repeat_size)

    return mnist_ds
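
A quick way to confirm that the pipeline yields the two expected columns (a sketch, assuming the MNIST data sits under ../data/MNIST_Data/train as in the training script below):

dataset = create_dataset("../data/MNIST_Data/train", latent_size=100, batch_size=128)
for image, latent_code in dataset.create_tuple_iterator():
    print(image.shape, latent_code.shape)  # expected: (128, 1, 28, 28) (128, 100)
    break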

Then comes the training code, which is almost identical to the previous tutorial:


def save_imgs(gen_imgs, idx):
    """Save a 4x4 grid of generated images for visual inspection."""
    os.makedirs("./images", exist_ok=True)  # make sure the output directory exists
    for i in range(gen_imgs.shape[0]):
        plt.subplot(4, 4, i + 1)
        # map the Tanh output range [-1, 1] back to pixel values [0, 255]
        plt.imshow(gen_imgs[i, 0, :, :] * 127.5 + 127.5, cmap="gray")
        plt.axis("off")
    plt.savefig("./images/{}.png".format(idx))
    
# Set the execution context for your hardware before training, e.g.
# context.set_context(mode=context.GRAPH_MODE, device_target="GPU");
# dataset sinking is generally supported on GPU and Ascend.
batch_size = 128
epochs = 30
input_dim = 100
lr = 0.0002

netG = Generator(input_dim)
netD = Discriminator()
loss = SigmoidCrossEntropyWithLogits()
netG_with_loss = GenWithLossCell(netG, netD, loss)
netD_with_loss = DisWithLossCell(netG, netD, loss)
optimizerG = nn.Adam(netG.trainable_params(), lr, beta1=0.5, beta2=0.999)
optimizerD = nn.Adam(netD.trainable_params(), lr, beta1=0.5, beta2=0.999)
net_train = TrainOneStepCell(netG_with_loss, netD_with_loss, optimizerG,
                             optimizerD)

dataset = create_dataset(os.path.join("../data/MNIST_Data", "train"),
                         latent_size=input_dim,
                         batch_size=batch_size,
                         num_parallel_workers=2)
dataset_helper = DatasetHelper(dataset, epoch_num=epochs, dataset_sink_mode=True)
net_train = connect_network_with_dataset(net_train, dataset_helper)

netG.set_train()
netD.set_train()
test_latent_code = Tensor(np.random.normal(size=(16, input_dim)),
                          dtype=mstype.float32)

for epoch in range(epochs):
    start = time.time()
    for data in dataset_helper:
        imgs = data[0]
        latent_code = data[1]
        # one step updates the discriminator first, then the generator
        d_out, g_out = net_train(imgs, latent_code)
    t = time.time() - start
    print("time of epoch {} is {:.2f}s".format(epoch, t))
    # generate samples from a fixed latent code to track training progress
    gen_imgs = netG(test_latent_code)
    save_imgs(gen_imgs.asnumpy(), epoch)

Let's take a look at the training results:

The results are... passable: the outputs are at least recognizable as digits. In my experience DCGAN training is fairly sensitive to hyperparameters, so some patient tuning can help; the problem may also simply be insufficient training, or you could try a different network architecture.
So is there a simpler, more reliable way to generate MNIST handwritten digits? There certainly is: CGAN, the conditional generative adversarial network, which the next article will cover. With the extra conditional constraint, CGAN produces considerably better results on MNIST and is also easy to train.

That wraps up implementing DCGAN with MindSpore. See you in the next article (whenever I finally get around to writing it).
