DCGANs: 用DCGAN生成图片(MNIST)

由于接下来要做医学图像的东西,老板先让我跑着TF来玩玩积累经验。
作为一个非计算机科班出身的人,写代码与理解代码的能力尤为重要。
本文在参考博客的基础上(当然原博客很多细节没有强调到,甚至是错的)对代码进行了修改并记录了自己的体会,受益颇多。

强调: 学习本博客一定要参考原博客的详细过程阐释作为基础,同时结合我代码里面的注释,才能达到最佳的学习效果。

重点:

  • 对于generator的理解,请参考DCGAN原arXiv论文,其中值得注意的是:在U-Net中曾经出现过的‘deconvolution layer’,更准确的名称是‘fractionally-strided convolution layer’
  • 对于参考博客中generator存在的问题进行了修正,主要体现在tf.add(H_conv1, b_conv1)的修正
  • 对于reuse经常出现的问题进行深度挖掘发现,自己对‘reuse’的使用不熟悉
    if (reuse):tf.get_variable_scope().reuse_variables(),因为在discriminator应用到生成图像的时候,肯定需要原有的参数,所以要保证reuse=True,因此有了Dg = model.discriminator(Gz, reuse=True)的代码。
  • 多说一句:tf.variable_scope()工作域中的reuse=False为默认,且要知道reuse具有继承性质,细节请参考这篇简书,写的相当好!
  • 对于tf.nn.conv2d_transpose()的理解,请一定仔细理解为什么需要output_shape,因为不指定的话,输出的shape形式不唯一
  • 一定要学会在pool时候的size变换,这不仅仅关系到卷积,更多的时候关系到“反卷积”的size变换
  • 代码共分为两段,第一段为model.py着重在于建模;第二段为train.py着重于训练。
  • 本博客中的代码跟原博客的代码有所不同,这代表着博主自己的‘思考’

俗话说磨刀不误砍柴工,先仔细结合原博客仔细理解tf编程的思路,以及DCGAN的实现细节,才能知道关键问题会在哪里出现。
俗话还说,实践出真知,一定要玩玩自己的数据集才知道问题在哪里,接下来会相应更新自己用DCGAN生成医学图像的代码,让大家体会下笨蛋的成长之路。

# -*- coding: utf-8 -*-
"""
Created on Tue Jul 24 20:33:14 2018
E-mail: [email protected]
@author: DidiLv
File name: model.py
"""


import tensorflow as tf
import numpy as np


# Import the MNIST tutorial helper and load the dataset from "MNIST_data/"
# as a side effect of importing this module.
# NOTE(review): `mnist` is not referenced by any function visible in model.py
# (train.py loads its own copy) — confirm whether this load is still needed.
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("MNIST_data/") # MNIST dataset

# pooling and convolution definition
def conv2d(x, W):
    """Convolve `x` with kernel `W` using unit strides and 'SAME' padding."""
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(input=x, filter=W, strides=unit_strides, padding='SAME')

def avg_pool_2x2(x):
    """Average-pool `x` over 2x2 windows with stride 2 and 'SAME' padding."""
    window = [1, 2, 2, 1]
    step = [1, 2, 2, 1]
    return tf.nn.avg_pool(x, ksize=window, strides=step, padding='SAME')

def xavier_init(size):
    """Sample initial values from a zero-mean normal with stddev sqrt(2 / fan_in).

    Args:
        size: shape of the tensor to create (sequence of ints). For shapes of
            rank >= 2 the last axis is treated as the output axis and fan_in
            is the product of all remaining axes, so a conv kernel
            [height, width, in_channels, out_channels] uses
            fan_in = height * width * in_channels. For a 2-D shape this is
            simply size[0]; for a 1-D shape fan_in is size[0].

    Returns:
        A `tf.random_normal` tensor of shape `size`.
    """
    if len(size) > 1:
        # Using only size[0] here would take the kernel height as the fan-in
        # of a convolution kernel and make the stddev far too large.
        fan_in = int(np.prod(size[:-1]))
    else:
        fan_in = size[0]
    xavier_stddev = (2. / fan_in) ** 0.5
    return tf.random_normal(shape=size, stddev=xavier_stddev)

# discriminator
def discriminator(x_image, reuse=False):
    """Build the convolutional discriminator and return its raw output.

    Layout: two (5x5 conv -> ReLU -> 2x2 average pool) stages with 8 and 16
    filters, a 32-unit fully connected ReLU layer, and a final linear layer
    with one output. No activation is applied to the returned tensor.

    Args:
        x_image: image batch; it must flatten to 7*7*16 features per example
            after the two poolings (e.g. [batch, 28, 28, 1] inputs).
        reuse: when truthy, put the 'discriminator' scope into reuse mode so
            the existing d_* variables are shared rather than re-created.

    Returns:
        Tensor with one value per example (result of the last matmul + bias).
    """

    def weight(name, shape):
        # All weights share the same truncated-normal initialisation.
        return tf.get_variable(
            name, shape=shape,
            initializer=tf.truncated_normal_initializer(stddev=0.02))

    def bias(name, shape):
        # All biases start at zero.
        return tf.get_variable(
            name, shape=shape, initializer=tf.constant_initializer(0))

    with tf.variable_scope('discriminator'):
        # Scopes do not reuse variables unless asked to, so only the
        # reuse=True case needs explicit handling.
        if reuse:
            tf.get_variable_scope().reuse_variables()

        # Stage 1: 5x5 conv, 1 -> 8 channels, then 2x2 average pooling.
        kernel1 = weight('d_wconv1', [5, 5, 1, 8])
        offset1 = bias('d_bconv1', [8])
        activ1 = tf.nn.relu(conv2d(x_image, kernel1) + offset1)
        pooled1 = avg_pool_2x2(activ1)

        # Stage 2: 5x5 conv, 8 -> 16 channels, then 2x2 average pooling.
        kernel2 = weight('d_wconv2', [5, 5, 8, 16])
        offset2 = bias('d_bconv2', [16])
        activ2 = tf.nn.relu(conv2d(pooled1, kernel2) + offset2)
        pooled2 = avg_pool_2x2(activ2)

        # Fully connected layer on the flattened 7x7x16 feature map.
        flat_dim = 7 * 7 * 16
        dense_w = weight('d_wfc1', [flat_dim, 32])
        dense_b = bias('d_bfc1', [32])
        flattened = tf.reshape(pooled2, [-1, flat_dim])
        hidden = tf.nn.relu(tf.matmul(flattened, dense_w) + dense_b)

        # Output layer: a single linear unit.
        out_w = weight('d_wfc2', [32, 1])
        out_b = bias('d_bfc2', [1])
        logits = (tf.matmul(hidden, out_w) + out_b)
    return logits


# generator from DCGAN, take a d-dimensional vector as input and upsample it to become a 28*28 image
# the structure is from https://arxiv.org/pdf/1511.06434v2.pdf
def generator(z, batch_size, z_dim, reuse = False):
    """Build the DCGAN-style generator that upsamples noise to 28x28x1 images.

    The structure follows https://arxiv.org/pdf/1511.06434v2.pdf: the noise is
    reshaped to a small feature map and passed through four fractionally-
    strided ("transposed") convolutions. The first three are followed by
    bias add, batch normalisation and ReLU; the last one by bias add and tanh.

    Args:
        z: noise tensor; it is reshaped to [batch_size, 2, 2, 25], so it must
            hold 100 values per example.
        batch_size: batch dimension used in the reshape and in every
            `output_shape` passed to `tf.nn.conv2d_transpose`.
        z_dim: kept for interface compatibility; this function does not read
            it (the reshape hard-codes 2 * 2 * 25).
        reuse: when truthy, put the 'generator' scope into reuse mode so the
            existing g_* variables are shared rather than re-created.

    Returns:
        tanh-activated tensor with output shape [batch_size, 28, 28, 1].
    """
    with tf.variable_scope('generator'):
        if reuse:
            tf.get_variable_scope().reuse_variables()
        ## number of filters for the last hidden layer of the generator
        g_dim = 64
        ## color dimension of output
        c_dim = 1
        ## size of output image
        s = 28
        s2, s4, s8, s16 = s // 2, s // 4, s // 8, s // 16  # 14, 7, 3, 1

        # h0 dimension is [batch_size, z_width, z_height, z_channel]
        h0 = tf.reshape(z, [batch_size, s16+1, s16+1, 25])
        h0 = tf.nn.relu(h0)
        ## Dimensions of h0 = batch_size x 2 x 2 x 25

        # conv2d_transpose needs an explicit output_shape because several
        # output sizes map back to the same input size under a strided
        # convolution (with 'SAME' and stride 2, both 3 and 4 map to 2).
        ## useful link for convolution :
        ## https://blog.csdn.net/mao_xiao_feng/article/details/71713358

        # first deconvolution layer (fractionally-strided convolution layer)
        output1_shape = [batch_size, s8, s8, g_dim*4]
        ## W_conv1 shape = [filter_height, filter_width, out_channels, in_channels]
        W_conv1 = tf.get_variable('g_wconv1', shape = [5,5,output1_shape[-1],int(h0.get_shape()[-1])],
                                    initializer=tf.truncated_normal_initializer(stddev = 0.1)
                                    )
        b_conv1 = tf.get_variable('g_bconv1', shape = [output1_shape[-1]], initializer=tf.constant_initializer(.1))
        H_conv1 = tf.nn.conv2d_transpose(h0, W_conv1, output_shape = output1_shape, strides = [1,2,2,1],
                                         padding = 'SAME')
        H_conv1 = tf.add(H_conv1, b_conv1)
        H_conv1 = tf.contrib.layers.batch_norm(inputs = H_conv1, center=True, scale=True, is_training=True, scope="g_bn1")
        H_conv1 = tf.nn.relu(H_conv1)
        ## Dimensions of H_conv1 = batch_size x 3 x 3 x 256

        # second deconvolution layer
        output2_shape = [batch_size, s4-1, s4-1, g_dim*2]
        W_conv2 = tf.get_variable('g_wconv2', shape = [5,5,output2_shape[-1], int(H_conv1.get_shape()[-1])],
                                  initializer=tf.truncated_normal_initializer(stddev = 0.1))
        # Constant 0.1 like every other generator bias. The previous
        # truncated_normal_initializer(0.1) set mean=0.1 with the default
        # stddev of 1.0, because the first positional argument is the mean.
        b_conv2 = tf.get_variable('g_bconv2', shape = [output2_shape[-1]], initializer=tf.constant_initializer(.1))
        H_conv2 = tf.nn.conv2d_transpose(H_conv1, W_conv2, output_shape = output2_shape, strides = [1,2,2,1],
                               padding = 'SAME')
        H_conv2 = tf.add(H_conv2, b_conv2)
        H_conv2 = tf.contrib.layers.batch_norm(inputs = H_conv2, center=True, scale=True, is_training=True, scope="g_bn2")
        H_conv2 = tf.nn.relu(H_conv2)
        ## Dimensions of H_conv2 = batch_size x 6 x 6 x 128

        # third deconvolution layer
        output3_shape = [batch_size, s2 - 2, s2 - 2, g_dim*1]
        W_conv3 = tf.get_variable('g_wconv3', [5, 5, output3_shape[-1], int(H_conv2.get_shape()[-1])],
                                  initializer=tf.truncated_normal_initializer(stddev=0.1))
        b_conv3 = tf.get_variable('g_bconv3', [output3_shape[-1]], initializer=tf.constant_initializer(.1))
        H_conv3 = tf.nn.conv2d_transpose(H_conv2, W_conv3, output_shape=output3_shape, strides=[1, 2, 2, 1],
                                         padding='SAME')
        H_conv3 = tf.add(H_conv3, b_conv3)
        H_conv3 = tf.contrib.layers.batch_norm(inputs = H_conv3, center=True, scale=True, is_training=True, scope="g_bn3")
        H_conv3 = tf.nn.relu(H_conv3)
        ## Dimensions of H_conv3 = batch_size x 12 x 12 x 64

        # fourth deconvolution layer
        # 'VALID' is used here: a 5x5 stride-2 VALID convolution of a 28x28
        # map yields (28 - 5) // 2 + 1 = 12, so 12 -> 28 is a valid transpose.
        output4_shape = [batch_size, s, s, c_dim]
        W_conv4 = tf.get_variable('g_wconv4', [5, 5, output4_shape[-1], int(H_conv3.get_shape()[-1])],
                                  initializer=tf.truncated_normal_initializer(stddev=0.1))
        b_conv4 = tf.get_variable('g_bconv4', [output4_shape[-1]], initializer=tf.constant_initializer(.1))
        H_conv4 = tf.nn.conv2d_transpose(H_conv3, W_conv4, output_shape=output4_shape, strides=[1, 2, 2, 1],
                                         padding='VALID')
        H_conv4 = tf.add(H_conv4, b_conv4)
        H_conv4 = tf.nn.tanh(H_conv4)
        ## Dimensions of H_conv4 = batch_size x 28 x 28 x 1
        return H_conv4


# -*- coding: utf-8 -*-
"""
Created on Wed Jul 25 09:42:35 2018
E-mail: [email protected]
@author: DidiLv
File name: train.py
"""
import model

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import random

# Import the MNIST tutorial helper and load the dataset from "MNIST_data/";
# the training loop below draws its real image batches from `mnist.train`.
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("MNIST_data/") # MNIST dataset


# Start from an empty default graph so variables from earlier experiments
# in the same process do not clash with the ones created below.
tf.reset_default_graph()

batch_size = 16
z_dimensions = 2*2*25 # must match the reshape of h0 in model.generator


# Inputs: real images for the discriminator, noise vectors for the generator.
x_placeholder = tf.placeholder(dtype = tf.float32, shape = [None, 28, 28, 1])
z_placeholder = tf.placeholder(dtype = tf.float32, shape = [None,z_dimensions])

Dx = model.discriminator(x_placeholder) # discriminator output on real data
Gz = model.generator(z_placeholder, batch_size, z_dimensions) # generated images
# Second discriminator application shares the variables created for Dx.
Dg = model.discriminator(Gz, reuse=True)


# Generator wants the discriminator to label its samples as real (ones).
g_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=Dg, labels=tf.ones_like(Dg)))
# Discriminator wants real images labelled one and generated images zero.
d_loss_real = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=Dx, labels = tf.ones_like(Dx)))
# Labels are built from Dg (not Dx) so they always match the logits' shape.
d_loss_fake = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=Dg, labels = tf.zeros_like(Dg)))
d_loss = d_loss_real + d_loss_fake

# Split the trainable variables by the 'd_' / 'g_' naming prefixes used in
# model.py so that each optimizer only updates its own network.
tvars = tf.trainable_variables()
d_vars = [var for var in tvars if 'd_' in var.name]
g_vars = [var for var in tvars if 'g_' in var.name]

# NOTE(review): presumably the explicit reuse=False keeps any variables the
# optimizers create out of reuse mode — confirm it is still required.
with tf.variable_scope(tf.get_variable_scope(), reuse = False):
    # var_list: tf.Variable to update to minimize loss
    trainerD = tf.train.AdadeltaOptimizer(learning_rate = 1e-3).minimize(d_loss, var_list = d_vars)
    trainerG = tf.train.AdadeltaOptimizer(learning_rate = 1e-3).minimize(g_loss, var_list = g_vars)



sess = tf.Session()
sess.run(tf.global_variables_initializer())
iterations = 3000
for i in range(iterations):
    # Zero-mean, unit-variance latent noise. np.random.normal takes
    # (mean, stddev), so the former (-1, 1) call sampled around -1.
    z_batch = np.random.normal(0, 1, size=[batch_size, z_dimensions])
    # next_batch returns (images, labels); only the images are used.
    # NOTE(review): the generator output is tanh-bounded to [-1, 1]; confirm
    # the MNIST pixels fed here use a comparable value range.
    real_image_batch = mnist.train.next_batch(batch_size)
    real_image_batch = np.reshape(real_image_batch[0],[batch_size,28,28,1])
    _,dLoss = sess.run([trainerD, d_loss],feed_dict={z_placeholder:z_batch,x_placeholder:real_image_batch}) #Update the discriminator
    _,gLoss = sess.run([trainerG, g_loss],feed_dict={z_placeholder:z_batch}) #Update the generator
    print((dLoss+gLoss))

以上代码为DCGAN代码的全部,接下来的test.py文件是博主自己尝试用tf.Variable()构建变量的时候采用的测试代码,也是ok的!
注意:

  • 利用了新的初始化xavier_init()函数进行初始化
  • tf.nn.conv2d()的输入通常要求是tf.float32类型,所以要把x_image转换成tf.float32
  • tensorflow中的变量要想查看必须建立session后才能查看,举个例子:
    import tensorflow as tf
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    print(sess.run(D_noise))

    这样才能看到下面test.py文件的D_noise的数值,否则你只能看到它的形状以及类型
# -*- coding: utf-8 -*-
"""
Created on Wed Jul 25 16:02:22 2018
E-mail: [email protected]
@author: DidiLv
"""


import tensorflow as tf
import numpy as np

# pooling and convolution definition
def conv2d(x, W):
    """Apply a stride-1, 'SAME'-padded 2-D convolution of `x` with filter `W`."""
    conv_kwargs = dict(strides=[1, 1, 1, 1], padding='SAME')
    return tf.nn.conv2d(input=x, filter=W, **conv_kwargs)

def avg_pool_2x2(x):
    """Halve the spatial size of `x` with 2x2, stride-2, 'SAME' average pooling."""
    two_by_two = [1, 2, 2, 1]
    return tf.nn.avg_pool(
        x,
        ksize=two_by_two,
        strides=list(two_by_two),
        padding='SAME',
    )

def xavier_init(size):
    """Sample initial values from a zero-mean normal with stddev sqrt(2 / fan_in).

    Args:
        size: shape of the tensor to create (sequence of ints). For shapes of
            rank >= 2 the last axis is treated as the output axis and fan_in
            is the product of all remaining axes, so a conv kernel
            [height, width, in_channels, out_channels] uses
            fan_in = height * width * in_channels. For a 2-D shape this is
            simply size[0]; for a 1-D shape fan_in is size[0].

    Returns:
        A `tf.random_normal` tensor of shape `size`.
    """
    if len(size) > 1:
        # Using only size[0] here would take the kernel height as the fan-in
        # of a convolution kernel and make the stddev far too large.
        fan_in = int(np.prod(size[:-1]))
    else:
        fan_in = size[0]
    xavier_stddev = (2. / fan_in) ** 0.5
    return tf.random_normal(shape=size, stddev=xavier_stddev)
def sample_z(shape):
    """Draw noise of the requested shape, uniformly distributed on [-1, 1)."""
    low, high = -1., 1.
    return np.random.uniform(low=low, high=high, size=shape)


def discriminator(x_image):
    """Build the discriminator from plain tf.Variable parameters.

    Same layer layout as the scoped model.py version (two conv + average-pool
    stages, a 32-unit dense layer, one linear output), but every weight and
    bias is a fresh `tf.Variable` initialised with `xavier_init`, so no
    variable scope or reuse handling is involved.

    Args:
        x_image: float image batch; it must flatten to 7*7*16 features per
            example after the two stride-2 poolings (28 / 2 / 2 = 7).

    Returns:
        Tensor with one value per example; no activation is applied to it.
    """

    def param(shape):
        # Every parameter, biases included, is drawn via xavier_init.
        return tf.Variable(xavier_init(shape))

    # Stage 1. Kernel layout: [filter_height, filter_width, in_channels,
    # out_channels]; bias layout: [out_channels].
    kernel1 = param([5, 5, 1, 8])
    offset1 = param([8])
    activ1 = tf.nn.relu(conv2d(x_image, kernel1) + offset1)
    pooled1 = avg_pool_2x2(activ1)

    # Stage 2 consumes the 8 channels produced by stage 1.
    kernel2 = param([5, 5, 8, 16])
    offset2 = param([16])
    activ2 = tf.nn.relu(conv2d(pooled1, kernel2) + offset2)
    pooled2 = avg_pool_2x2(activ2)

    # Dense layer: the 7x7x16 feature map is flattened to a vector first
    # because the layer is a plain matrix multiplication.
    flat_dim = 7 * 7 * 16
    dense_w = param([flat_dim, 32])
    dense_b = param([32])
    flattened = tf.reshape(pooled2, [-1, flat_dim])
    hidden = tf.nn.relu(tf.matmul(flattened, dense_w) + dense_b)

    # Output layer: a single linear unit.
    out_w = param([32, 1])
    out_b = param([1])

    return tf.add(tf.matmul(hidden, out_w), out_b)

# Create a random stand-in "image" (really just uniform noise) as a float32
# variable, since the convolution expects float32 input.
x_image = tf.Variable(
    initial_value=sample_z(shape=[1, 28, 28, 1]),
    dtype=tf.float32,
)

# Discriminator output for the noise image.
D_noise = discriminator(x_image=x_image)

你可能感兴趣的:(图像处理,算法,机器学习,Tensorflow,Python)