DCGAN stands for Deep Convolutional Generative Adversarial Network. It builds on the original GAN by implementing both the generator and the discriminator as CNNs instead of simple multilayer perceptrons. The paper also modifies the usual CNN design: fully connected layers are removed, batch normalization is applied, transposed convolutions are used, and the LeakyReLU activation is used, among other changes. Reference paper: "Unsupervised Representation Learning with Deep Convolutional Generative Adversarial Networks". GitHub code: https://github.com/carpedm20/DCGAN-tensorflow. The code below slightly modifies the author's source so that it runs on the MNIST dataset; it converges quickly:
#coding=utf-8
import tensorflow as tf
import pickle
import os
import numpy as np
from scipy.misc import imsave
import matplotlib.gridspec as gridspec
import shutil
import math
# Class that wraps the MNIST dataset
class mnistReader():
    def __init__(self, mnistPath, onehot=True):
        self.mnistPath = mnistPath
        self.onehot = onehot
        self.batch_index = 0
        print('read:', self.mnistPath)
        fo = open(self.mnistPath, 'rb')
        self.train_set, self.valid_set, self.test_set = pickle.load(fo, encoding='bytes')
        fo.close()
        self.data_label_train = list(zip(self.train_set[0], self.train_set[1]))
        np.random.shuffle(self.data_label_train)

    # Return the next training batch
    def next_train_batch(self, batch_size=100):
        if self.batch_index < int(len(self.data_label_train) / batch_size):
            # print("batch_index:", self.batch_index)
            datum = self.data_label_train[self.batch_index*batch_size:(self.batch_index+1)*batch_size]
            self.batch_index += 1
            return self._decode(datum, self.onehot)
        else:
            self.batch_index = 0
            np.random.shuffle(self.data_label_train)
            datum = self.data_label_train[self.batch_index*batch_size:(self.batch_index+1)*batch_size]
            self.batch_index += 1
            return self._decode(datum, self.onehot)

    # Fetch sample labels, used as the condition for the generated images
    def get_sample_label(self, batch_size=64):
        sample = self.train_set[1][0:batch_size]
        rlabel = list()
        for index in sample:
            hot = np.zeros(10)
            hot[int(index)] = 1
            rlabel.append(hot)
        return rlabel

    # Turn the labels into one-hot vectors
    def _decode(self, datum, onehot):
        rdata = list()   # training data of this batch
        rlabel = list()
        if onehot:
            for d, l in datum:
                rdata.append(np.reshape(d, [28, 28, 1]))
                hot = np.zeros(10)
                hot[int(l)] = 1   # the label becomes a 10-dimensional one-hot vector
                rlabel.append(hot)
        else:
            for d, l in datum:
                rdata.append(np.reshape(d, [28, 28, 1]))
                rlabel.append(int(l))
        return rdata, rlabel
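# Note: mnistPath is expected to point at a pickled (train_set, valid_set, test_set)
# tuple, where each set is a pair (images, labels) -- e.g. the classic mnist.pkl
# distributed with the deeplearning.net tutorials (mnist.pkl.gz, unzipped).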
# Batch-normalization wrapper class
class batch_norm(object):
    def __init__(self, epsilon=1e-5, momentum=0.9, name="batch_norm"):
        with tf.variable_scope(name):
            self.epsilon = epsilon
            self.momentum = momentum
            self.name = name

    def __call__(self, x, train=True):
        return tf.contrib.layers.batch_norm(x,
                                            decay=self.momentum,
                                            updates_collections=None,
                                            epsilon=self.epsilon,
                                            scale=True,
                                            is_training=train,
                                            scope=self.name)
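# Each batch_norm instance keeps its own scale/offset and moving statistics under the
# variable scope given by `name`; updates_collections=None makes the moving averages
# update in place during the forward pass, so no separate update op has to be run.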
input_height = 28      # input image height
input_width = 28       # input image width
output_height = 28     # output image height
output_width = 28      # output image width
max_epoch = 100        # maximum number of epochs
batch_size = 64        # batch size
y_dim = 10             # dimension of the condition vector; set to None to train without conditions
z_dim = 100            # dimension of the noise vector
gf_dim = 64            # number of filters in the generator's first convolutional layer
df_dim = 64            # number of filters in the discriminator's first convolutional layer
gfc_dim = 1024         # number of units in the generator's fully connected layer
dfc_dim = 1024         # number of units in the discriminator's fully connected layer
c_dim = 1              # number of image channels
output_path = "DCGAN"  # output directory
# Discriminator. batch_size=64, image has shape [64 28 28 1], y has shape [64 10]
def discriminator(image, y=None, reuse=False):
    # variable scope for the discriminator
    with tf.variable_scope("discriminator") as scope:
        if reuse:
            scope.reuse_variables()   # reuse the variables inside this scope
        d_bn1 = batch_norm(name='d_bn1')
        d_bn2 = batch_norm(name='d_bn2')
        if not y_dim:
            d_bn3 = batch_norm(name='d_bn3')

        # y_dim is None: no conditioning variable
        if not y_dim:
            # First conv layer, h0 has shape [64 14 14 64]; all kernels are 5*5 with stride 2
            # and SAME padding, so each conv layer halves the spatial size (1/4 of the area)
            h0 = lrelu(conv2d(image, df_dim, name='d_h0_conv'))
            # Second conv layer, h1 has shape [64 7 7 128]
            h1 = lrelu(d_bn1(conv2d(h0, df_dim*2, name='d_h1_conv')))
            # Third conv layer, h2 has shape [64 4 4 256]
            h2 = lrelu(d_bn2(conv2d(h1, df_dim*4, name='d_h2_conv')))
            # Fourth conv layer, h3 has shape [64 2 2 512]
            h3 = lrelu(d_bn3(conv2d(h2, df_dim*8, name='d_h3_conv')))
            # Flatten to [64 2048] and feed through a linear layer; h4 has shape [64 1]
            h4 = linear(tf.reshape(h3, [batch_size, -1]), 1, 'd_h4_lin')
            # Apply the sigmoid to get a probability, shape [64 1]
            return tf.nn.sigmoid(h4), h4
        # y_dim is set: a conditioning variable is used
        else:
            # yb has shape [64 1 1 10]
            yb = tf.reshape(y, [batch_size, 1, 1, y_dim])
            '''
            Concatenate image and yb. image has shape [64 28 28 1]. yb is first multiplied
            element-wise (via broadcasting) with a ones tensor of shape [64 28 28 10], giving
            a [64 28 28 10] tensor, which is then concatenated with image along the 4th axis
            to obtain [64 28 28 11]. This is effectively a Conditional GAN: the label is
            attached to the image as extra channels, and x = [64 28 28 11] is fed into conv2d.
            '''
            x = conv_cond_concat(image, yb)
            # First conv layer; after concatenating with yb again, h0 has shape [64 14 14 21]
            h0 = lrelu(conv2d(x, c_dim + y_dim, name='d_h0_conv'))
            h0 = conv_cond_concat(h0, yb)
            # Second conv layer; after flattening and concatenating y, h1 has shape [64 7*7*74+10]
            h1 = lrelu(d_bn1(conv2d(h0, df_dim + y_dim, name='d_h1_conv')))
            h1 = tf.reshape(h1, [batch_size, -1])
            h1 = tf.concat([h1, y], 1)
            # First linear layer; h2 has shape [64 1024+10]
            h2 = lrelu(d_bn2(linear(h1, dfc_dim, 'd_h2_lin')))
            h2 = tf.concat([h2, y], 1)
            # Output linear layer; h3 has shape [64 1]
            h3 = linear(h2, 1, 'd_h3_lin')
            # Apply the sigmoid to get a probability, shape [64 1]
            return tf.nn.sigmoid(h3), h3
# Generator. z is uniformly distributed random noise with shape [64 100], y has shape [64 10]
def generator(z, y):
    with tf.variable_scope("generator") as scope:
        g_bn0 = batch_norm(name='g_bn0')
        g_bn1 = batch_norm(name='g_bn1')
        g_bn2 = batch_norm(name='g_bn2')
        if not y_dim:
            g_bn3 = batch_norm(name='g_bn3')

        # No conditioning variable
        if not y_dim:
            # s_h and s_w are 28
            s_h, s_w = output_height, output_width
            # s_h2 and s_w2 are 14
            s_h2, s_w2 = conv_out_size_same(s_h, 2), conv_out_size_same(s_w, 2)
            # s_h4 and s_w4 are 7
            s_h4, s_w4 = conv_out_size_same(s_h2, 2), conv_out_size_same(s_w2, 2)
            # s_h8 and s_w8 are 4
            s_h8, s_w8 = conv_out_size_same(s_h4, 2), conv_out_size_same(s_w4, 2)
            # s_h16 and s_w16 are 2
            s_h16, s_w16 = conv_out_size_same(s_h8, 2), conv_out_size_same(s_w8, 2)
            # Fully connect the noise z, mapping it to a [64 64*8*2*2] tensor
            z_, h0_w, h0_b = linear(z, gf_dim*8*s_h16*s_w16, 'g_h0_lin', with_w=True)
            # Reshape to [64 2 2 64*8]
            h0 = tf.reshape(z_, [-1, s_h16, s_w16, gf_dim * 8])
            h0 = tf.nn.relu(g_bn0(h0))
            # First transposed convolution; h1 has shape [64 4 4 256]
            h1, h1_w, h1_b = deconv2d(h0, [batch_size, s_h8, s_w8, gf_dim*4], name='g_h1', with_w=True)
            h1 = tf.nn.relu(g_bn1(h1))
            # Second transposed convolution; h2 has shape [64 7 7 128]
            h2, h2_w, h2_b = deconv2d(h1, [batch_size, s_h4, s_w4, gf_dim*2], name='g_h2', with_w=True)
            h2 = tf.nn.relu(g_bn2(h2))
            # Third transposed convolution; h3 has shape [64 14 14 64]
            h3, h3_w, h3_b = deconv2d(h2, [batch_size, s_h2, s_w2, gf_dim*1], name='g_h3', with_w=True)
            h3 = tf.nn.relu(g_bn3(h3))
            # Fourth transposed convolution; the output has shape [64 28 28 1]
            h4, h4_w, h4_b = deconv2d(h3, [batch_size, s_h, s_w, c_dim], name='g_h4', with_w=True)
            # Apply tanh to map the output into [-1, 1]
            return tf.nn.tanh(h4)
        # With a conditioning variable
        else:
            # s_h and s_w are 28
            s_h, s_w = output_height, output_width
            # s_h2 and s_h4 are 14 and 7
            s_h2, s_h4 = int(s_h/2), int(s_h/4)
            # s_w2 and s_w4 are 14 and 7
            s_w2, s_w4 = int(s_w/2), int(s_w/4)
            # y has shape [64 10], z has shape [64 100]; concatenating them gives [64 110]
            z = tf.concat([z, y], 1)
            # h0 has shape [64 1024+10]
            h0 = tf.nn.relu(g_bn0(linear(z, gfc_dim, 'g_h0_lin')))
            h0 = tf.concat([h0, y], 1)
            # After the reshape and concat below, h1 has shape [64 7 7 128+10]
            h1 = tf.nn.relu(g_bn1(linear(h0, gf_dim*2*s_h4*s_w4, 'g_h1_lin')))
            h1 = tf.reshape(h1, [batch_size, s_h4, s_w4, gf_dim * 2])
            yb = tf.reshape(y, [batch_size, 1, 1, y_dim])
            h1 = conv_cond_concat(h1, yb)
            # First transposed convolution; after concatenating with yb, h2 has shape [64 14 14 128+10]
            h2 = tf.nn.relu(g_bn2(deconv2d(h1, [batch_size, s_h2, s_w2, gf_dim * 2], name='g_h2')))
            h2 = conv_cond_concat(h2, yb)
            # Second transposed convolution; the result has shape [64 28 28 1]
            return tf.nn.sigmoid(deconv2d(h2, [batch_size, s_h, s_w, c_dim], name='g_h3'))
# Divide a spatial size by the stride, rounding up
def conv_out_size_same(size, stride):
    return int(math.ceil(float(size) / float(stride)))
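# e.g. with stride 2 this gives the chain 28 -> 14 -> 7 -> 4 -> 2,
# which is exactly the sequence s_h, s_h2, s_h4, s_h8, s_h16 used in generator()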
# Fuse two tensors; x is the image (or feature map), y is the conditioning variable
def conv_cond_concat(x, y):
    x_shapes = x.get_shape()
    y_shapes = y.get_shape()
    # the element-wise multiplication relies on TensorFlow broadcasting
    return tf.concat([x, y * tf.ones([x_shapes[0], x_shapes[1], x_shapes[2], y_shapes[3]])], 3)
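# Example: x with shape [64 28 28 1] and y with shape [64 1 1 10] give a broadcast
# copy of y with shape [64 28 28 10], and the concat returns a [64 28 28 11] tensor.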
# Standard convolution
def conv2d(input_, output_dim, k_h=5, k_w=5, d_h=2, d_w=2, stddev=0.02, name="conv2d"):
    with tf.variable_scope(name):
        w = tf.get_variable('w', [k_h, k_w, input_.get_shape()[-1], output_dim],
                            initializer=tf.truncated_normal_initializer(stddev=stddev))
        '''
        tf.get_variable: creates or retrieves a variable; when creating, it behaves like tf.Variable.
        k_h: kernel height
        k_w: kernel width
        input_.get_shape()[-1]: number of input channels of the kernel
        d_h: vertical stride
        d_w: horizontal stride
        output_dim: number of kernels (output channels)
        '''
        conv = tf.nn.conv2d(input_, w, strides=[1, d_h, d_w, 1], padding='SAME')
        biases = tf.get_variable('biases', [output_dim], initializer=tf.constant_initializer(0.0))
        conv = tf.reshape(tf.nn.bias_add(conv, biases), conv.get_shape())
        return conv
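# With SAME padding the output spatial size is ceil(input_size / stride),
# so a 28x28 input with stride 2 comes out as 14x14.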
# Transposed convolution, the reverse of the convolution above
def deconv2d(input_, output_shape, k_h=5, k_w=5, d_h=2, d_w=2, stddev=0.02, name="deconv2d", with_w=False):
    with tf.variable_scope(name):
        # filter: [height, width, output_channels, in_channels]
        w = tf.get_variable('w', [k_h, k_w, output_shape[-1], input_.get_shape()[-1]],
                            initializer=tf.random_normal_initializer(stddev=stddev))
        try:
            deconv = tf.nn.conv2d_transpose(input_, w, output_shape=output_shape,
                                            strides=[1, d_h, d_w, 1])
        # Support for versions of TensorFlow before 0.7.0
        except AttributeError:
            deconv = tf.nn.deconv2d(input_, w, output_shape=output_shape, strides=[1, d_h, d_w, 1])
        biases = tf.get_variable('biases', [output_shape[-1]], initializer=tf.constant_initializer(0.0))
        deconv = tf.reshape(tf.nn.bias_add(deconv, biases), deconv.get_shape())
        if with_w:
            return deconv, w, biases
        else:
            return deconv
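# The caller specifies output_shape explicitly because, with stride 2, several input sizes
# map to the same convolution output size (e.g. both 7 and 8 give 4), so the transposed
# convolution cannot infer the intended spatial size on its own.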
# LeakyReLU activation
def lrelu(x, leak=0.2, name="lrelu"):
    return tf.maximum(x, leak*x)
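# For 0 < leak < 1 this is f(x) = x when x >= 0 and leak*x when x < 0.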
# Linear transformation, i.e. a fully connected layer
def linear(input_, output_size, scope=None, stddev=0.02, bias_start=0.0, with_w=False):
    shape = input_.get_shape().as_list()
    with tf.variable_scope(scope or "Linear"):
        matrix = tf.get_variable("Matrix", [shape[1], output_size], tf.float32,
                                 tf.random_normal_initializer(stddev=stddev))
        bias = tf.get_variable("bias", [output_size], initializer=tf.constant_initializer(bias_start))
        if with_w:
            return tf.matmul(input_, matrix) + bias, matrix, bias
        else:
            return tf.matmul(input_, matrix) + bias
# Train the model
def train():
    # Shapes of the input placeholders
    y = tf.placeholder(tf.float32, [batch_size, y_dim], name='y')
    image_dims = [input_height, input_width, c_dim]
    inputs = tf.placeholder(tf.float32, [batch_size] + image_dims, name='real_images')
    z = tf.placeholder(tf.float32, [batch_size, z_dim], name='z')
    # Build the generator and the discriminator
    G = generator(z, y)
    D, D_logits = discriminator(inputs, y, reuse=False)
    D_, D_logits_ = discriminator(G, y, reuse=True)
    # Loss functions
    d_loss_real = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=D_logits, labels=tf.ones_like(D)))
    d_loss_fake = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=D_logits_, labels=tf.zeros_like(D_)))
    g_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=D_logits_, labels=tf.ones_like(D_)))
    d_loss = d_loss_real + d_loss_fake
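    # The discriminator is pushed towards label 1 on real images (d_loss_real) and label 0 on
    # generated images (d_loss_fake); the generator is trained with the non-saturating loss,
    # i.e. it tries to make the discriminator output 1 on its samples.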
    # Variables to optimize
    t_vars = tf.trainable_variables()   # all trainable variables in the graph
    d_vars = [var for var in t_vars if 'd_' in var.name]
    g_vars = [var for var in t_vars if 'g_' in var.name]
    # Optimizers
    d_optim = tf.train.AdamOptimizer(0.0001).minimize(d_loss, var_list=d_vars)
    g_optim = tf.train.AdamOptimizer(0.0001).minimize(g_loss, var_list=g_vars)
    init = tf.global_variables_initializer()
    sess = tf.Session()
    sess.run(init)
    if os.path.exists(output_path):
        shutil.rmtree(output_path)   # remove the old output directory
    os.mkdir(output_path)            # and recreate it
    sample_z = np.random.uniform(-1, 1, size=(batch_size, z_dim))
    mnist = mnistReader(mnistPath="E:/testdata/mnist.pkl")
    counter = 1
    for i in range(max_epoch):
        for j in range(100):
            print("epoch:%s, iter:%s" % (i, j))
            batch_images, batch_labels = mnist.next_train_batch(batch_size=batch_size)
            batch_z = np.random.uniform(-1, 1, [batch_size, z_dim]).astype(np.float32)
            sess.run(d_optim, feed_dict={inputs: batch_images, z: batch_z, y: batch_labels})
            # Run g_optim twice per discriminator update so d_loss does not drop to zero
            # too quickly (the same trick used in the original repository)
            sess.run(g_optim, feed_dict={z: batch_z, y: batch_labels})
            sess.run(g_optim, feed_dict={z: batch_z, y: batch_labels})
        # After each epoch, generate images from the fixed sample noise and save them
        sample_labels = mnist.get_sample_label(batch_size=batch_size)
        samples = sess.run(G, feed_dict={z: sample_z, y: sample_labels})
        show_result(samples, os.path.join(output_path, "random_sample%s.jpg" % counter))
        counter += 1
# Save the generated images as an image grid
def show_result(batch_res, fname, grid_size=(8, 8), grid_pad=5):
    # The conditional GAN ends with a sigmoid, the unconditional one with tanh,
    # so the tanh output has to be rescaled from [-1, 1] to [0, 1]
    if not y_dim:
        batch_res = 0.5 * batch_res.reshape((batch_res.shape[0], 28, 28)) + 0.5
    else:
        batch_res = batch_res.reshape((batch_res.shape[0], 28, 28))
    img_h, img_w = batch_res.shape[1], batch_res.shape[2]
    grid_h = img_h * grid_size[0] + grid_pad * (grid_size[0] - 1)
    grid_w = img_w * grid_size[1] + grid_pad * (grid_size[1] - 1)
    img_grid = np.zeros((grid_h, grid_w), dtype=np.uint8)
    for i, res in enumerate(batch_res):
        if i >= grid_size[0] * grid_size[1]:
            break
        img = res * 255   # the values are in [0, 1], so multiply by 255 to get pixel values
        img = img.astype(np.uint8)
        row = (i // grid_size[0]) * (img_h + grid_pad)
        col = (i % grid_size[1]) * (img_w + grid_pad)
        img_grid[row:row + img_h, col:col + img_w] = img
    imsave(fname, img_grid)
if __name__ == '__main__':
    train()
The results after the first epoch and after the fourteenth epoch are shown below: