WGAN-GP模型可以在原来GAN代码实现的基础上仅做少量修改。WGAN-GP模型的判别器D的输出不再是样本类别的概率,输出不需要加Sigmoid激活函数。同时添加梯度惩罚项,实现如下:
def gradient_penalty(discriminator, batch_x, fake_image):
    """Compute the WGAN-GP gradient penalty on real/fake interpolations.

    Args:
        discriminator: Critic model, invoked as
            ``discriminator(x, training=True)``.
        batch_x: Batch of real images. The mixing weights are created with
            shape ``[b, 1, 1, 1]``, so a 4-D ``[b, h, w, c]`` tensor is
            expected.
        fake_image: Batch of generated images, combined element-wise with
            ``batch_x``.

    Returns:
        Scalar tensor: the batch mean of ``(norm - 1) ** 2``, where ``norm``
        is the per-sample norm of the critic's gradient with respect to the
        interpolated input.
    """
    num_samples = batch_x.shape[0]
    # One uniform mixing weight per sample, expanded to the image shape:
    # [b, 1, 1, 1] => [b, h, w, c].
    mix = tf.broadcast_to(
        tf.random.uniform([num_samples, 1, 1, 1]), batch_x.shape)
    # Random point on the segment between each real and generated image.
    interpolated = mix * batch_x + (1 - mix) * fake_image

    # Differentiate the critic output with respect to the interpolated input,
    # which is not a variable and therefore must be watched explicitly.
    with tf.GradientTape() as tape:
        tape.watch([interpolated])
        critic_out = discriminator(interpolated, training=True)
    input_grads = tape.gradient(critic_out, interpolated)

    # Flatten each sample's gradient ([b, h, w, c] => [b, -1]) and reduce it
    # to a single norm per sample: [b].
    flat_grads = tf.reshape(input_grads, [input_grads.shape[0], -1])
    grad_norms = tf.norm(flat_grads, axis=1)
    # Penalise the squared deviation of each norm from 1.
    return tf.reduce_mean((grad_norms - 1) ** 2)
WGAN判别器的损失函数计算与GAN不一样,WGAN是直接最大化真实样本的输出值,最小化生成样本的输出值,并没有交叉熵计算的过程。代码实现如下:
def d_loss_fn(generator, discriminator, batch_z, batch_x, is_training,
              gp_weight=10.):
    """Compute the WGAN-GP discriminator (critic) loss.

    No cross-entropy is involved: the loss raises the critic output on real
    images, lowers it on generated images, and adds a weighted gradient
    penalty.

    Args:
        generator: Generator model mapping ``batch_z`` to images.
        discriminator: Critic model returning one logit per image.
        batch_z: Batch of latent vectors fed to the generator.
        batch_x: Batch of real images.
        is_training: Forwarded as the ``training`` flag of both models.
        gp_weight: Coefficient applied to the gradient penalty term.
            Defaults to ``10.``.

    Returns:
        Tuple ``(loss, gp)``: the total critic loss and the unweighted
        gradient penalty.
    """
    fake_image = generator(batch_z, training=is_training)
    d_fake_logits = discriminator(fake_image, training=is_training)
    d_real_logits = discriminator(batch_x, training=is_training)

    # celoss_ones yields -mean(logits) and celoss_zeros yields mean(logits),
    # so minimising their sum pushes real scores up and fake scores down.
    d_loss_real = celoss_ones(d_real_logits)
    d_loss_fake = celoss_zeros(d_fake_logits)

    gp = gradient_penalty(discriminator, batch_x, fake_image)

    loss = d_loss_real + d_loss_fake + gp_weight * gp
    return loss, gp
WGAN生成器G的损失函数只需要最大化生成样本在判别器D的输出值即可,同样没有交叉熵的计算步骤。代码实现如下:
def g_loss_fn(generator, discriminator, batch_z, is_training):
    """Compute the WGAN-GP generator loss.

    Args:
        generator: Generator model mapping ``batch_z`` to images.
        discriminator: Critic model scoring the generated images.
        batch_z: Batch of latent vectors.
        is_training: Passed positionally to both models as their second
            call argument.

    Returns:
        The value of ``celoss_ones`` on the critic's scores for the generated
        images; minimising it raises those scores.
    """
    generated = generator(batch_z, is_training)
    critic_scores = discriminator(generated, is_training)
    return celoss_ones(critic_scores)
WGAN的主训练逻辑与原始的GAN基本相同。与原始的GAN相比,判别器D的作用是作为一个EM距离的计量器存在,因此判别器越准确,对生成器越有利,可以在训练一个Step时训练判别器D多次,训练G一次,从而获得较为准确的EM距离估计。
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
class Generator(keras.Model):
    """Transposed-convolution generator that turns latent vectors into images.

    Shape flow as noted by the original author:
    z: [b, 100] => [b, 3*3*512] => [b, 3, 3, 512] => [b, 64, 64, 3].
    """

    def __init__(self):
        super().__init__()
        # Dense projection that is reshaped into a 3x3x512 feature map.
        self.fc = layers.Dense(3 * 3 * 512)

        self.conv1 = layers.Conv2DTranspose(
            filters=256, kernel_size=3, strides=3, padding='valid')
        self.bn1 = layers.BatchNormalization()

        self.conv2 = layers.Conv2DTranspose(
            filters=128, kernel_size=5, strides=2, padding='valid')
        self.bn2 = layers.BatchNormalization()

        # Final layer emits 3 channels.
        self.conv3 = layers.Conv2DTranspose(
            filters=3, kernel_size=4, strides=3, padding='valid')

    def call(self, inputs, training=None):
        """Generate images from latent vectors.

        Args:
            inputs: Batch of latent vectors.
            training: Forwarded to the batch-normalisation layers.

        Returns:
            Tensor of generated images squashed by ``tanh``.
        """
        # Project and reshape: [b, z] => [b, 3*3*512] => [b, 3, 3, 512].
        hidden = tf.reshape(self.fc(inputs), [-1, 3, 3, 512])
        hidden = tf.nn.leaky_relu(hidden)

        # Two upsampling stages: transposed conv -> batch norm -> leaky ReLU.
        hidden = self.bn1(self.conv1(hidden), training=training)
        hidden = tf.nn.leaky_relu(hidden)
        hidden = self.bn2(self.conv2(hidden), training=training)
        hidden = tf.nn.leaky_relu(hidden)

        # Last upsampling stage has no normalisation; tanh bounds the pixels.
        return tf.tanh(self.conv3(hidden))
class Discriminator(keras.Model):
    """Convolutional discriminator that outputs one raw logit per image.

    Shape flow as noted by the original author: [b, 64, 64, 3] => [b, 1].
    No sigmoid is applied to the output.
    """

    def __init__(self):
        super().__init__()
        # First stage has no batch normalisation.
        self.conv1 = layers.Conv2D(
            filters=64, kernel_size=5, strides=3, padding='valid')

        self.conv2 = layers.Conv2D(
            filters=128, kernel_size=5, strides=3, padding='valid')
        self.bn2 = layers.BatchNormalization()

        self.conv3 = layers.Conv2D(
            filters=256, kernel_size=5, strides=3, padding='valid')
        self.bn3 = layers.BatchNormalization()

        # [b, h, w, c] => [b, -1]
        self.flatten = layers.Flatten()
        self.fc = layers.Dense(1)

    def call(self, inputs, training=None):
        """Score a batch of images.

        Args:
            inputs: Batch of images.
            training: Forwarded to the batch-normalisation layers.

        Returns:
            Tensor of logits, one per image ([b, 1]).
        """
        features = tf.nn.leaky_relu(self.conv1(inputs))

        features = self.bn2(self.conv2(features), training=training)
        features = tf.nn.leaky_relu(features)

        features = self.bn3(self.conv3(features), training=training)
        features = tf.nn.leaky_relu(features)

        # [b, h, w, c] => [b, -1] => [b, 1]
        return self.fc(self.flatten(features))
def main():
    """Smoke-test both networks on random inputs and print the results."""
    critic = Discriminator()
    gen = Generator()

    images = tf.random.normal([2, 64, 64, 3])
    latents = tf.random.normal([2, 100])

    # The discriminator returns raw logits (no sigmoid is applied).
    logits = critic(images)
    print(logits)

    generated = gen(latents)
    print(generated.shape)


if __name__ == '__main__':
    main()
import os
import numpy as np
import tensorflow as tf
from tensorflow import keras
from PIL import Image
import glob
from Chapter13.GAN import Generator, Discriminator
from Chapter13.dataset import make_anime_dataset
# Ask TensorFlow's native logging to hide INFO and WARNING messages.
# NOTE(review): this assignment runs after `import tensorflow` above; the
# variable is normally set before the import so that it also covers messages
# emitted while TensorFlow loads -- confirm and move it if that is intended.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
def save_result(val_out, val_block_size, image_path, color_mode):
    """Tile a batch of images into a grid and save it as a single image.

    Images are laid out left to right, ``val_block_size`` per row, and rows
    are stacked top to bottom. Images that do not fill a complete final row
    are dropped.

    Args:
        val_out: Array of shape ``[b, h, w, c]``. Values are mapped from
            ``[-1, 1]`` to ``[0, 255]``; anything outside that range is
            clipped.
        val_block_size: Number of images per grid row; must be positive.
        image_path: Destination path handed to ``Image.save``.
        color_mode: Unused; kept so existing callers keep working.

    Raises:
        ValueError: If ``val_block_size`` is not positive, or if ``val_out``
            holds fewer images than one full row.
    """
    if val_block_size <= 0:
        raise ValueError(
            'val_block_size must be positive, got %r' % (val_block_size,))

    batch = np.asarray(val_out)
    n_rows = batch.shape[0] // val_block_size
    if n_rows == 0:
        raise ValueError(
            'need at least %d images to fill one row, got %d'
            % (val_block_size, batch.shape[0]))

    # Clip before the cast so out-of-range values saturate instead of
    # wrapping around in uint8.
    pixels = np.clip((batch + 1.0) * 127.5, 0, 255).astype(np.uint8)

    # Keep only the images that form complete rows.
    pixels = pixels[:n_rows * val_block_size]
    _, height, width, channels = pixels.shape

    # [rows, cols, h, w, c] => [rows, h, cols, w, c] => [rows*h, cols*w, c]
    grid = pixels.reshape(n_rows, val_block_size, height, width, channels)
    grid = grid.transpose(0, 2, 1, 3, 4)
    grid = grid.reshape(n_rows * height, val_block_size * width, channels)

    # Single-channel images are saved as 2-D arrays.
    if channels == 1:
        grid = np.squeeze(grid, axis=2)
    Image.fromarray(grid).save(image_path)
def celoss_ones(logits):
    """Return the negated mean of ``logits``.

    Despite the name, no cross-entropy is computed; minimising this value
    raises the average of ``logits``.
    """
    mean_logit = tf.reduce_mean(logits)
    return -mean_logit
def celoss_zeros(logits):
    """Return the mean of ``logits``.

    Despite the name, no cross-entropy is computed; minimising this value
    lowers the average of ``logits``.
    """
    mean_logit = tf.reduce_mean(logits)
    return mean_logit
def gradient_penalty(discriminator, batch_x, fake_image):
    """Compute the WGAN-GP gradient penalty on real/fake interpolations.

    Args:
        discriminator: Critic model, invoked as
            ``discriminator(x, training=True)``.
        batch_x: Batch of real images. The mixing weights are created with
            shape ``[b, 1, 1, 1]``, so a 4-D ``[b, h, w, c]`` tensor is
            expected.
        fake_image: Batch of generated images, combined element-wise with
            ``batch_x``.

    Returns:
        Scalar tensor: the batch mean of ``(norm - 1) ** 2``, where ``norm``
        is the per-sample norm of the critic's gradient with respect to the
        interpolated input.
    """
    num_samples = batch_x.shape[0]
    # One uniform mixing weight per sample, expanded to the image shape:
    # [b, 1, 1, 1] => [b, h, w, c].
    mix = tf.broadcast_to(
        tf.random.uniform([num_samples, 1, 1, 1]), batch_x.shape)
    # Random point on the segment between each real and generated image.
    interpolated = mix * batch_x + (1 - mix) * fake_image

    # Differentiate the critic output with respect to the interpolated input,
    # which is not a variable and therefore must be watched explicitly.
    with tf.GradientTape() as tape:
        tape.watch([interpolated])
        critic_out = discriminator(interpolated, training=True)
    input_grads = tape.gradient(critic_out, interpolated)

    # Flatten each sample's gradient ([b, h, w, c] => [b, -1]) and reduce it
    # to a single norm per sample: [b].
    flat_grads = tf.reshape(input_grads, [input_grads.shape[0], -1])
    grad_norms = tf.norm(flat_grads, axis=1)
    # Penalise the squared deviation of each norm from 1.
    return tf.reduce_mean((grad_norms - 1) ** 2)
def d_loss_fn(generator, discriminator, batch_z, batch_x, is_training,
              gp_weight=10.):
    """Compute the WGAN-GP discriminator (critic) loss.

    No cross-entropy is involved: the loss raises the critic output on real
    images, lowers it on generated images, and adds a weighted gradient
    penalty.

    Args:
        generator: Generator model mapping ``batch_z`` to images.
        discriminator: Critic model returning one logit per image.
        batch_z: Batch of latent vectors fed to the generator.
        batch_x: Batch of real images.
        is_training: Forwarded as the ``training`` flag of both models.
        gp_weight: Coefficient applied to the gradient penalty term.
            Defaults to ``10.``.

    Returns:
        Tuple ``(loss, gp)``: the total critic loss and the unweighted
        gradient penalty.
    """
    fake_image = generator(batch_z, training=is_training)
    d_fake_logits = discriminator(fake_image, training=is_training)
    d_real_logits = discriminator(batch_x, training=is_training)

    # celoss_ones yields -mean(logits) and celoss_zeros yields mean(logits),
    # so minimising their sum pushes real scores up and fake scores down.
    d_loss_real = celoss_ones(d_real_logits)
    d_loss_fake = celoss_zeros(d_fake_logits)

    gp = gradient_penalty(discriminator, batch_x, fake_image)

    loss = d_loss_real + d_loss_fake + gp_weight * gp
    return loss, gp
def g_loss_fn(generator, discriminator, batch_z, is_training):
    """Compute the WGAN-GP generator loss.

    Args:
        generator: Generator model mapping ``batch_z`` to images.
        discriminator: Critic model scoring the generated images.
        batch_z: Batch of latent vectors.
        is_training: Passed positionally to both models as their second
            call argument.

    Returns:
        The value of ``celoss_ones`` on the critic's scores for the generated
        images; minimising it raises those scores.
    """
    generated = generator(batch_z, is_training)
    critic_scores = discriminator(generated, is_training)
    return celoss_ones(critic_scores)
def main():
    """Train the WGAN-GP and periodically save a grid of generated images.

    Each outer iteration runs five discriminator updates followed by one
    generator update. Every 100 iterations the losses are printed and a
    10x10 grid generated from a fixed set of latent vectors is written to
    the ``WGAN_iamges_test`` directory, so successive snapshots are directly
    comparable.
    """
    tf.random.set_seed(233)
    np.random.seed(233)
    assert tf.__version__.startswith('2.')

    # hyper parameters
    z_dim = 100
    epochs = 3000000  # number of outer iterations, not passes over the data
    batch_size = 512
    learning_rate = 0.0005
    is_training = True
    d_steps = 5  # discriminator updates per generator update
    out_dir = 'WGAN_iamges_test'

    img_path = glob.glob(r'/Users/xuruihang/Documents/faces_test/*.jpg')
    assert len(img_path) > 0

    dataset, img_shape, _ = make_anime_dataset(img_path, batch_size)
    print(dataset, img_shape)
    sample = next(iter(dataset))
    print(sample.shape, tf.reduce_max(sample).numpy(),
          tf.reduce_min(sample).numpy())
    # Repeat indefinitely so next() never exhausts the iterator.
    dataset = dataset.repeat()
    db_iter = iter(dataset)

    generator = Generator()
    generator.build(input_shape=(4, z_dim))
    discriminator = Discriminator()
    discriminator.build(input_shape=(4, 64, 64, 3))

    # Fixed latent vectors reused for every preview grid.
    z_sample = tf.random.normal([100, z_dim])

    g_optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate, beta_1=0.5)
    d_optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate, beta_1=0.5)

    # Image.save does not create missing directories, so make sure the
    # output directory exists before the first snapshot.
    os.makedirs(out_dir, exist_ok=True)

    for epoch in range(epochs):
        # train D
        for _ in range(d_steps):
            batch_x = next(db_iter)
            # Match the latent batch to the real batch actually returned, so
            # a short batch cannot cause a shape mismatch in the penalty.
            batch_z = tf.random.normal([batch_x.shape[0], z_dim])
            with tf.GradientTape() as tape:
                d_loss, gp = d_loss_fn(generator, discriminator, batch_z, batch_x, is_training)
            grads = tape.gradient(d_loss, discriminator.trainable_variables)
            d_optimizer.apply_gradients(zip(grads, discriminator.trainable_variables))

        # train G
        batch_z = tf.random.normal([batch_size, z_dim])
        with tf.GradientTape() as tape:
            g_loss = g_loss_fn(generator, discriminator, batch_z, is_training)
        grads = tape.gradient(g_loss, generator.trainable_variables)
        g_optimizer.apply_gradients(zip(grads, generator.trainable_variables))

        if epoch % 100 == 0:
            print(epoch, 'd-loss:', float(d_loss), 'g-loss:', float(g_loss),
                  'gp:', float(gp))

            fake_image = generator(z_sample, training=False)
            # Separate name so the glob result in img_path is not overwritten.
            save_path = os.path.join(out_dir, 'wgan-%d.png' % epoch)
            save_result(fake_image.numpy(), 10, save_path, color_mode='P')


if __name__ == '__main__':
    main()