目录
问题描述:
代码展示:
VAE代码段
GAN部分(仅供参考)
运行截图:
参考:
从图像的潜在空间中采样,并创建全新图像或编辑现有图像,这是目前最流行也是最成功的创造性人工智能应用。图像生成领域的两种主要技术分别是变分自编码器(VAE,variational autoencoder)和生成式对抗网络(GAN,generative adversarial network)。
GAN 和 VAE 是两种不同的策略,每种策略都有各自的特点。VAE 非常适合用于学习具有良好结构的潜在空间,其中特定方向表示数据中有意义的变化轴。GAN 生成的图像可能非常逼真,但它的潜在空间可能没有良好结构,也没有足够的连续性。
# VAE encoder network: maps a (28, 28, 1) input to z_mean and z_log_var.
from tensorflow import keras
from tensorflow.keras import layers

latent_dim = 2  # size of the latent vectors produced by the encoder

encoder_inputs = keras.Input(shape=(28, 28, 1))

# Two stride-2 convolutions (32 then 64 filters) applied back to back.
conv_features = encoder_inputs
for num_filters in (32, 64):
    conv_features = layers.Conv2D(
        num_filters, 3, activation="relu", strides=2, padding="same"
    )(conv_features)

flat_features = layers.Flatten()(conv_features)
hidden = layers.Dense(16, activation="relu")(flat_features)

# Two parallel linear heads share the same hidden representation.
z_mean = layers.Dense(latent_dim, name="z_mean")(hidden)
z_log_var = layers.Dense(latent_dim, name="z_log_var")(hidden)

encoder = keras.Model(encoder_inputs, [z_mean, z_log_var], name="encoder")
encoder.summary()
# Latent-space sampling layer
import tensorflow as tf


class Sampler(layers.Layer):
    """Layer that draws a latent point from ``z_mean`` and ``z_log_var``."""

    def call(self, z_mean, z_log_var):
        """Return ``z_mean + exp(0.5 * z_log_var) * epsilon``.

        ``epsilon`` is standard-normal noise whose shape is taken from the
        first two dimensions of ``z_mean``.
        """
        mean_shape = tf.shape(z_mean)
        epsilon = tf.random.normal(shape=(mean_shape[0], mean_shape[1]))
        # exp(0.5 * log_var) turns the log-variance into a standard deviation.
        std_dev = tf.exp(0.5 * z_log_var)
        return z_mean + std_dev * epsilon
# VAE decoder network: maps a latent-space point to an image.
latent_inputs = keras.Input(shape=(latent_dim,))

# Project the latent vector to 7*7*64 values and reshape it into a feature map.
projected = layers.Dense(7 * 7 * 64, activation="relu")(latent_inputs)
feature_map = layers.Reshape((7, 7, 64))(projected)

# Two stride-2 transposed convolutions (64 then 32 filters).
for num_filters in (64, 32):
    feature_map = layers.Conv2DTranspose(
        num_filters, 3, activation="relu", strides=2, padding="same"
    )(feature_map)

# Single-channel output squashed by a sigmoid.
decoder_outputs = layers.Conv2D(
    1, 3, activation="sigmoid", padding="same"
)(feature_map)

decoder = keras.Model(latent_inputs, decoder_outputs, name="decoder")
decoder.summary()
# VAE model with a custom train_step() that computes the VAE loss
class VAE(keras.Model):
    """Variational autoencoder combining an encoder, a sampler and a decoder.

    Args:
        encoder: model called on the input batch; must return
            ``(z_mean, z_log_var)``.
        decoder: model called on the sampled latent points; returns the
            reconstruction.
        **kwargs: forwarded to ``keras.Model.__init__``.
    """

    def __init__(self, encoder, decoder, **kwargs):
        super().__init__(**kwargs)
        self.encoder = encoder
        self.decoder = decoder
        self.sampler = Sampler()
        # Running means of the three loss values.
        self.total_loss_tracker = keras.metrics.Mean(name="total_loss")
        self.reconstruction_loss_tracker = keras.metrics.Mean(
            name="reconstruction_loss"
        )
        self.kl_loss_tracker = keras.metrics.Mean(name="kl_loss")

    @property
    def metrics(self):
        """Return the three loss trackers owned by this model."""
        return [
            self.total_loss_tracker,
            self.reconstruction_loss_tracker,
            self.kl_loss_tracker,
        ]

    def train_step(self, data):
        """Run one optimization step on ``data`` and return the running losses.

        ``data`` is passed straight to the encoder and is also used as the
        reconstruction target.
        # NOTE(review): assumes ``fit`` is called with inputs only (no labels).

        Returns:
            dict with keys ``"total_loss"``, ``"reconstruction_loss"`` and
            ``"kl_loss"`` holding the current tracker results.
        """
        with tf.GradientTape() as tape:
            z_mean, z_log_var = self.encoder(data)
            z = self.sampler(z_mean, z_log_var)
            # Use the decoder owned by this instance, not a module-level
            # global that happens to be named ``decoder``.
            reconstruction = self.decoder(z)
            # Cross-entropy summed over axes 1 and 2, then averaged.
            reconstruction_loss = tf.reduce_mean(
                tf.reduce_sum(
                    keras.losses.binary_crossentropy(data, reconstruction),
                    axis=(1, 2),
                )
            )
            # Element-wise KL term; reduced to a scalar when added below.
            kl_loss = -0.5 * (
                1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var)
            )
            total_loss = reconstruction_loss + tf.reduce_mean(kl_loss)
        grads = tape.gradient(total_loss, self.trainable_weights)
        self.optimizer.apply_gradients(zip(grads, self.trainable_weights))
        self.total_loss_tracker.update_state(total_loss)
        self.reconstruction_loss_tracker.update_state(reconstruction_loss)
        self.kl_loss_tracker.update_state(kl_loss)
        return {
            "total_loss": self.total_loss_tracker.result(),
            "reconstruction_loss": self.reconstruction_loss_tracker.result(),
            "kl_loss": self.kl_loss_tracker.result(),
        }
# Train the VAE
import numpy as np

# Labels are discarded; the train and test images are pooled into one array.
(x_train, _), (x_test, _) = keras.datasets.mnist.load_data()
all_images = np.concatenate([x_train, x_test], axis=0)

# Append a trailing axis and divide by 255 as float32.
mnist_digits = all_images[..., np.newaxis].astype("float32") / 255

vae = VAE(encoder, decoder)
vae.compile(optimizer=keras.optimizers.Adam(), run_eagerly=True)
vae.fit(mnist_digits, epochs=30, batch_size=128)
# Sample a grid of points from the 2D latent space and decode them to images
import matplotlib.pyplot as plt

n = 30  # the figure is an n x n grid of decoded digits
digit_size = 28
figure = np.zeros((digit_size * n, digit_size * n))

grid_x = np.linspace(-1, 1, n)
grid_y = np.linspace(-1, 1, n)[::-1]

# Decode the whole grid with a single predict() call instead of n * n
# separate calls. Points are laid out row-major: row i uses grid_y[i],
# column j uses grid_x[j].
z_samples = np.array([[xi, yi] for yi in grid_y for xi in grid_x])
decoded_digits = vae.decoder.predict(z_samples)

for i in range(n):
    for j in range(n):
        digit = decoded_digits[i * n + j].reshape(digit_size, digit_size)
        figure[
            i * digit_size : (i + 1) * digit_size,
            j * digit_size : (j + 1) * digit_size,
        ] = digit

plt.figure(figsize=(15, 15))

# One tick per tile, positioned at the tile centre.
start_range = digit_size // 2
end_range = n * digit_size + start_range
pixel_range = np.arange(start_range, end_range, digit_size)
sample_range_x = np.round(grid_x, 1)
sample_range_y = np.round(grid_y, 1)
plt.xticks(pixel_range, sample_range_x)
plt.yticks(pixel_range, sample_range_y)
plt.xlabel('z[0]')
plt.ylabel('z[1]')
# NOTE(review): axis('off') hides the ticks and labels configured above;
# kept as in the original.
plt.axis('off')
plt.imshow(figure)
# Build the image dataset from the "celeba_gan" directory
from tensorflow import keras

dataset = keras.utils.image_dataset_from_directory(
    "celeba_gan",
    label_mode=None,
    image_size=(64, 64),
    batch_size=32,
    smart_resize=True,
)
# Divide every batch by 255.
dataset = dataset.map(lambda images: images / 255.)

import matplotlib.pyplot as plt

# Show the first image of the first batch only.
for first_batch in dataset.take(1):
    plt.axis("off")
    preview = (first_batch.numpy() * 255).astype("int32")[0]
    plt.imshow(preview)
# GAN generator network
from tensorflow.keras import layers
from tensorflow import keras

latent_dim = 128

# Project the latent vector to 8*8*128 values and reshape to (8, 8, 128).
generator_layers = [
    keras.Input(shape=(latent_dim,)),
    layers.Dense(8 * 8 * 128),
    layers.Reshape((8, 8, 128)),
]
# Three stride-2 transposed convolutions, each followed by a LeakyReLU.
for num_filters in (128, 256, 512):
    generator_layers += [
        layers.Conv2DTranspose(
            num_filters, kernel_size=4, strides=2, padding="same"
        ),
        layers.LeakyReLU(alpha=0.2),
    ]
# Final 3-channel convolution with a sigmoid activation.
generator_layers.append(
    layers.Conv2D(3, kernel_size=5, padding="same", activation="sigmoid")
)

generator = keras.Sequential(generator_layers, name="generator")
generator.summary()
from tensorflow.keras import layers

# Discriminator network
discriminator_layers = [keras.Input(shape=(64, 64, 3))]
# Three stride-2 convolutions, each followed by a LeakyReLU.
for num_filters in (64, 128, 128):
    discriminator_layers += [
        layers.Conv2D(num_filters, kernel_size=4, strides=2, padding="same"),
        layers.LeakyReLU(alpha=0.2),
    ]
# Flatten, apply dropout, and emit a single sigmoid output per image.
discriminator_layers += [
    layers.Flatten(),
    layers.Dropout(0.2),
    layers.Dense(1, activation="sigmoid"),
]

discriminator = keras.Sequential(discriminator_layers, name="discriminator")
# Adversarial network: the GAN model
import tensorflow as tf


class GAN(keras.Model):
    """Model pairing a discriminator and a generator with a custom train_step."""

    def __init__(self, discriminator, generator, latent_dim):
        super().__init__()
        self.discriminator = discriminator
        self.generator = generator
        self.latent_dim = latent_dim
        # Running means of the two losses.
        self.d_loss_metric = keras.metrics.Mean(name="d_loss")
        self.g_loss_metric = keras.metrics.Mean(name="g_loss")

    def compile(self, d_optimizer, g_optimizer, loss_fn):
        """Store one optimizer per sub-network and the shared loss function."""
        super().compile()
        self.d_optimizer = d_optimizer
        self.g_optimizer = g_optimizer
        self.loss_fn = loss_fn

    @property
    def metrics(self):
        """Return the discriminator and generator loss trackers."""
        return [self.d_loss_metric, self.g_loss_metric]

    def _random_latent_vectors(self, batch_size):
        """Draw ``batch_size`` normal vectors of length ``latent_dim``."""
        return tf.random.normal(shape=(batch_size, self.latent_dim))

    def train_step(self, real_images):
        """Update the discriminator, then the generator, on one batch.

        Returns:
            dict with keys ``"d_loss"`` and ``"g_loss"`` holding the current
            tracker results.
        """
        batch_size = tf.shape(real_images)[0]

        # --- Discriminator step ---
        # Generated images come first (labelled 1), real images second
        # (labelled 0).
        fake_images = self.generator(self._random_latent_vectors(batch_size))
        combined_images = tf.concat([fake_images, real_images], axis=0)
        fake_labels = tf.ones((batch_size, 1))
        real_labels = tf.zeros((batch_size, 1))
        labels = tf.concat([fake_labels, real_labels], axis=0)
        # Add uniform noise scaled by 0.05 to the labels.
        labels += 0.05 * tf.random.uniform(tf.shape(labels))

        with tf.GradientTape() as d_tape:
            predictions = self.discriminator(combined_images)
            d_loss = self.loss_fn(labels, predictions)
        d_weights = self.discriminator.trainable_weights
        d_grads = d_tape.gradient(d_loss, d_weights)
        self.d_optimizer.apply_gradients(zip(d_grads, d_weights))

        # --- Generator step ---
        # Fresh latent vectors, all labelled 0; only generator weights are
        # updated here.
        latent_vectors = self._random_latent_vectors(batch_size)
        misleading_labels = tf.zeros((batch_size, 1))

        with tf.GradientTape() as g_tape:
            predictions = self.discriminator(self.generator(latent_vectors))
            g_loss = self.loss_fn(misleading_labels, predictions)
        g_weights = self.generator.trainable_weights
        g_grads = g_tape.gradient(g_loss, g_weights)
        self.g_optimizer.apply_gradients(zip(g_grads, g_weights))

        self.d_loss_metric.update_state(d_loss)
        self.g_loss_metric.update_state(g_loss)
        return {
            "d_loss": self.d_loss_metric.result(),
            "g_loss": self.g_loss_metric.result(),
        }
# Callback that samples generated images during training
class GANMonitor(keras.callbacks.Callback):
    """Save a few generator outputs as PNG files at the end of every epoch.

    Args:
        num_img: number of images to generate and save per epoch.
        latent_dim: length of the random latent vectors fed to the generator.
    """

    def __init__(self, num_img=3, latent_dim=128):
        # Let the base Callback set up its own state before adding ours.
        super().__init__()
        self.num_img = num_img
        self.latent_dim = latent_dim

    def on_epoch_end(self, epoch, logs=None):
        """Generate ``num_img`` images and write them to the working directory."""
        random_latent_vectors = tf.random.normal(
            shape=(self.num_img, self.latent_dim)
        )
        generated_images = self.model.generator(random_latent_vectors)
        # Scale by 255 and keep the NumPy array; the original discarded the
        # result of .numpy().
        generated_images = (generated_images * 255).numpy()
        for i in range(self.num_img):
            img = keras.utils.array_to_img(generated_images[i])
            img.save(f"GAN -generated_img_{epoch:03d}_{i}.png")
# Compile and train the GAN
epochs = 10

gan = GAN(discriminator=discriminator, generator=generator, latent_dim=latent_dim)

# Separate Adam optimizers with the same learning rate for each sub-network.
discriminator_optimizer = keras.optimizers.Adam(learning_rate=0.0001)
generator_optimizer = keras.optimizers.Adam(learning_rate=0.0001)
bce_loss = keras.losses.BinaryCrossentropy()
gan.compile(
    d_optimizer=discriminator_optimizer,
    g_optimizer=generator_optimizer,
    loss_fn=bce_loss,
)

# Save 10 generated images after every epoch.
monitor = GANMonitor(num_img=10, latent_dim=latent_dim)
gan.fit(dataset, epochs=epochs, callbacks=[monitor])
下图为 VAE 的运行截图;GAN 因训练时间太长,暂无成功运行的截图。
《Python深度学习》