【深度学习】用变分自编码器生成图像和生成式对抗网络

目录

问题描述:

代码展示:

VAE代码段

GAN部分(仅供参考)

 

运行截图:

参考:


问题描述:

        从图像的潜在空间中采样,并创建全新图像或编辑现有图像,这是目前最流行也是最成功的创造性人工智能应用。在图像生成领域中,两种主要技术分别是变分自编码器(VAE,variational autoencoder)和生成式对抗网络(GAN,generative adversarial network)。

        GAN 和 VAE 是两种不同的策略,每种策略都有各自的特点。VAE 非常适合用于学习具有良好结构的潜在空间,其中特定方向表示数据中有意义的变化轴。GAN 生成的图像可能非常逼真,但它的潜在空间可能没有良好结构,也没有足够的连续性。

代码展示:

VAE代码段

# VAE encoder network: maps a 28x28x1 image to the mean and log-variance
# of a distribution over the latent space.
from tensorflow import keras
from tensorflow.keras import layers

latent_dim = 2  # dimensionality of the latent space

encoder_inputs = keras.Input(shape=(28, 28, 1))
x = encoder_inputs
# Two stride-2 convolutions, first with 32 filters, then with 64.
for conv_filters in (32, 64):
    x = layers.Conv2D(
        conv_filters, 3, activation="relu", strides=2, padding="same"
    )(x)
x = layers.Flatten()(x)
x = layers.Dense(16, activation="relu")(x)
# Two parallel linear heads of size latent_dim share the same features.
z_mean = layers.Dense(latent_dim, name="z_mean")(x)
z_log_var = layers.Dense(latent_dim, name="z_log_var")(x)
encoder = keras.Model(
    inputs=encoder_inputs,
    outputs=[z_mean, z_log_var],
    name="encoder",
)
encoder.summary()


#潜在空间采样函数
import tensorflow as tf

class Sampler(layers.Layer):
    """Layer that samples a latent point from (z_mean, z_log_var)."""

    def call(self,z_mean,z_log_var):
        """Return z_mean + exp(0.5 * z_log_var) * eps, with eps ~ N(0, 1).

        The noise tensor has one row per row of ``z_mean`` and one column
        per column of ``z_mean``.
        """
        noise_shape = (tf.shape(z_mean)[0], tf.shape(z_mean)[1])
        noise = tf.random.normal(shape=noise_shape)
        # exp(0.5 * log_var) turns the log-variance into a standard deviation.
        std_dev = tf.exp(0.5 * z_log_var)
        return z_mean + std_dev * noise
        
# VAE decoder network: maps a point of the latent space to an image.
latent_inputs = keras.Input(shape=(latent_dim,))

# Project the latent vector to 7*7*64 units and fold it into a
# 7x7 feature map with 64 channels.
x = layers.Dense(7 * 7 * 64, activation='relu')(latent_inputs)
x = layers.Reshape((7, 7, 64))(x)

# Two stride-2 transposed convolutions, first with 64 filters, then with 32.
for deconv_filters in (64, 32):
    x = layers.Conv2DTranspose(
        deconv_filters, 3, activation='relu', strides=2, padding='same'
    )(x)

# Single-channel output squashed by a sigmoid.
decoder_outputs = layers.Conv2D(1, 3, activation='sigmoid', padding='same')(x)
decoder = keras.Model(
    inputs=latent_inputs, outputs=decoder_outputs, name='decoder'
)

decoder.summary()



# VAE model with a custom train_step that computes the VAE loss
class VAE(keras.Model):
    """Variational autoencoder that chains an encoder, a sampler and a decoder.

    Args:
        encoder: model returning ``(z_mean, z_log_var)`` for a batch of inputs.
        decoder: model mapping sampled latent points back to input space.
        **kwargs: forwarded to ``keras.Model``.
    """

    def __init__(self,encoder,decoder,**kwargs):
        super().__init__(**kwargs)
        self.encoder = encoder
        self.decoder = decoder
        self.sampler = Sampler()
        # Running means of each loss term, reported by train_step.
        self.total_loss_tracker = keras.metrics.Mean(name='total_loss')
        self.reconstruction_loss_tracker = keras.metrics.Mean(
            name='reconstruction_loss'
        )
        self.kl_loss_tracker = keras.metrics.Mean(name='kl_loss')

    @property
    def metrics(self):
        """Return the three loss trackers owned by this model."""
        return [self.total_loss_tracker,
                self.reconstruction_loss_tracker,
                self.kl_loss_tracker]

    def train_step(self,data):
        """Run one optimization step on a batch and return the running losses.

        Args:
            data: batch of inputs; it is used both as encoder input and as
                the reconstruction target.

        Returns:
            Dict with the current ``total_loss``, ``reconstruction_loss``
            and ``kl_loss`` tracker values.
        """
        with tf.GradientTape() as tape:
            z_mean, z_log_var = self.encoder(data)
            z = self.sampler(z_mean, z_log_var)
            # Use the decoder owned by this model, not a module-level global,
            # so the instance works with whatever decoder it was given.
            reconstruction = self.decoder(z)
            # Binary cross-entropy summed over axes 1 and 2, then averaged
            # over the remaining (batch) axis.
            reconstruction_loss = tf.reduce_mean(
                tf.reduce_sum(
                    keras.losses.binary_crossentropy(data, reconstruction),
                    axis=(1, 2)
                )
            )
            # Element-wise KL term; it is averaged over every element
            # (batch and latent axes) when added to the total loss.
            kl_loss = -0.5 * (1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var))
            total_loss = reconstruction_loss + tf.reduce_mean(kl_loss)

        grads = tape.gradient(total_loss, self.trainable_weights)
        self.optimizer.apply_gradients(zip(grads, self.trainable_weights))
        self.total_loss_tracker.update_state(total_loss)
        self.reconstruction_loss_tracker.update_state(reconstruction_loss)
        self.kl_loss_tracker.update_state(kl_loss)

        return {
            "total_loss": self.total_loss_tracker.result(),
            "reconstruction_loss": self.reconstruction_loss_tracker.result(),
            "kl_loss": self.kl_loss_tracker.result(),
        }


# Train the VAE
import numpy as np

# The labels are discarded; train and test images are merged into one array.
(x_train, _), (x_test, _) = keras.datasets.mnist.load_data()
mnist_digits = np.concatenate([x_train, x_test], axis=0)
# Append a trailing channel axis, cast to float32 and divide by 255.
mnist_digits = mnist_digits[..., np.newaxis].astype("float32") / 255

vae = VAE(encoder, decoder)
vae.compile(
    optimizer=keras.optimizers.Adam(),
    run_eagerly=True,
)
vae.fit(mnist_digits, epochs=30, batch_size=128)


# Sample a grid of points from the 2-D latent space and decode them to images
import matplotlib.pyplot as plt

n = 30            # the mosaic shows n x n decoded images
digit_size = 28   # side length, in pixels, of one decoded image
grid_x = np.linspace(-1, 1, n)
grid_y = np.linspace(-1, 1, n)[::-1]

# Build all (z[0], z[1]) pairs at once: row i uses grid_y[i], column j uses
# grid_x[j], flattened in row-major order so that point k = i * n + j.
z0, z1 = np.meshgrid(grid_x, grid_y)
z_grid = np.stack([z0.ravel(), z1.ravel()], axis=1)

# Decode the whole grid with a single predict() call instead of n * n
# separate single-point calls.
x_decoded = vae.decoder.predict(z_grid)

# (n*n, H, W, 1) -> (row, col, H, W) -> (row, H, col, W) -> one big image in
# which the tile for grid cell (i, j) occupies rows i*H..(i+1)*H and
# columns j*W..(j+1)*W.
figure = (
    x_decoded.reshape(n, n, digit_size, digit_size)
    .transpose(0, 2, 1, 3)
    .reshape(n * digit_size, n * digit_size)
)

plt.figure(figsize=(15,15))

# Tick positions at the centre of each tile, labelled with the latent values.
start_range = digit_size // 2
end_range = n * digit_size + start_range
pixel_range = np.arange(start_range, end_range, digit_size)
sample_range_x = np.round(grid_x, 1)
sample_range_y = np.round(grid_y, 1)

plt.xticks(pixel_range, sample_range_x)
plt.yticks(pixel_range, sample_range_y)
plt.xlabel('z[0]')
plt.ylabel('z[1]')
# NOTE(review): axis('off') hides the ticks and labels configured above;
# kept as in the original so the rendered figure is unchanged.
plt.axis('off')
plt.imshow(figure)

GAN部分(仅供参考)

 

from tensorflow import keras
# Load the images found under "celeba_gan" without labels, resized to 64x64
# and grouped in batches of 32.
dataset = keras.utils.image_dataset_from_directory(
    "celeba_gan",
    label_mode=None,
    image_size=(64, 64),
    batch_size=32,
    smart_resize=True,
)
# Divide every pixel value by 255.
dataset = dataset.map(lambda x: x / 255.)
import matplotlib.pyplot as plt

# Show the first image of the first batch only.
for x in dataset.take(1):
    first_image = (x.numpy() * 255).astype("int32")[0]
    plt.axis("off")
    plt.imshow(first_image)

# GAN generator network
from tensorflow.keras import layers
from tensorflow import keras

latent_dim = 128  # size of the random vector fed to the generator

# Project the latent vector to 8*8*128 units and fold it into an
# 8x8 feature map with 128 channels.
generator_layers = [
    keras.Input(shape=(latent_dim,)),
    layers.Dense(8 * 8 * 128),
    layers.Reshape((8, 8, 128)),
]
# Three stride-2 transposed convolutions, each followed by a LeakyReLU.
for upsample_filters in (128, 256, 512):
    generator_layers.append(
        layers.Conv2DTranspose(
            upsample_filters, kernel_size=4, strides=2, padding="same"
        )
    )
    generator_layers.append(layers.LeakyReLU(alpha=0.2))
# Three output channels squashed by a sigmoid.
generator_layers.append(
    layers.Conv2D(3, kernel_size=5, padding="same", activation="sigmoid")
)

generator = keras.Sequential(generator_layers, name='generator')


generator.summary()



from tensorflow.keras import layers
# Discriminator network
discriminator_layers = [keras.Input(shape=(64, 64, 3))]
# Three stride-2 convolutions, each followed by a LeakyReLU.
for conv_filters in (64, 128, 128):
    discriminator_layers.append(
        layers.Conv2D(conv_filters, kernel_size=4, strides=2, padding="same")
    )
    discriminator_layers.append(layers.LeakyReLU(alpha=0.2))
# Flatten, apply dropout, and end with a single sigmoid unit.
discriminator_layers.extend(
    [
        layers.Flatten(),
        layers.Dropout(0.2),
        layers.Dense(1, activation="sigmoid"),
    ]
)

discriminator = keras.Sequential(discriminator_layers, name="discriminator")

#对抗网络-GAN模型
import tensorflow as  tf


class GAN(keras.Model):
    """Adversarial model that trains a discriminator and a generator in turn.

    Args:
        discriminator: model scoring a batch of images.
        generator: model turning latent vectors into images.
        latent_dim: length of the random latent vectors fed to the generator.
    """

    def __init__(self,discriminator,generator,latent_dim):
        super().__init__()
        self.discriminator = discriminator
        self.generator = generator
        self.latent_dim = latent_dim

        # Running means of both losses, reported by train_step.
        self.d_loss_metric = keras.metrics.Mean(name='d_loss')
        self.g_loss_metric = keras.metrics.Mean(name='g_loss')

    def compile(self,d_optimizer,g_optimizer,loss_fn):
        """Store one optimizer per sub-network and the shared loss function."""
        super().compile()
        self.d_optimizer = d_optimizer
        self.g_optimizer = g_optimizer
        self.loss_fn = loss_fn

    @property
    def metrics(self):
        """Return the discriminator and generator loss trackers."""
        return [self.d_loss_metric, self.g_loss_metric]

    def train_step(self,real_images):
        """Update the discriminator, then the generator, on one batch.

        Returns:
            Dict with the current ``d_loss`` and ``g_loss`` tracker values.
        """
        n_samples = tf.shape(real_images)[0]

        # --- Discriminator update ---
        # Generate as many fake images as there are real ones.
        latent_points = tf.random.normal(shape=(n_samples, self.latent_dim))
        fake_images = self.generator(latent_points)
        mixed_images = tf.concat([fake_images, real_images], axis=0)

        # Targets: 1 for generated images, 0 for real images, in the same
        # order as the concatenation above.
        targets = tf.concat(
            [tf.ones((n_samples, 1)), tf.zeros((n_samples, 1))], axis=0
        )
        # Add uniform random noise, scaled by 0.05, to the targets.
        targets = targets + 0.05 * tf.random.uniform(tf.shape(targets))

        with tf.GradientTape() as d_tape:
            d_scores = self.discriminator(mixed_images)
            d_loss = self.loss_fn(targets, d_scores)

        d_grads = d_tape.gradient(d_loss, self.discriminator.trainable_weights)
        self.d_optimizer.apply_gradients(
            zip(d_grads, self.discriminator.trainable_weights)
        )

        # --- Generator update ---
        latent_points = tf.random.normal(shape=(n_samples, self.latent_dim))
        # All-zero targets, i.e. the value used for real images above.
        misleading_targets = tf.zeros((n_samples, 1))

        with tf.GradientTape() as g_tape:
            g_scores = self.discriminator(self.generator(latent_points))
            g_loss = self.loss_fn(misleading_targets, g_scores)

        # Only the generator's weights are updated in this phase.
        g_grads = g_tape.gradient(g_loss, self.generator.trainable_weights)
        self.g_optimizer.apply_gradients(
            zip(g_grads, self.generator.trainable_weights)
        )

        self.d_loss_metric.update_state(d_loss)
        self.g_loss_metric.update_state(g_loss)
        return {
            "d_loss": self.d_loss_metric.result(),
            "g_loss": self.g_loss_metric.result(),
        }



# Callback that samples generated images during training
class GANMonitor(keras.callbacks.Callback):
    """Save a few generated images to PNG files at the end of every epoch.

    Args:
        num_img: number of images to generate and save per epoch.
        latent_dim: length of the random latent vectors fed to the generator.
    """

    def __init__(self,num_img=3,latent_dim =128):
        # Initialise the base Callback state before adding our own.
        super().__init__()
        self.num_img = num_img
        self.latent_dim = latent_dim

    def on_epoch_end(self,epoch,logs=None):
        """Generate ``num_img`` images and write each one to a PNG file."""
        random_latent_vectors = tf.random.normal(
            shape=(self.num_img, self.latent_dim)
        )
        generated_images = self.model.generator(random_latent_vectors)
        # Multiply by 255 and keep the NumPy conversion: the original called
        # .numpy() but discarded its result.
        generated_images = (generated_images * 255).numpy()
        for i in range(self.num_img):
            img = keras.utils.array_to_img(generated_images[i])
            img.save(f"GAN -generated_img_{epoch:03d}_{i}.png")



# Compile and train the model
epochs = 10

gan = GAN(
    discriminator=discriminator,
    generator=generator,
    latent_dim=latent_dim,
)
# Both networks use Adam with the same learning rate and a shared
# binary cross-entropy loss.
gan.compile(
    d_optimizer=keras.optimizers.Adam(learning_rate=0.0001),
    g_optimizer=keras.optimizers.Adam(learning_rate=0.0001),
    loss_fn=keras.losses.BinaryCrossentropy(),
)

# Save 10 generated images at the end of every epoch.
monitor = GANMonitor(num_img=10, latent_dim=latent_dim)
gan.fit(dataset, epochs=epochs, callbacks=[monitor])

    

运行截图:

下图为VAE的运行截图,关于GAN因训练时间太长所以暂无成功截图

【深度学习】用变分自编码器生成图像和生成式对抗网络_第1张图片

参考:

《Python深度学习》

你可能感兴趣的:(深度学习,人工智能,计算机视觉)