Embedding 层将正整数(索引值)转换为 $\color{red}{\text{固定尺寸的稠密向量}}$,并将这个稠密向量与 N 维的正态分布随机数 $\color{red}{\text{相乘}}$,从而获得一个 $\color{red}{\text{有标签的随机数}}$。具体操作方式是:
- 生成一个N维的正态分布随机数,再利用Embedding层将正整数(索引值)转换为N维的稠密向量;
- 将这个稠密向量与N维的正态分布随机数相乘。
输入数据代码:
# Two symbolic inputs: a latent noise vector and an integer class index.
noise = Input(shape=(self.latent_dim,))  # (None, latent_dim)
label = Input(shape=(1,), dtype='int32')  # (None, 1)
# The Embedding layer maps each class index to a dense vector of length
# latent_dim; Flatten drops the length-1 sequence axis it produces.
embedded_label = Embedding(self.num_classes, self.latent_dim)(label)
label_embedding = Flatten()(embedded_label)
# Condition the noise on the requested class by element-wise multiplication.
model_input = multiply([noise, label_embedding])
img = model(model_input)
生成器代码
def build_generator(self):
    """Build the class-conditional generator.

    A convolutional stack turns a vector of length ``self.latent_dim`` into
    an image with ``self.channels`` channels. The returned functional model
    wraps that stack so that it takes ``[noise, label]`` and feeds the stack
    with the noise multiplied element-wise by an embedding of the label.

    Returns:
        Model mapping ``[noise, label]`` to the generated image tensor.
    """
    layer_stack = [
        # Project the latent vector onto 32 * 7 * 7 units ...
        Dense(32 * 7 * 7, activation="relu", input_dim=self.latent_dim),
        # ... and view them as a 7x7 feature map with 32 channels.
        Reshape((7, 7, 32)),
        # 7x7x32 -> 7x7x64
        Conv2D(filters=64, kernel_size=3, padding="same"),
        BatchNormalization(momentum=0.8),
        Activation("relu"),
        # Upsample (default factor 2), then 14x14x64 -> 14x14x128
        UpSampling2D(),
        Conv2D(filters=128, kernel_size=3, padding="same"),
        BatchNormalization(momentum=0.8),
        Activation("relu"),
        # Upsample again, then 28x28x128 -> 28x28x64
        UpSampling2D(),
        Conv2D(filters=64, kernel_size=3, padding="same"),
        BatchNormalization(momentum=0.8),
        Activation("relu"),
        # 28x28x64 -> 28x28xchannels; tanh keeps outputs in [-1, 1]
        Conv2D(self.channels, kernel_size=3, padding="same"),
        Activation("tanh"),
    ]
    model = Sequential(layer_stack, name='generator')
    model.summary()

    noise_in = Input(shape=(self.latent_dim,))  # (None, latent_dim)
    label_in = Input(shape=(1,), dtype='int32')  # (None, 1)
    # Map the integer class index to a dense vector of length latent_dim.
    embedded = Embedding(self.num_classes, self.latent_dim)(label_in)
    label_vector = Flatten()(embedded)
    # Requested class * random noise = labelled latent vector.
    conditioned = multiply([noise_in, label_vector])
    generated = model(conditioned)
    return Model([noise_in, label_in], generated)
ACGAN的判别网络由卷积构成,与普通GAN的判别模型相比,它的目的 $\color{red}{\text{不仅要判断出真伪,还要判断出种类}}$(普通GAN的判别模型的目的是根据输入的图片判断出真伪)。
def build_discriminator(self):
    """Build the two-headed discriminator.

    A strided convolutional stack reduces an image of shape
    ``self.img_shape`` to a 128-dimensional feature vector, which feeds two
    dense heads: a sigmoid real/fake score and a softmax over
    ``self.num_classes`` classes.

    Shape comments below assume a 28x28x1 input -- confirm against
    ``self.img_shape``.

    Returns:
        Model mapping an image to ``[validity, label]``.
    """
    feature_layers = [
        # 28x28x1 -> 14x14x16
        Conv2D(filters=16, kernel_size=3, strides=2,
               input_shape=self.img_shape, padding="same"),
        LeakyReLU(alpha=0.2),
        Dropout(0.25),
        # 14x14x16 -> 7x7x32
        Conv2D(filters=32, kernel_size=3, strides=2, padding="same"),
        LeakyReLU(alpha=0.2),
        Dropout(0.25),
        BatchNormalization(momentum=0.8),
        # Pad one row at the bottom and one column on the right: 7x7 -> 8x8,
        # so the next stride-2 convolution yields 4x4x64.
        ZeroPadding2D(padding=((0, 1), (0, 1))),
        Conv2D(filters=64, kernel_size=3, strides=2, padding="same"),
        LeakyReLU(alpha=0.2),
        Dropout(0.25),
        BatchNormalization(momentum=0.8),
        # 4x4x64 -> 4x4x128
        Conv2D(filters=128, kernel_size=3, strides=1, padding="same"),
        LeakyReLU(alpha=0.2),
        Dropout(0.25),
        # Average over the spatial axes -> (None, 128)
        GlobalAveragePooling2D(),
    ]
    model = Sequential(feature_layers, name='discriminator')

    image_in = Input(shape=self.img_shape)
    features = model(image_in)  # (None, 128)
    # Head 1: probability that the image is real.
    validity = Dense(1, activation="sigmoid")(features)  # (None, 1)
    # Head 2: class distribution of the image.
    label = Dense(self.num_classes, activation="softmax")(features)
    return Model(image_in, [validity, label])
与传统的GAN训练思想大致相同,只不过在此基础上增加了分类的输出。
ACGAN的训练思路分为如下几个步骤:
def train(self, epochs, batch_size=128, sample_interval=50):
    """Alternate discriminator and generator updates on MNIST.

    Args:
        epochs: Number of training iterations; each one processes a single
            random batch.
        batch_size: Number of real and of generated images per iteration.
        sample_interval: Not used in this excerpt.
    """
    # Load the training split only; the test split is discarded.
    (X_train, y_train), (_, _) = mnist.load_data()
    # Scale pixels to [-1, 1] (assuming 8-bit input) to match the
    # generator's tanh output, and append a channel axis.
    X_train = (X_train.astype(np.float32) - 127.5) / 127.5
    X_train = np.expand_dims(X_train, axis=3)
    Y_train = y_train.reshape(-1, 1)

    # Real/fake targets for the validity head.
    valid = np.ones((batch_size, 1))  # (batch_size, 1)
    fake = np.zeros((batch_size, 1))  # (batch_size, 1)

    for epoch in range(epochs):
        # Draw a random batch of real images and their labels.
        idx = np.random.randint(0, X_train.shape[0], batch_size)
        img, labels = X_train[idx], Y_train[idx]

        # Generate a batch of fake images for randomly chosen classes.
        noise = np.random.normal(0, 1, (batch_size, self.latent_dim))
        # Use the configured class count rather than a literal 10 so the
        # sampled labels always stay inside the embedding's index range.
        sampled_label = np.random.randint(
            0, self.num_classes, (batch_size, 1))
        gen_imgs = self.generator.predict([noise, sampled_label])

        # Discriminator step: one real batch, one generated batch.
        d_loss_real = self.discriminator.train_on_batch(
            img, [valid, labels])
        d_loss_fake = self.discriminator.train_on_batch(
            gen_imgs, [fake, sampled_label])
        d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

        # Generator step: push the discriminator towards "real" and
        # towards the requested class.
        g_loss = self.combined.train_on_batch(
            [noise, sampled_label], [valid, sampled_label])

        # NOTE(review): indices 3 and 4 are assumed to be the accuracies of
        # the validity and class heads -- confirm for the Keras version used.
        print("%d [D loss: %f, acc.: %.2f%%, op_acc: %.2f%%] [G loss: %f]" % (
            epoch, d_loss[0], 100 * d_loss[3], 100 * d_loss[4], g_loss[0]))
加载生成模型的权重,给定随机数和特定的标签即可调用生成模型生成(32,32,3)的图片,达到数据扩增的目的。
if __name__ == '__main__':
    # Generator weights to load for inference.
    weight_path = "./weights/gen_epoch20000.h5"
    acgan = ACGAN()
    # Generate an image for the digit 3.
    acgan.test(Label=np.array([[3]]), weight_path=weight_path)
from __future__ import print_function, division
from tensorflow.keras.datasets import mnist
from tensorflow.keras.layers import Input, Dense, Reshape, Flatten, multiply, Dropout
from tensorflow.keras.layers import BatchNormalization, Activation, Embedding, ZeroPadding2D, GlobalAveragePooling2D,MaxPooling2D
from tensorflow.keras.layers import LeakyReLU, UpSampling2D, Conv2D
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.optimizers import Adam
import matplotlib.pyplot as plt
import os
import numpy as np
class ACGAN():
    """Auxiliary-classifier GAN for 28x28 single-channel digit images.

    Holds three Keras models:

    * ``generator``: maps ``[noise, label]`` to an image.
    * ``discriminator``: maps an image to ``[validity, class probabilities]``.
    * ``combined``: generator followed by the frozen discriminator; used to
      train the generator.
    """

    def __init__(self):
        """Build and compile the discriminator, generator and combined model."""
        # Input image geometry.
        self.img_rows = 28
        self.img_cols = 28
        self.channels = 1
        self.img_shape = (self.img_rows, self.img_cols, self.channels)
        # Ten digits, hence ten classes.
        self.num_classes = 10
        # Length of the random noise vector fed to the generator.
        self.latent_dim = 100

        # One loss per discriminator output: real/fake, then class label.
        losses = ['binary_crossentropy', 'sparse_categorical_crossentropy']

        # Discriminator. Each compiled model gets its own Adam instance
        # (learning rate 0.0002, beta_1 0.5): an optimizer object tracks the
        # variables of the model it was first used with, and recent Keras
        # versions reject reuse across models.
        self.discriminator = self.build_discriminator()
        self.discriminator.compile(loss=losses,
                                   optimizer=Adam(0.0002, 0.5),
                                   metrics=['accuracy'])

        # Generator.
        self.generator = self.build_generator()

        # Combined model = generator + discriminator, used only to train the
        # generator, so the discriminator is marked non-trainable first.
        noise = Input(shape=(self.latent_dim,))  # (None, latent_dim)
        # int32 to match the label input declared in build_generator.
        label = Input(shape=(1,), dtype='int32')  # (None, 1)
        img = self.generator([noise, label])  # (None, 28, 28, 1)
        self.discriminator.trainable = False
        # valid: (None, 1); target_labels: (None, num_classes)
        valid, target_labels = self.discriminator(img)
        self.combined = Model([noise, label], [valid, target_labels])
        self.combined.compile(loss=losses,
                              optimizer=Adam(0.0002, 0.5))

    def build_generator(self):
        """Build the class-conditional generator.

        Returns:
            Model mapping ``[noise, label]`` to a generated image. The noise
            is multiplied element-wise by an embedding of the label before
            entering the convolutional stack.
        """
        model = Sequential(name='generator')
        # Project the latent vector onto 32 * 7 * 7 units ...
        model.add(Dense(32 * 7 * 7, activation="relu", input_dim=self.latent_dim))
        # ... and view them as a 7x7 feature map with 32 channels.
        model.add(Reshape((7, 7, 32)))
        # 7x7x32 -> 7x7x64
        model.add(Conv2D(filters=64, kernel_size=3, padding="same"))
        model.add(BatchNormalization(momentum=0.8))
        model.add(Activation("relu"))
        # Upsample (default factor 2), then 14x14x64 -> 14x14x128
        model.add(UpSampling2D())
        model.add(Conv2D(filters=128, kernel_size=3, padding="same"))
        model.add(BatchNormalization(momentum=0.8))
        model.add(Activation("relu"))
        # Upsample again, then 28x28x128 -> 28x28x64
        model.add(UpSampling2D())
        model.add(Conv2D(filters=64, kernel_size=3, padding="same"))
        model.add(BatchNormalization(momentum=0.8))
        model.add(Activation("relu"))
        # 28x28x64 -> 28x28xchannels; tanh keeps outputs in [-1, 1]
        model.add(Conv2D(self.channels, kernel_size=3, padding="same"))
        model.add(Activation("tanh"))
        model.summary()

        noise = Input(shape=(self.latent_dim,))  # (None, latent_dim)
        label = Input(shape=(1,), dtype='int32')  # (None, 1)
        # Map the integer class index to a dense vector of length latent_dim.
        label_embedding = Flatten()(Embedding(self.num_classes, self.latent_dim)(label))
        # Requested class * random noise = labelled latent vector.
        model_input = multiply([noise, label_embedding])
        img = model(model_input)
        return Model([noise, label], img)

    def build_discriminator(self):
        """Build the two-headed discriminator.

        Returns:
            Model mapping an image of shape ``self.img_shape`` to
            ``[validity, label]``: a sigmoid real/fake score and a softmax
            over ``self.num_classes`` classes.
        """
        model = Sequential(name='discriminator')
        # 28x28x1 -> 14x14x16
        model.add(Conv2D(filters=16, kernel_size=3, strides=2, input_shape=self.img_shape, padding="same"))
        model.add(LeakyReLU(alpha=0.2))
        model.add(Dropout(0.25))
        # 14x14x16 -> 7x7x32
        model.add(Conv2D(filters=32, kernel_size=3, strides=2, padding="same"))
        model.add(LeakyReLU(alpha=0.2))
        model.add(Dropout(0.25))
        model.add(BatchNormalization(momentum=0.8))
        # Pad one row at the bottom and one column on the right: 7x7 -> 8x8,
        # so the next stride-2 convolution yields 4x4x64.
        model.add(ZeroPadding2D(padding=((0, 1), (0, 1))))
        model.add(Conv2D(filters=64, kernel_size=3, strides=2, padding="same"))
        model.add(LeakyReLU(alpha=0.2))
        model.add(Dropout(0.25))
        model.add(BatchNormalization(momentum=0.8))
        # 4x4x64 -> 4x4x128
        model.add(Conv2D(filters=128, kernel_size=3, strides=1, padding="same"))
        model.add(LeakyReLU(alpha=0.2))
        model.add(Dropout(0.25))
        # Average over the spatial axes -> (None, 128)
        model.add(GlobalAveragePooling2D())

        img = Input(shape=self.img_shape)  # (None, 28, 28, 1)
        features = model(img)  # (None, 128)
        validity = Dense(1, activation="sigmoid")(features)  # (None, 1)
        label = Dense(self.num_classes, activation="softmax")(features)  # (None, num_classes)
        return Model(img, [validity, label])

    def train(self, epochs, batch_size=128, sample_interval=50):
        """Alternate discriminator and generator updates on MNIST.

        Args:
            epochs: Number of training iterations; each one processes a
                single random batch.
            batch_size: Number of real and of generated images per iteration.
            sample_interval: Save a sample grid every this many iterations;
                a falsy value (0 or None) disables sampling.
        """
        # Load the training split only; the test split is discarded.
        (X_train, y_train), (_, _) = mnist.load_data()
        # Scale pixels to [-1, 1] (assuming 8-bit input) to match the
        # generator's tanh output, and append a channel axis.
        X_train = (X_train.astype(np.float32) - 127.5) / 127.5
        X_train = np.expand_dims(X_train, axis=3)
        Y_train = y_train.reshape(-1, 1)

        # Real/fake targets for the validity head.
        valid = np.ones((batch_size, 1))  # (batch_size, 1)
        fake = np.zeros((batch_size, 1))  # (batch_size, 1)

        for epoch in range(epochs):
            # Draw a random batch of real images and their labels.
            idx = np.random.randint(0, X_train.shape[0], batch_size)
            img, labels = X_train[idx], Y_train[idx]

            # Generate a batch of fake images for randomly chosen classes.
            noise = np.random.normal(0, 1, (batch_size, self.latent_dim))
            # Use the configured class count rather than a literal 10 so the
            # sampled labels always stay inside the embedding's index range.
            sampled_label = np.random.randint(
                0, self.num_classes, (batch_size, 1))
            gen_imgs = self.generator.predict([noise, sampled_label])

            # Discriminator step: one real batch, one generated batch.
            d_loss_real = self.discriminator.train_on_batch(
                img, [valid, labels])
            d_loss_fake = self.discriminator.train_on_batch(
                gen_imgs, [fake, sampled_label])
            d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

            # Generator step: push the discriminator towards "real" and
            # towards the requested class.
            g_loss = self.combined.train_on_batch(
                [noise, sampled_label], [valid, sampled_label])

            # NOTE(review): indices 3 and 4 are assumed to be the accuracies
            # of the validity and class heads -- confirm for the Keras
            # version in use.
            print("%d [D loss: %f, acc.: %.2f%%, op_acc: %.2f%%] [G loss: %f]" % (
                epoch, d_loss[0], 100 * d_loss[3], 100 * d_loss[4], g_loss[0]))

            # Guard against sample_interval == 0, which would otherwise
            # raise ZeroDivisionError in the modulo.
            if sample_interval and epoch % sample_interval == 0:
                self.sampel_image(epoch)

    def sampel_image(self, epoch):
        """Save a 2x5 grid of generated digits to ``images/<epoch>.png``.

        The method name keeps its original spelling so existing callers
        continue to work.

        Args:
            epoch: Iteration number, used as the output file name.
        """
        r, c = 2, 5
        # Size the noise from the model configuration instead of a literal
        # 100 so it always matches the generator's input width.
        noise = np.random.normal(0, 1, (r * c, self.latent_dim))
        # One label per grid cell, cycling through the available classes
        # (0..9 for the default ten classes).
        sampled_labels = np.arange(r * c).reshape(-1, 1) % self.num_classes
        gen_imgs = self.generator.predict([noise, sampled_labels])
        # Map the tanh range [-1, 1] back to [0, 1] for display.
        gen_imgs = 0.5 * gen_imgs + 0.5

        fig, axs = plt.subplots(r, c)
        cnt = 0
        for i in range(r):
            for j in range(c):
                axs[i, j].imshow(gen_imgs[cnt, :, :, 0], cmap='gray')
                # Index down to a scalar: formatting a 1-element array with
                # %d is deprecated in recent NumPy releases.
                axs[i, j].set_title("Digit: %d" % sampled_labels[cnt, 0])
                axs[i, j].axis('off')
                cnt += 1
        # Make sure the output directory exists even when train() is not
        # started from the script entry point.
        os.makedirs("images", exist_ok=True)
        fig.savefig("images/%d.png" % epoch)
        # Close this specific figure rather than whichever one is current.
        plt.close(fig)

    def save_model(self):
        """Write generator and discriminator to ``saved_model/``.

        For each model, the architecture goes to ``<name>.json`` and the
        weights to ``<name>_weights.hdf5``.
        """

        def save(model, model_name):
            """Persist one model's architecture and weights."""
            model_path = "saved_model/%s.json" % model_name
            weights_path = "saved_model/%s_weights.hdf5" % model_name
            # Create the target directory on demand instead of failing.
            os.makedirs("saved_model", exist_ok=True)
            # The context manager closes the handle deterministically.
            with open(model_path, 'w', encoding="utf-8") as arch_file:
                arch_file.write(model.to_json())
            model.save_weights(weights_path)

        save(self.generator, "generator")
        save(self.discriminator, "discriminator")
if __name__ == '__main__':
    # Sample grids are saved under ./images during training. exist_ok=True
    # replaces the separate existence check, which could race with another
    # process creating the directory.
    os.makedirs("./images", exist_ok=True)
    acgan = ACGAN()
    acgan.train(epochs=30000, batch_size=256, sample_interval=50)