In machine learning, we often want to turn complex, abstract goals into something concrete, and that means working with complex, high-dimensional data such as images, text, and audio. Although these modalities look complicated, complex data usually carries a great deal of redundant structure. We can exploit this to compress the raw data into a much lower-dimensional representation that still serves our purpose. Machine learning offers many techniques for compressing data and extracting its core features. The variational auto-encoder (VAE) is one of the most widely used, and it plays a central role in unsupervised learning. Below is a brief introduction to the variational auto-encoder.
![VAE diagram](https://img-blog.csdnimg.cn/20200221161255687.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3dlaXhpbl80NTU2MTc5NQ==,size_16,color_FFFFFF,t_70)
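Before diving into the code, it is worth stating the objective the training loop below optimizes. A VAE maximizes the evidence lower bound (ELBO): a reconstruction term minus a KL regularizer that pulls the approximate posterior $q_\phi(z|x)$ toward the prior. With a Gaussian encoder and a standard-normal prior, the KL term has the closed form used directly in the code:

$$
\mathcal{L}(\theta, \phi; x) = \mathbb{E}_{q_\phi(z|x)}\big[\log p_\theta(x|z)\big] - D_{\mathrm{KL}}\big(q_\phi(z|x) \,\|\, \mathcal{N}(0, I)\big)
$$

$$
D_{\mathrm{KL}}\big(\mathcal{N}(\mu, \mathrm{diag}(\sigma^2)) \,\|\, \mathcal{N}(0, I)\big) = -\frac{1}{2} \sum_{i=1}^{d} \left(1 + \log \sigma_i^2 - \mu_i^2 - \sigma_i^2\right)
$$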
import os
import tensorflow as tf
import numpy as np
from tensorflow import keras
from tensorflow.keras import Sequential, layers
from PIL import Image
from matplotlib import pyplot as plt
from tensorflow.compat.v1 import ConfigProto
from tensorflow.compat.v1 import InteractiveSession

# Let the GPU allocate memory on demand instead of grabbing it all up front
config = ConfigProto()
config.gpu_options.allow_growth = True
session = InteractiveSession(config=config)

# Fix the random seeds for reproducibility and silence TF info logs
tf.random.set_seed(22)
np.random.seed(22)
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
assert tf.__version__.startswith('2.')
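As a side note, the compat.v1 session above is one way to enable on-demand GPU memory in TF2; the same effect can be achieved with the native TF2 configuration API instead (a minimal sketch, assuming at least one visible GPU):

```python
# TF2-native alternative to the compat.v1 ConfigProto/InteractiveSession block;
# this must run before any op touches the GPU
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)
```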
# save_images: tile a batch of 28x28 grayscale images into a 10x10 grid and save it
def save_images(imgs, name):
    new_im = Image.new('L', (280, 280))
    index = 0
    for i in range(0, 280, 28):
        for j in range(0, 280, 28):
            im = imgs[index]
            im = Image.fromarray(im, mode='L')
            new_im.paste(im, (i, j))
            index += 1
    new_im.save(name)
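As a quick illustration of what save_images expects (this snippet is not part of the original script), it takes exactly 100 uint8 images of shape 28x28 and tiles them into a 10x10 grid:

```python
# Illustrative usage only: tile 100 random-noise "images" into one grid file
noise = (np.random.rand(100, 28, 28) * 255).astype(np.uint8)
save_images(noise, 'noise_grid.png')
```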
# Hyperparameters
h_dim = 20
batchsz = 512
lr = 1e-4
z_dim = 10

# Load the Fashion-MNIST dataset; the labels are only printed, not used,
# since the VAE is trained unsupervised
(x_train, y_train), (x_test, y_test) = keras.datasets.fashion_mnist.load_data()
x_train, x_test = x_train.astype(np.float32) / 255., x_test.astype(np.float32) / 255.
train_db = tf.data.Dataset.from_tensor_slices(x_train)
train_db = train_db.shuffle(batchsz * 5).batch(batchsz)
test_db = tf.data.Dataset.from_tensor_slices(x_test)
test_db = test_db.batch(batchsz)
print(x_train.shape, y_train.shape)
print(x_test.shape, y_test.shape)
# VAE: define the network
class VAE(keras.Model):
    def __init__(self):
        super(VAE, self).__init__()
        # Encoder network: a shared fc1 branches into fc2 and fc3
        self.fc1 = layers.Dense(128)
        # fc2 predicts the mean of q(z|x)
        self.fc2 = layers.Dense(z_dim)
        # fc3 predicts the log-variance of q(z|x)
        self.fc3 = layers.Dense(z_dim)
        # Decoder network
        self.fc4 = layers.Dense(128)
        self.fc5 = layers.Dense(784)

    # encoder: forward pass through the encoder
    def encoder(self, x):
        # fc1 followed by a ReLU activation
        h = tf.nn.relu(self.fc1(x))
        # mean
        mu = self.fc2(h)
        # log-variance
        log_var = self.fc3(h)
        return mu, log_var

    # decoder: forward pass through the decoder
    def decoder(self, z):
        out = tf.nn.relu(self.fc4(z))
        out = self.fc5(out)
        return out

    # Reparameterization trick: z = mu + sigma * eps, eps ~ N(0, I)
    def reparameterize(self, mu, log_var):
        # sample standard-normal noise
        eps = tf.random.normal(log_var.shape)
        # recover the standard deviation from the log-variance
        std = tf.exp(log_var) ** 0.5
        z = mu + std * eps
        return z

    def call(self, inputs, training=None):
        mu, log_var = self.encoder(inputs)
        z = self.reparameterize(mu, log_var)
        x_hat = self.decoder(z)
        return x_hat, mu, log_var
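The reparameterization trick is what keeps sampling differentiable: gradients flow through mu and std while the randomness lives entirely in eps. A quick standalone sanity check (not part of the training script) that the samples match the intended statistics:

```python
# Samples z = mu + sigma * eps should have mean ~mu and std ~exp(log_var)**0.5
mu = tf.constant([[1.0, -2.0]])
log_var = tf.constant([[0.0, 2.0]])       # variances 1.0 and e^2
eps = tf.random.normal((10000, 2))
z = mu + tf.exp(log_var) ** 0.5 * eps
print(tf.reduce_mean(z, axis=0))          # close to [1.0, -2.0]
print(tf.math.reduce_std(z, axis=0))      # close to [1.0, 2.718]
```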
model = VAE()
model.build(input_shape=(4, 784))
optimizer = tf.optimizers.Adam(lr)

# Make sure the output directory for generated images exists
os.makedirs('vae_images', exist_ok=True)

for epoch in range(1000):
    for step, x in enumerate(train_db):
        # Flatten each 28x28 image into a 784-dim vector
        x = tf.reshape(x, [-1, 784])
        with tf.GradientTape() as tape:
            x_rec_logits, mu, log_var = model(x)
            # Per-pixel reconstruction loss on the logits, averaged over the batch
            rec_loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=x, logits=x_rec_logits)
            rec_loss = tf.reduce_sum(rec_loss) / x.shape[0]
            # KL divergence between N(mu, sigma^2) and the standard-normal prior
            kl_div = -0.5 * (1 + log_var - mu**2 - tf.exp(log_var))
            # Average the KL term over the batch
            kl_div = tf.reduce_sum(kl_div) / x.shape[0]
            # Total loss: reconstruction term plus KL regularizer
            loss = rec_loss + 1. * kl_div
        grads = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))
        if step % 100 == 0:
            print(epoch, step, 'kl_div:', float(kl_div), 'rec loss:', float(rec_loss))

    # Sampling: decode random latent vectors drawn from the prior
    z = tf.random.normal((batchsz, z_dim))
    logits = model.decoder(z)
    x_hat = tf.sigmoid(logits)
    # Convert back to 28x28 uint8 images
    x_hat = tf.reshape(x_hat, [-1, 28, 28]).numpy() * 255.
    x_hat = x_hat.astype(np.uint8)
    save_images(x_hat, 'vae_images/sampled_epoch%d.png' % epoch)

    # Reconstruction: encode test images and decode them back
    x = next(iter(test_db))
    x = tf.reshape(x, [-1, 784])
    x_hat_logits, _, _ = model(x)
    x_hat = tf.sigmoid(x_hat_logits)
    x_hat = tf.reshape(x_hat, [-1, 28, 28]).numpy() * 255.
    x_hat = x_hat.astype(np.uint8)
    save_images(x_hat, 'vae_images/rec_epoch%d.png' % epoch)
Finally, the reconstructed (rec) images consistently look better than the sampled ones. The reason is that sampling only uses the decoder to generate images from random latent vectors, whereas reconstruction is a one-to-one mapping: each output is decoded from the encoding of a real input image.
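To see the decoder acting as a generator under more control than random sampling, one can also interpolate between the latent codes of two test images. A sketch reusing the trained model and helpers above (the output file name interp.png is my own choice):

```python
# Walk the latent space between two encoded test images in 100 steps
x = tf.reshape(next(iter(test_db))[:2], [-1, 784])
mu, log_var = model.encoder(x)
z0, z1 = mu[0], mu[1]                     # use the posterior means as codes
steps = tf.linspace(0.0, 1.0, 100)
z = tf.stack([(1 - t) * z0 + t * z1 for t in steps])
imgs = tf.sigmoid(model.decoder(z))
imgs = (tf.reshape(imgs, [-1, 28, 28]).numpy() * 255).astype(np.uint8)
save_images(imgs, 'vae_images/interp.png')
```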