“自编码”是一种数据压缩算法,其中压缩和解压缩功能是1)数据特定的,2)有损的,3)从例子中自动学习而不是由人工设计。此外,在几乎所有使用术语“自动编码器”的情况下,压缩和解压缩功能都是用神经网络来实现的。
1)自动编码器是特定于数据的,这意味着它们只能压缩类似于他们所训练的数据。这与例如MPEG-2音频层III(MP3)压缩算法不同,后者通常只保留关于“声音”的假设,而不涉及特定类型的声音。在面部图片上训练的自动编码器在压缩树的图片方面做得相当差,因为它将学习的特征是面部特定的。
2)自动编码器是有损的,这意味着与原始输入相比,解压缩的输出会降低(类似于MP3或JPEG压缩)。这与无损算术压缩不同。
3)自动编码器是从数据实例中自动学习的,这是一个有用的属性:这意味着很容易培养算法的特定实例,在特定类型的输入上运行良好。它不需要任何新的工程,只需要适当的培训数据。
要构建一个自动编码器,需要三件事情:编码函数,解码函数和数据压缩表示与解压缩表示(即“丢失”函数)之间的信息损失量之间的距离函数。编码器和解码器将被选择为参数函数(通常是神经网络),并且相对于距离函数是可微分的,因此可以优化编码/解码函数的参数以最小化重构损失,使用随机梯度下降。这很简单!而且你甚至不需要理解这些词语在实践中开始使用自动编码器。
建立一个全连接的编码器和解码器。也可以单独使用编码器和解码器,在此使用Keras的函数式模型API即Model可以灵活地构建自编码器。
50个epoch后,看起来我们的自编码器优化的不错了,损失val_loss: 0.1037。
-
from keras.layers
import Input, Dense
-
from keras.models
import Model
-
from keras.datasets
import mnist
-
import numpy
as np
-
import matplotlib.pyplot
as plt
-
-
(x_train, _), (x_test, _) = mnist.load_data()
-
-
x_train = x_train.astype(
'float32') /
255.
-
x_test = x_test.astype(
'float32') /
255.
-
x_train = x_train.reshape((len(x_train), np.prod(x_train.shape[
1:])))
-
x_test = x_test.reshape((len(x_test), np.prod(x_test.shape[
1:])))
-
print(x_train.shape)
-
print(x_test.shape)
-
-
encoding_dim =
32
-
input_img = Input(shape=(
784,))
-
-
encoded = Dense(encoding_dim, activation=
'relu')(input_img)
-
decoded = Dense(
784, activation=
'sigmoid')(encoded)
-
-
autoencoder = Model(inputs=input_img, outputs=decoded)
-
encoder = Model(inputs=input_img, outputs=encoded)
-
-
encoded_input = Input(shape=(encoding_dim,))
-
decoder_layer = autoencoder.layers[
-1]
-
-
decoder = Model(inputs=encoded_input, outputs=decoder_layer(encoded_input))
-
-
autoencoder.compile(optimizer=
'adadelta', loss=
'binary_crossentropy')
-
-
autoencoder.fit(x_train, x_train, epochs=
50, batch_size=
256,
-
shuffle=
True, validation_data=(x_test, x_test))
-
-
encoded_imgs = encoder.predict(x_test)
-
decoded_imgs = decoder.predict(encoded_imgs)
-
-
n =
10
# how many digits we will display
-
plt.figure(figsize=(
20,
4))
-
for i
in range(n):
-
ax = plt.subplot(
2, n, i +
1)
-
plt.imshow(x_test[i].reshape(
28,
28))
-
plt.gray()
-
ax.get_xaxis().set_visible(
False)
-
ax.get_yaxis().set_visible(
False)
-
-
ax = plt.subplot(
2, n, i +
1 + n)
-
plt.imshow(decoded_imgs[i].reshape(
28,
28))
-
plt.gray()
-
ax.get_xaxis().set_visible(
False)
-
ax.get_yaxis().set_visible(
False)
-
plt.show()
为码字加上稀疏性约束。如果我们对隐层单元施加稀疏性约束的话,会得到更为紧凑的表达,只有一小部分神经元会被激活。在Keras中,我们可以通过添加一个activity_regularizer达到对某层激活值进行约束的目的。
encoded = Dense(encoding_dim, activation='relu',activity_regularizer=regularizers.activity_l1(10e-5))(input_img)
把多个自编码器叠起来即加深自编码器的深度,50个epoch后,损失val_loss:0.0926,比1个隐含层的自编码器要好一些。
-
import numpy
as np
-
np.random.seed(
1337)
# for reproducibility
-
-
from keras.datasets
import mnist
-
from keras.models
import Model
#泛型模型
-
from keras.layers
import Dense, Input
-
import matplotlib.pyplot
as plt
-
-
# X shape (60,000 28x28), y shape (10,000, )
-
(x_train, _), (x_test, y_test) = mnist.load_data()
-
-
# 数据预处理
-
x_train = x_train.astype(
'float32') /
255.
# minmax_normalized
-
x_test = x_test.astype(
'float32') /
255.
# minmax_normalized
-
x_train = x_train.reshape((x_train.shape[
0],
-1))
-
x_test = x_test.reshape((x_test.shape[
0],
-1))
-
print(x_train.shape)
-
print(x_test.shape)
-
-
# 压缩特征维度至2维
-
encoding_dim =
2
-
-
# this is our input placeholder
-
input_img = Input(shape=(
784,))
-
-
# 编码层
-
encoded = Dense(
128, activation=
'relu')(input_img)
-
encoded = Dense(
64, activation=
'relu')(encoded)
-
encoded = Dense(
10, activation=
'relu')(encoded)
-
encoder_output = Dense(encoding_dim)(encoded)
-
-
# 解码层
-
decoded = Dense(
10, activation=
'relu')(encoder_output)
-
decoded = Dense(
64, activation=
'relu')(decoded)
-
decoded = Dense(
128, activation=
'relu')(decoded)
-
decoded = Dense(
784, activation=
'tanh')(decoded)
-
-
# 构建自编码模型
-
autoencoder = Model(inputs=input_img, outputs=decoded)
-
-
# 构建编码模型
-
encoder = Model(inputs=input_img, outputs=encoder_output)
-
-
-
# compile autoencoder
-
autoencoder.compile(optimizer=
'adam', loss=
'mse')
-
-
autoencoder.summary()
-
encoder.summary()
-
-
# training
-
autoencoder.fit(x_train, x_train, epochs=
10, batch_size=
256, shuffle=
True)
-
-
# plotting
-
encoded_imgs = encoder.predict(x_test)
-
-
plt.scatter(encoded_imgs[:,
0], encoded_imgs[:,
1], c=y_test,s=
3)
-
plt.colorbar()
-
plt.show()
-
-
decoded_imgs = autoencoder.predict(x_test)
-
# use Matplotlib (don't ask)
-
import matplotlib.pyplot
as plt
-
-
n =
10
# how many digits we will display
-
plt.figure(figsize=(
20,
4))
-
for i
in range(n):
-
# display original
-
ax = plt.subplot(
2, n, i +
1)
-
plt.imshow(x_test[i].reshape(
28,
28))
-
plt.gray()
-
ax.get_xaxis().set_visible(
False)
-
ax.get_yaxis().set_visible(
False)
-
-
# display reconstruction
-
ax = plt.subplot(
2, n, i +
1 + n)
-
plt.imshow(decoded_imgs[i].reshape(
28,
28))
-
plt.gray()
-
ax.get_xaxis().set_visible(
False)
-
ax.get_yaxis().set_visible(
False)
-
plt.show()
当输入是图像时,使用卷积神经网络是更好的。卷积自编码器的编码器部分由卷积层和MaxPooling层构成,MaxPooling负责空域下采样。而解码器由卷积层和上采样层构成。50个epoch后,损失val_loss: 0.1018。
-
from keras.layers
import Input, Convolution2D, MaxPooling2D, UpSampling2D
-
from keras.models
import Model
-
from keras.datasets
import mnist
-
import numpy
as np
-
import matplotlib.pyplot
as plt
-
from keras.callbacks
import TensorBoard
-
-
input_img = Input(shape=(
28,
28,
1))
-
-
x = Convolution2D(
16, (
3,
3), activation=
'relu', padding=
'same')(input_img)
-
x = MaxPooling2D((
2,
2), padding=
'same')(x)
-
x = Convolution2D(
8, (
3,
3), activation=
'relu', padding=
'same')(x)
-
x = MaxPooling2D((
2,
2), padding=
'same')(x)
-
x = Convolution2D(
8, (
3,
3), activation=
'relu', padding=
'same')(x)
-
encoded = MaxPooling2D((
2,
2), padding=
'same')(x)
-
-
x = Convolution2D(
8, (
3,
3), activation=
'relu', padding=
'same')(encoded)
-
x = UpSampling2D((
2,
2))(x)
-
x = Convolution2D(
8, (
3,
3), activation=
'relu', padding=
'same')(x)
-
x = UpSampling2D((
2,
2))(x)
-
x = Convolution2D(
16, (
3,
3), activation=
'relu')(x)
-
x = UpSampling2D((
2,
2))(x)
-
decoded = Convolution2D(
1, (
3,
3), activation=
'sigmoid', padding=
'same')(x)
-
-
autoencoder = Model(inputs=input_img, outputs=decoded)
-
autoencoder.compile(optimizer=
'adadelta', loss=
'binary_crossentropy')
-
-
# 打开一个终端并启动TensorBoard,终端中输入 tensorboard --logdir=/autoencoder
-
autoencoder.fit(x_train, x_train, epochs=
50, batch_size=
256,
-
shuffle=
True, validation_data=(x_test, x_test),
-
callbacks=[TensorBoard(log_dir=
'autoencoder')])
-
-
decoded_imgs = autoencoder.predict(x_test)
UpSampling2D
上采样,扩大矩阵,可以用于复原图像等。
keras.layers.convolutional.UpSampling2D(size=(2, 2), data_format=None)
将数据的行和列分别重复size[0]和size[1]次
我们把训练样本用噪声污染,然后使解码器解码出干净的照片,以获得去噪自动编码器。首先我们把原图片加入高斯噪声,然后把像素值clip到0~1。
ps: 去噪自编码器(denoisingautoencoder, DAE)是一类接受损坏数据作为输入,并训练来预测原始未被损坏数据作为输出的自编码器。
-
#去噪 自编码器
-
from keras.layers
import Input, Convolution2D, MaxPooling2D, UpSampling2D
-
from keras.models
import Model
-
from keras.datasets
import mnist
-
import numpy
as np
-
import matplotlib.pyplot
as plt
-
from keras.callbacks
import TensorBoard
-
-
(x_train, _), (x_test, _) = mnist.load_data()
-
x_train = x_train.astype(
'float32') /
255.
-
x_test = x_test.astype(
'float32') /
255.
-
x_train = np.reshape(x_train, (len(x_train),
28,
28,
1))
-
x_test = np.reshape(x_test, (len(x_test),
28,
28,
1))
-
noise_factor =
0.5
-
x_train_noisy = x_train + noise_factor * np.random.normal(loc=
0.0, scale=
1.0, size=x_train.shape)
-
x_test_noisy = x_test + noise_factor * np.random.normal(loc=
0.0, scale=
1.0, size=x_test.shape)
-
x_train_noisy = np.clip(x_train_noisy,
0.,
1.)
#[0.,1.]
-
x_test_noisy = np.clip(x_test_noisy,
0.,
1.)
-
print(x_train.shape)
-
print(x_test.shape)
-
-
input_img = Input(shape=(
28,
28,
1))
-
x=Convolution2D(
32,(
3,
3),activation=
'relu',padding=
'same')(input_img)
-
x=MaxPooling2D(pool_size=(
2,
2),padding=
'same')(x)
-
x=Convolution2D(
32,(
3,
3),activation=
'relu',padding=
'same')(x)
-
encoded=MaxPooling2D(pool_size=(
2,
2),padding=
'same')(x)
-
-
x=Convolution2D(
32,(
3,
3),activation=
'relu',padding=
'same')(encoded)
-
x=UpSampling2D(size=(
2,
2))(x)
-
x=Convolution2D(
32,(
3,
3),activation=
'relu',padding=
'same')(x)
-
x=UpSampling2D((
2,
2))(x)
-
decoded=Convolution2D(
1,(
3,
3),activation=
'sigmoid',padding=
'same')(x)
-
-
autoencoder=Model(inputs=input_img,outputs=decoded)
-
autoencoder.compile(optimizer=
'adam',loss=
'binary_crossentropy')
-
autoencoder.summary()
-
-
# 打开一个终端并启动TensorBoard,终端中输入 tensorboard --logdir=/autoencoder
-
autoencoder.fit(x_train_noisy, x_train, epochs=
10, batch_size=
256,
-
shuffle=
True, validation_data=(x_test_noisy, x_test),
-
callbacks=[TensorBoard(log_dir=
'autoencoder', write_graph=
False)])
-
-
decoded_imgs = autoencoder.predict(x_test_noisy)
-
-
plt.figure(figsize=(
30,
6))
#设置 figure大小
-
n=
10
-
for i
in range(n):
-
ax=plt.subplot(
3,n,i+
1)
# m表示是图排成m行,n表示图排成n列,p是指你现在要把曲线画到figure中哪个图上
-
plt.imshow(x_test[i].reshape(
28,
28))
-
plt.gray()
#只有黑白两色,没有中间的渐进色
-
ax.get_xaxis().set_visible(
False)
# X 轴不可见
-
ax.get_yaxis().set_visible(
False)
# y 轴不可见
-
-
ax=plt.subplot(
3,n,i+
1+n)
-
plt.imshow(x_test_noisy[i].reshape(
28,
28))
-
plt.gray()
-
ax.get_xaxis().set_visible(
False)
-
ax.get_yaxis().set_visible(
False)
-
-
ax=plt.subplot(
3,n,i+
1+
2*n)
-
plt.imshow(decoded_imgs[i].reshape(
28,
28))
-
plt.gray()
-
ax.get_xaxis().set_visible(
False)
-
ax.get_yaxis().set_visible(
False)
-
plt.show()