对图片做自动编码时,很容易想到用卷积神经网络作为编码-解码器。在实际操作中,
也经常使用卷积自动编码器去解决图像编码问题,而且非常有效。
下面通过 **Keras** 完成一个简单的卷积自动编码器。编码器由堆叠的卷积层和池化层
(max pooling 用于空间降采样)组成,对应的解码器由卷积层和上采样层组成。
@requires_authorization
import os

import numpy as np
from keras import backend as K
from keras.layers import Input, Dense, Conv2D, MaxPooling2D, UpSampling2D
from keras.models import Model
# ---- Encoder: three conv + max-pool stages ----
# Spatial size per stage (padding='same' pooling rounds up): 28 -> 14 -> 7 -> 4.
input_img = Input(shape=(28, 28, 1))
x = Conv2D(16, (3, 3), activation='relu', padding='same')(input_img)
x = MaxPooling2D((2, 2), padding='same')(x)
x = Conv2D(8, (3, 3), activation='relu', padding='same')(x)
x = MaxPooling2D((2, 2), padding='same')(x)
x = Conv2D(8, (3, 3), activation='relu', padding='same')(x)
# The bottleneck layer is named so that it can be looked up with get_layer()
# below; without an explicit name the lookup of 'encoder_out' fails.
encoded = MaxPooling2D((2, 2), padding='same', name='encoder_out')(x)

# ---- Decoder: conv + upsampling stages mirroring the encoder ----
x = Conv2D(8, (3, 3), activation='relu', padding='same')(encoded)
x = UpSampling2D((2, 2))(x)  # 4 -> 8
x = Conv2D(8, (3, 3), activation='relu', padding='same')(x)
x = UpSampling2D((2, 2))(x)  # 8 -> 16
# Deliberately *valid* padding here: the 3x3 kernel trims 16 -> 14 so that the
# final upsampling lands on 28x28. With padding='same' the output would be
# 32x32 and could not be compared against the 28x28 input during training.
x = Conv2D(16, (3, 3), activation='relu')(x)
x = UpSampling2D((2, 2))(x)  # 14 -> 28
decoded = Conv2D(1, (3, 3), activation='sigmoid', padding='same')(x)

# Full autoencoder: image in, reconstructed image out.
autoencoder = Model(input_img, decoded)
autoencoder.compile(optimizer='adam', loss='binary_crossentropy')

# Encoder-only model sharing the autoencoder's layers; its output is the
# bottleneck feature map produced by the 'encoder_out' layer.
encoder_model = Model(inputs=autoencoder.input, outputs=autoencoder.get_layer('encoder_out').output)
def load_mnist(dataset_name):
    """Load train/test images and labels from ``./data/<dataset_name>/mnist.npz``.

    The archive is read through the keys ``train``, ``train_labels``, ``test``
    and ``test_labels``. Image arrays are transposed and reshaped to
    ``(-1, 28, 28, 1)`` float32 and divided by 255; labels are returned as
    float32 without scaling.

    NOTE(review): the transpose suggests images are stored one per column, and
    the ``[-1]`` on the label arrays suggests labels are stored as a
    ``(1, N)`` row -- confirm against whatever produced the archive.

    Args:
        dataset_name: Name of the sub-directory of ``./data`` that holds
            ``mnist.npz``.

    Returns:
        Tuple ``(trX, trY, teX, teY)``: scaled training images, training
        labels, scaled test images, test labels.
    """
    data_dir = os.path.join("./data", dataset_name)
    # Use the archive as a context manager so the underlying file handle is
    # closed as soon as the arrays have been read into memory.
    with np.load(os.path.join(data_dir, 'mnist.npz')) as f:
        train_data = f['train'].T
        trX = train_data.reshape((-1, 28, 28, 1)).astype(np.float32)
        trY = f['train_labels'][-1].astype(np.float32)
        test_data = f['test'].T
        teX = test_data.reshape((-1, 28, 28, 1)).astype(np.float32)
        teY = f['test_labels'][-1].astype(np.float32)
    return trX / 255., trY, teX / 255., teY
from keras.callbacks import TensorBoard

# Only the images are needed: the autoencoder is trained to reproduce its own
# input, so both label arrays returned by load_mnist are discarded.
x_train, _, x_test, _ = load_mnist('mnist')

# Training progress is logged for TensorBoard under /tmp/autoencoder.
autoencoder.fit(
    x_train,
    x_train,  # the target is the input itself
    validation_data=(x_test, x_test),
    callbacks=[TensorBoard(log_dir='/tmp/autoencoder')],
    shuffle=True,
    batch_size=128,
    epochs=50,
)
import matplotlib.pyplot as plt

# Run the test images through the full autoencoder (reconstructions) and
# through the encoder-only model (bottleneck codes).
decoded_imgs = autoencoder.predict(x_test)
encoded_imgs = encoder_model.predict(x_test)

# Figure with two rows of n digits: originals on top, reconstructions below.
n = 10
plt.figure(figsize=(20, 4))
for i in range(n):
    # Top row: original image. The same index i is used for both rows so that
    # every reconstruction sits directly under the image it was computed from.
    ax = plt.subplot(2, n, i + 1)  # subplot positions are 1-based
    plt.imshow(x_test[i].reshape(28, 28))
    plt.gray()
    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False)

    # Bottom row: reconstruction of the same image.
    ax = plt.subplot(2, n, i + 1 + n)
    plt.imshow(decoded_imgs[i].reshape(28, 28))
    plt.gray()
    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False)
plt.show()
# Figure with one row of n encoded representations.
n = 10
plt.figure(figsize=(20, 8))
for i in range(n):
    ax = plt.subplot(1, n, i + 1)  # subplot positions are 1-based
    # Plot the predicted codes (encoded_imgs), not the symbolic `encoded`
    # layer output, which holds no data. Each code is flattened to a
    # 4 x 32 grid and transposed for display.
    plt.imshow(encoded_imgs[i].reshape(4, 4 * 8).T)
    plt.gray()
    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False)
plt.show()