The multilayer perceptron built with TensorFlow hits an accuracy ceiling, so a CNN is needed to push accuracy further. Compared with the MLP, the main additions are the convolution and pooling layers.
To implement a CNN, first define some reusable building blocks: weight, bias, conv2d (convolution), and max pooling. Second, assemble the CNN, giving every layer a name with tf.name_scope: the input layer, convolution layer 1, pooling layer 1, convolution layer 2, pooling layer 2, the flatten layer, the hidden layer, and the output layer; an optimizer also has to be defined, along with the loss function and an accuracy metric. Third, train, evaluate, and predict.
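Before reading the code, it helps to track how the tensor shape evolves through the network. The arithmetic below is a minimal sketch using the same filter and pool sizes as the code that follows; it shows why the flatten layer ends up with 1764 values per image.

# Shape walk-through for one 28x28x1 input image:
#   5x5 conv, 16 filters, stride 1, SAME padding -> 28x28x16
#   2x2 max-pool, stride 2                       -> 14x14x16
#   5x5 conv, 36 filters, stride 1, SAME padding -> 14x14x36
#   2x2 max-pool, stride 2                       -> 7x7x36
#   flatten                                      -> 7 * 7 * 36 = 1764
print(7 * 7 * 36)  # 1764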
import tensorflow as tf
import tensorflow.examples.tutorials.mnist.input_data as input_data
import matplotlib.pyplot as plt
import numpy as np
from time import time
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
def show_image(image):
    plt.imshow(image.reshape(28, 28), cmap = 'binary')
    plt.show()
def plot_image_label_prediction(images, labels, prediction = [], idx = 0, num = 10):
    fig = plt.gcf()
    fig.set_size_inches(12, 14)
    if num > 25:
        num = 25
    for i in range(0, num):
        ax = plt.subplot(5, 5, 1 + i)
        ax.imshow(np.reshape(images[idx], (28, 28)), cmap="binary")
        title = "label = " + str(np.argmax(labels[idx]))
        if len(prediction) > 0:
            title += ", prediction = " + str(prediction[idx])
        ax.set_title(title, fontsize = 10)
        ax.set_xticks([])
        ax.set_yticks([])
        idx += 1
    plt.show()
def layer(output_dim, input_dim, inputs, activation = None):
    # Fully-connected layer helper carried over from the MLP version; the CNN
    # below does not call it and builds its dense layers inline instead.
    W = tf.Variable(tf.random_normal([input_dim, output_dim]))
    b = tf.Variable(tf.random_normal([1, output_dim]))
    XWb = tf.matmul(inputs, W) + b
    if activation is None:
        outputs = XWb
    else:
        outputs = activation(XWb)
    return outputs
def weight(shape):
    # Weights start from a truncated normal with small stddev.
    return tf.Variable(tf.truncated_normal(shape, stddev = 0.1), name = "W")
def bias(shape):
    # A small positive bias keeps ReLU units active at the start of training.
    return tf.Variable(tf.constant(0.1, shape = shape), name = "b")
def conv2d(x, W):
    # Stride 1 in every dimension with SAME padding: output height and width
    # equal the input's.
    return tf.nn.conv2d(x, W, strides = [1, 1, 1, 1], padding = 'SAME')
def max_pool_2x2(x):
    # 2x2 pooling window moved 2 pixels at a time: halves height and width.
    return tf.nn.max_pool(x, ksize = [1, 2, 2, 1],
                          strides = [1, 2, 2, 1],
                          padding = 'SAME')
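# Quick sanity check for the two helpers above (optional; hypothetical
# snippet for an interactive session, not part of the training script):
#   t = tf.zeros([1, 28, 28, 1])
#   conv2d(t, weight([5, 5, 1, 16])).shape   # (1, 28, 28, 16) -- size kept
#   max_pool_2x2(t).shape                    # (1, 14, 14, 1)  -- size halved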
mnist = input_data.read_data_sets("data/MNIST_data/", one_hot = True)
#show_image(mnist.train.images[0])
print("labels]0]: ", mnist.train.labels[0])
print("labels[0]: ", np.argmax(mnist.train.labels[0]))
#plot_image_label_prediction(mnist.train.images, mnist.train.labels)
#batch_images, batch_labels = mnist.train.next_batch(batch_size = 100)
#plot_image_label_prediction(batch_images, batch_labels)
with tf.name_scope("Input_layer"):
x = tf.placeholder("float", [None, 784], name = "x")
x_image = tf.reshape(x, [-1, 28, 28, 1])
with tf.name_scope("C1_Conv"):
W1 = weight([5, 5, 1, 16])
b1 = bias([16])
Conv1 = conv2d(x_image, W1) + b1
C1_Conv = tf.nn.relu(Conv1)
with tf.name_scope("C1_Pool"):
C1_Pool = max_pool_2x2(C1_Conv)
with tf.name_scope("C2_Conv"):
W2 = weight([5, 5, 16, 36])
b2 = bias([36])
Conv2 = conv2d(C1_Pool, W2) + b2
C2_Conv = tf.nn.relu(Conv2)
with tf.name_scope("C2_Pool"):
C2_Pool = max_pool_2x2(C2_Conv)
with tf.name_scope("D_Flat"):
D_Flat = tf.reshape(C2_Pool, [-1, 1764])
with tf.name_scope("D_Hidden_Layer"):
W3 = weight([1764, 128])
b3 = bias([128])
D_Hidden = tf.nn.relu(tf.matmul(D_Flat, W3) + b3)
D_Hidden_Dropout = tf.nn.dropout(D_Hidden, keep_prob = 0.8)
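    # Note: keep_prob is hard-coded into the graph, so dropout stays active
    # even when evaluating on the validation and test sets below. The usual
    # TF1 pattern (not used here, sketched for reference) feeds it instead:
    #   keep_prob = tf.placeholder(tf.float32)
    #   D_Hidden_Dropout = tf.nn.dropout(D_Hidden, keep_prob = keep_prob)
    # with keep_prob fed as 0.8 during training and 1.0 during evaluation.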
with tf.name_scope("Output_layer"):
W4 = weight([128, 10])
b4 = bias([10])
y_predict = tf.nn.softmax(tf.matmul(D_Hidden_Dropout, W4) + b4)
with tf.name_scope("Optimizer"):
y_label = tf.placeholder("float", [None, 10], name = "y_label")
loss_function = tf.reduce_mean(
tf.nn.softmax_cross_entropy_with_logits(
logits = y_predict, labels = y_label))
optimizer = tf.train.AdamOptimizer(learning_rate = 0.001).minimize(loss_function)
with tf.name_scope("evaluate_accuracy"):
correct_predict = tf.equal(tf.argmax(y_label, 1), tf.argmax(y_predict, 1))
accuracy = tf.reduce_mean(tf.cast(correct_predict, "float"))
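# Example of the accuracy computation: if y_label's one-hot peak and
# y_predict's largest probability are both at index 7, tf.argmax returns 7
# for each, tf.equal gives True, and averaging the cast 0/1 values over a
# batch yields the fraction of correctly classified images.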
train_epochs = 15
batch_size = 100
total_batches = int(mnist.train.num_examples/batch_size)
loss_list = []
epoch_list = []
accuracy_list = []
start_time = time()
sess = tf.Session()
sess.run(tf.global_variables_initializer())
print("-" * 24)
for epoch in range(train_epochs):
    for i in range(total_batches):
        batch_x, batch_y = mnist.train.next_batch(batch_size)
        #conv1, opt = sess.run([Conv1, optimizer], feed_dict = {x: batch_x, y_label: batch_y})
        sess.run(optimizer, feed_dict = {x: batch_x, y_label: batch_y})
    # After each epoch, record loss and accuracy on the validation set.
    loss, acc = sess.run([loss_function, accuracy],
                         feed_dict = {x: mnist.validation.images, y_label: mnist.validation.labels})
    epoch_list.append(epoch)
    loss_list.append(loss)
    accuracy_list.append(acc)
    print("Train Epoch: ", "%2d, " % (epoch + 1),
          "Loss = {:.9f}, ".format(loss),
          "Accuracy = ", acc)
print("-" * 24)
duration = time() - start_time
print("Train finished takes: ", duration)
print("-" * 24)
print("Accuracy: ", sess.run(accuracy, feed_dict={x:mnist.test.images, y_label:mnist.test.labels}))
prediction_result = sess.run(tf.argmax(y_predict, 1), feed_dict={x: mnist.test.images})
print("predict result: ", prediction_result[:10])
plot_image_label_prediction(mnist.test.images, mnist.test.labels, prediction_result, num = 25)
# No tf.summary ops were created above, so merge_all() returns None here;
# the FileWriter still records the graph for TensorBoard.
merged = tf.summary.merge_all()
train_writer = tf.summary.FileWriter("log/tfCNN/", sess.graph)
sess.close()
Train Epoch: 1, Loss = 1.498806715, Accuracy = 0.9666
Train Epoch: 2, Loss = 1.484742522, Accuracy = 0.9776
Train Epoch: 3, Loss = 1.479202986, Accuracy = 0.983
Train Epoch: 4, Loss = 1.478838921, Accuracy = 0.9828
Train Epoch: 5, Loss = 1.475174189, Accuracy = 0.9868
Train Epoch: 6, Loss = 1.475596905, Accuracy = 0.9858
Train Epoch: 7, Loss = 1.473198295, Accuracy = 0.9892
Train Epoch: 8, Loss = 1.471612334, Accuracy = 0.9898
Train Epoch: 9, Loss = 1.473576903, Accuracy = 0.9876
Train Epoch: 10, Loss = 1.474220037, Accuracy = 0.9866
Train Epoch: 11, Loss = 1.471194744, Accuracy = 0.9904
Train Epoch: 12, Loss = 1.472056985, Accuracy = 0.9898
Train Epoch: 13, Loss = 1.472615361, Accuracy = 0.9886
Train Epoch: 14, Loss = 1.472827315, Accuracy = 0.9886
Train Epoch: 15, Loss = 1.471400023, Accuracy = 0.9898
Training finished, took: 759.6484830379486
Accuracy: 0.9913
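Because the FileWriter above wrote the graph to log/tfCNN/, the network structure can be inspected in TensorBoard; a typical invocation (adjust the logdir if you saved elsewhere) is:

tensorboard --logdir=log/tfCNN/

and then open http://localhost:6006 in a browser.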
Open question: I originally wanted to dump the convolved images and look at them, but after convolution the data becomes 28x28x16, and I had not yet worked out how it corresponds to the original 28x28 data.
Reference answer: https://buptldy.github.io/2016/10/01/2016-10-01-im2col/
Following the reference answer, take conv1[0][:, :, 0] from conv1: this is the first image's 28x28 response to the first of the 16 filters (with SAME padding and stride 1 each position of the map lines up with a pixel of the original image), and save the convolved image:
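A minimal sketch of that dump (it must run while the session is still open, e.g. alongside the commented-out fetch of Conv1 inside the training loop; the file name is illustrative):

# Fetch the first conv layer's activations for one batch; Conv1 only
# depends on x, so the labels do not need to be fed.
conv1 = sess.run(Conv1, feed_dict = {x: batch_x})
# conv1 has shape [batch, 28, 28, 16]; image 0, filter 0 gives a 28x28 map.
feature_map = conv1[0][:, :, 0]
plt.imsave("conv1_feature0.png", feature_map, cmap = "binary")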