The multilayer perceptron built with TensorFlow hits an accuracy ceiling, so a CNN is needed to push accuracy further. Compared with the MLP, the main additions are the convolution and pooling layers.
To implement a CNN, first define some reusable building blocks: weight, bias, conv2d (convolution), and max pooling. Second, assemble the CNN, giving every layer a name with tf.name_scope: the input layer, convolution layer 1, pooling layer 1, convolution layer 2, pooling layer 2, the flatten layer, the hidden layer, and the output layer; an optimizer also has to be defined, along with the loss function and an accuracy metric. Third, train, evaluate, and predict.
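Before reading the code, it helps to track how the tensor shape evolves through the network. The arithmetic below is a minimal sketch using the same filter and pool sizes as the code that follows; it shows why the flatten layer ends up with 1764 values per image.

# Shape walk-through for one 28x28x1 input image:
#   5x5 conv, 16 filters, stride 1, SAME padding -> 28x28x16
#   2x2 max-pool, stride 2                       -> 14x14x16
#   5x5 conv, 36 filters, stride 1, SAME padding -> 14x14x36
#   2x2 max-pool, stride 2                       -> 7x7x36
#   flatten                                      -> 7 * 7 * 36 = 1764
print(7 * 7 * 36)  # 1764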
import tensorflow as tf
import tensorflow.examples.tutorials.mnist.input_data as input_data
import matplotlib.pyplot as plt
import numpy as np
from time import time
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
def show_image(image):
    plt.imshow(image.reshape(28, 28), cmap = 'binary')
    plt.show()
def plot_image_label_prediction(images, labels, prediction = [], idx = 0, num = 10):
    fig = plt.gcf()
    fig.set_size_inches(12, 14)
    if num > 25:
        num = 25
    for i in range(0, num):
        ax = plt.subplot(5, 5, 1 + i)
        ax.imshow(np.reshape(images[idx], (28, 28)), cmap="binary")
        title = "label = " + str(np.argmax(labels[idx]))
        if len(prediction) > 0:
            title += ", prediction = " + str(prediction[idx])
        ax.set_title(title, fontsize = 10)
        ax.set_xticks([])
        ax.set_yticks([])
        idx += 1
    plt.show()
def layer(output_dim, input_dim, inputs, activation = None):
    # Fully-connected layer helper carried over from the MLP version; the CNN
    # below does not call it and builds its dense layers inline instead.
    W = tf.Variable(tf.random_normal([input_dim, output_dim]))
    b = tf.Variable(tf.random_normal([1, output_dim]))
    XWb = tf.matmul(inputs, W) + b
    if activation is None:
        outputs = XWb
    else:
        outputs = activation(XWb)
    return outputs
def weight(shape):
    # Weights start from a truncated normal with small stddev.
    return tf.Variable(tf.truncated_normal(shape, stddev = 0.1), name = "W")
def bias(shape):
    # A small positive bias keeps ReLU units active at the start of training.
    return tf.Variable(tf.constant(0.1, shape = shape), name = "b")
def conv2d(x, W):
    # Stride 1 in every dimension with SAME padding: output height and width
    # equal the input's.
    return tf.nn.conv2d(x, W, strides = [1, 1, 1, 1], padding = 'SAME')
def max_pool_2x2(x):
    # 2x2 pooling window moved 2 pixels at a time: halves height and width.
    return tf.nn.max_pool(x, ksize = [1, 2, 2, 1],
                          strides = [1, 2, 2, 1],
                          padding = 'SAME')
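# Quick sanity check for the two helpers above (optional; hypothetical
# snippet for an interactive session, not part of the training script):
#   t = tf.zeros([1, 28, 28, 1])
#   conv2d(t, weight([5, 5, 1, 16])).shape   # (1, 28, 28, 16) -- size kept
#   max_pool_2x2(t).shape                    # (1, 14, 14, 1)  -- size halved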
mnist = input_data.read_data_sets("data/MNIST_data/", one_hot = True)
#show_image(mnist.train.images[0])
print("labels]0]: ", mnist.train.labels[0])
print("labels[0]: ", np.argmax(mnist.train.labels[0]))
#plot_image_label_prediction(mnist.train.images, mnist.train.labels)
#batch_images, batch_labels = mnist.train.next_batch(batch_size = 100)
#plot_image_label_prediction(batch_images, batch_labels)
with tf.name_scope("Input_layer"):
x = tf.placeholder("float", [None, 784], name = "x")
x_image = tf.reshape(x, [-1, 28, 28, 1])
with tf.name_scope("C1_Conv"):
W1 = weight([5, 5, 1, 16])
b1 = bias([16])
Conv1 = conv2d(x_image, W1) + b1
C1_Conv = tf.nn.relu(Conv1)
with tf.name_scope("C1_Pool"):
C1_Pool = max_pool_2x2(C1_Conv)
with tf.name_scope("C2_Conv"):
W2 = weight([5, 5, 16, 36])
b2 = bias([36])
Conv2 = conv2d(C1_Pool, W2) + b2
C2_Conv = tf.nn.relu(Conv2)
with tf.name_scope("C2_Pool"):
C2_Pool = max_pool_2x2(C2_Conv)
with tf.name_scope("D_Flat"):
D_Flat = tf.reshape(C2_Pool, [-1, 1764])
with tf.name_scope("D_Hidden_Layer"):
W3 = weight([1764, 128])
b3 = bias([128])
D_Hidden = tf.nn.relu(tf.matmul(D_Flat, W3) + b3)
D_Hidden_Dropout = tf.nn.dropout(D_Hidden, keep_prob = 0.8)
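    # Note: keep_prob is hard-coded into the graph, so dropout stays active
    # even when evaluating on the validation and test sets below. The usual
    # TF1 pattern (not used here, sketched for reference) feeds it instead:
    #   keep_prob = tf.placeholder(tf.float32)
    #   D_Hidden_Dropout = tf.nn.dropout(D_Hidden, keep_prob = keep_prob)
    # with keep_prob fed as 0.8 during training and 1.0 during evaluation.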
with tf.name_scope("Output_layer"):
W4 = weight([128, 10])
b4 = bias([10])
y_predict = tf.nn.softmax(tf.matmul(D_Hidden_Dropout, W4) + b4)
with tf.name_scope("Optimizer"):
y_label = tf.placeholder("float", [None, 10], name = "y_label")
loss_function = tf.reduce_mean(
tf.nn.softmax_cross_entropy_with_logits(
logits = y_predict, labels = y_label))
optimizer = tf.train.AdamOptimizer(learning_rate = 0.001).minimize(loss_function)
with tf.name_scope("evaluate_accuracy"):
correct_predict = tf.equal(tf.argmax(y_label, 1), tf.argmax(y_predict, 1))
accuracy = tf.reduce_mean(tf.cast(correct_predict, "float"))
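# Example of the accuracy computation: if y_label's one-hot peak and
# y_predict's largest probability are both at index 7, tf.argmax returns 7
# for each, tf.equal gives True, and averaging the cast 0/1 values over a
# batch yields the fraction of correctly classified images.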
train_epochs = 15
batch_size = 100
total_batches = int(mnist.train.num_examples/batch_size)
loss_list = []
epoch_list = []
accuracy_list = []
start_time = time()
sess = tf.Session()
sess.run(tf.global_variables_initializer())
print("-" * 24)
for epoch in range(train_epochs):
    for i in range(total_batches):
        batch_x, batch_y = mnist.train.next_batch(batch_size)
        #conv1, opt = sess.run([Conv1, optimizer], feed_dict = {x: batch_x, y_label: batch_y})
        sess.run(optimizer, feed_dict = {x: batch_x, y_label: batch_y})
    # After each epoch, record loss and accuracy on the validation set.
    loss, acc = sess.run([loss_function, accuracy],
                         feed_dict = {x: mnist.validation.images, y_label: mnist.validation.labels})
    epoch_list.append(epoch)
    loss_list.append(loss)
    accuracy_list.append(acc)
    print("Train Epoch: ", "%2d, " % (epoch + 1),
          "Loss = {:.9f}, ".format(loss),
          "Accuracy = ", acc)
print("-" * 24)
duration = time() - start_time
print("Train finished takes: ", duration)
print("-" * 24)
print("Accuracy: ", sess.run(accuracy, feed_dict={x:mnist.test.images, y_label:mnist.test.labels}))
prediction_result = sess.run(tf.argmax(y_predict, 1), feed_dict={x: mnist.test.images})
print("predict result: ", prediction_result[:10])
plot_image_label_prediction(mnist.test.images, mnist.test.labels, prediction_result, num = 25)
# No tf.summary ops were created above, so merge_all() returns None here;
# the FileWriter still records the graph for TensorBoard.
merged = tf.summary.merge_all()
train_writer = tf.summary.FileWriter("log/tfCNN/", sess.graph)
sess.close()
Train Epoch: 1, Loss = 1.498806715, Accuracy = 0.9666
Train Epoch: 2, Loss = 1.484742522, Accuracy = 0.9776
Train Epoch: 3, Loss = 1.479202986, Accuracy = 0.983
Train Epoch: 4, Loss = 1.478838921, Accuracy = 0.9828
Train Epoch: 5, Loss = 1.475174189, Accuracy = 0.9868
Train Epoch: 6, Loss = 1.475596905, Accuracy = 0.9858
Train Epoch: 7, Loss = 1.473198295, Accuracy = 0.9892
Train Epoch: 8, Loss = 1.471612334, Accuracy = 0.9898
Train Epoch: 9, Loss = 1.473576903, Accuracy = 0.9876
Train Epoch: 10, Loss = 1.474220037, Accuracy = 0.9866
Train Epoch: 11, Loss = 1.471194744, Accuracy = 0.9904
Train Epoch: 12, Loss = 1.472056985, Accuracy = 0.9898
Train Epoch: 13, Loss = 1.472615361, Accuracy = 0.9886
Train Epoch: 14, Loss = 1.472827315, Accuracy = 0.9886
Train Epoch: 15, Loss = 1.471400023, Accuracy = 0.9898
Training finished, took: 759.6484830379486
Accuracy: 0.9913
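Because the FileWriter above wrote the graph to log/tfCNN/, the network structure can be inspected in TensorBoard; a typical invocation (adjust the logdir if you saved elsewhere) is:

tensorboard --logdir=log/tfCNN/

and then open http://localhost:6006 in a browser.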
Open question: I originally wanted to dump the convolved images and look at them, but after convolution the data becomes 28x28x16, and I had not yet worked out how it corresponds to the original 28x28 data.
Reference answer: https://buptldy.github.io/2016/10/01/2016-10-01-im2col/
Following the reference answer, take conv1[0][:, :, 0] from conv1: this is the first image's 28x28 response to the first of the 16 filters (with SAME padding and stride 1 each position of the map lines up with a pixel of the original image), and save the convolved image:
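A minimal sketch of that dump (it must run while the session is still open, e.g. alongside the commented-out fetch of Conv1 inside the training loop; the file name is illustrative):

# Fetch the first conv layer's activations for one batch; Conv1 only
# depends on x, so the labels do not need to be fed.
conv1 = sess.run(Conv1, feed_dict = {x: batch_x})
# conv1 has shape [batch, 28, 28, 16]; image 0, filter 0 gives a 28x28 map.
feature_map = conv1[0][:, :, 0]
plt.imsave("conv1_feature0.png", feature_map, cmap = "binary")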