TensorFlow 1: LeNet-5 Handwritten Digit Recognition

import numpy as np
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data  # module for loading the MNIST dataset
from datetime import datetime
import os

batch_size = 100  # 100 samples per training batch; the training set has 55000 samples, so one epoch takes 550 steps.
img_size = 28     # width/height of each handwritten-digit image
num_class = 10    # number of image classes
num_epochs = 2    # number of training epochs
display_step = 2  # log the loss (and summaries) every 2 steps
IMAGE_SIZE = 28   # image size
NUM_CHANNELS = 1  # number of image channels

X = tf.placeholder(dtype=tf.float32, shape=[None, img_size, img_size, 1], name='input')
Y = tf.placeholder(dtype=tf.float32, shape=[None, num_class])

# Keep probability for the dropout layers below: the fraction of activations kept (not dropped).
p_keep = tf.placeholder(tf.float32, name='p_keep_rate')

mnist = input_data.read_data_sets('dataset', one_hot=True)  # load the MNIST dataset
train_X, train_Y, test_X, test_Y = mnist.train.images, mnist.train.labels, mnist.test.images, mnist.test.labels
# MNIST images are 28x28 pixels, so each image arrives as one row of 784 (28x28) values, one value per pixel.
# mnist.train.images is a tensor of shape [55000, 784]: the first dimension indexes the image, the second indexes the pixel within it.
# Each element is a pixel intensity; read_data_sets rescales the raw 0-255 bytes to floats in [0, 1].
# train_Y has shape (55000, 10): the first dimension indexes the sample, the second the label. Each row is a one-hot encoding over the digits 0-9: a single 1 and nine 0s.
# test_Y has shape (10000, 10).

train_X = train_X.reshape(-1, img_size, img_size, 1)  # both splits arrive flattened to length 784 and must be reshaped
test_X = test_X.reshape(-1, img_size, img_size, 1)  # the -1 lets NumPy infer the batch dimension
# train_X becomes (55000, 28, 28, 1)
# test_X becomes (10000, 28, 28, 1) -- see the sanity check below
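
# A quick optional sanity check (a sketch, assuming the standard MNIST split
# of 55000 training and 10000 test images):
assert train_X.shape == (55000, img_size, img_size, 1)
assert test_X.shape == (10000, img_size, img_size, 1)
assert train_Y.shape == (55000, num_class)
print('one-hot label of sample 0:', train_Y[0])  # a 10-vector with a single 1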

# First convolutional layer
# The kernel tensor is 3x3x1x32, i.e. 32 kernels of size 3x3x1; with stride 1 and SAME padding the output has 32 feature maps (a 32-channel image): 28x28x32.
with tf.name_scope('cnn_layer_01') as cnn_01:
    # tf.Variable is a graph variable: tf.Variable(initializer, name), where initializer provides the initial value and name is an optional variable name.
    # shape is the shape of the generated tensor; stddev is the standard deviation of the normal distribution (default 1.0).
    w1 = tf.Variable(tf.random_normal(shape=[3, 3, 1, 32], stddev=0.01))
    # tf.nn.conv2d(input, filter, strides, padding, use_cudnn_on_gpu=None, name=None)
    # input is the image being convolved, a 4-D tensor of shape [batch, in_height, in_width, in_channels];
    # filter is the kernel, with shape [filter_height, filter_width, in_channels, out_channels];
    # strides gives the step in each input dimension, a length-4 vector [1, stride, stride, 1]; the first and last entries must be 1;
    # padding selects the padding scheme: "SAME" zero-pads so the output size is ceil(input_size / stride), while "VALID" pads nothing and drops any leftover rows/columns;
    # use_cudnn_on_gpu: bool, whether to use cuDNN acceleration (default True);
    # tf.nn.conv2d() returns a Tensor -- the feature map -- of shape [batch, height, width, channels].
    conv1 = tf.nn.conv2d(X, w1, strides=[1, 1, 1, 1], padding="SAME")
    # tf.nn.relu(features, name=None) computes max(features, 0): positive values pass through, negative values become 0.
    conv_y1 = tf.nn.relu(conv1)
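    # Worked example of the SAME-padding output size, out = ceil(in / stride):
    # here ceil(28 / 1) = 28, so conv_y1 has static shape (?, 28, 28, 32).
    # The print runs at graph-construction time and is safe to keep:
    print('conv_y1:', conv_y1.get_shape())  # (?, 28, 28, 32)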

# First pooling layer
# Max pooling with a 2x2 window and stride 2, so the output is 14x14x32.
with tf.name_scope('pool_layer_01') as pool_01:
    # tf.nn.max_pool(value, ksize, strides, padding, name=None)
    # value is the input to pool, usually a feature map of shape [batch, height, width, channels];
    # ksize is the pooling window, a length-4 vector [1, height, width, 1]; we do not pool over batch or channels, so those entries are 1;
    # strides is the window step in each dimension, [1, stride, stride, 1];
    # padding is 'VALID' or 'SAME';
    # tf.nn.max_pool() returns a Tensor of the same dtype with shape [batch, height, width, channels].
    pool_y2 = tf.nn.max_pool(conv_y1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
    # tf.nn.dropout is TensorFlow's tool for reducing overfitting, most often applied to fully connected layers.
    # During each training step dropout randomly deactivates a fraction of neurons: a deactivated neuron's output is zeroed and its weights are not updated in that step.
    # The weights themselves are kept (just not updated), because the neuron may be active again for the next batch.
    # tf.nn.dropout(x, keep_prob, noise_shape=None, seed=None, name=None)
    # keep_prob is the probability that each neuron is kept; here it is fed through a placeholder.
    pool_y2 = tf.nn.dropout(pool_y2, p_keep)
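    # A minimal illustration of tf.nn.dropout's inverted scaling: surviving
    # activations are multiplied by 1/keep_prob so the expected sum stays the
    # same between training and inference. Run separately if curious:
    #   x = tf.ones([4])
    #   y = tf.nn.dropout(x, keep_prob=0.5)  # kept entries become 2.0, dropped ones 0.0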

# Second convolutional layer
# The kernel tensor is 3x3x32x64, i.e. 64 kernels of size 3x3x32; with stride 1 the output has 64 feature maps (a 64-channel image): 14x14x64.
with tf.name_scope('cnn_layer_02') as cnn_02:
    w2 = tf.Variable(tf.random_normal(shape=[3, 3, 32, 64], stddev=0.01))
    conv2 = tf.nn.conv2d(pool_y2, w2, strides=[1, 1, 1, 1], padding="SAME")
    conv_y3 = tf.nn.relu(conv2)

# Second pooling layer
# Max pooling with a 2x2 window and stride 2, so the output is 7x7x64.
with tf.name_scope('pool_layer_02') as pool_02:
    pool_y4 = tf.nn.max_pool(conv_y3, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
    pool_y4 = tf.nn.dropout(pool_y4, p_keep)

# Third convolutional layer
# The kernel tensor is 3x3x64x128, i.e. 128 kernels of size 3x3x64; with stride 1 the output has 128 feature maps (a 128-channel image): 7x7x128.
with tf.name_scope('cnn_layer_03') as cnn_03:
    w3 = tf.Variable(tf.random_normal(shape=[3, 3, 64, 128], stddev=0.01))
    conv3 = tf.nn.conv2d(pool_y4, w3, strides=[1, 1, 1, 1], padding="SAME")
    conv_y5 = tf.nn.relu(conv3)

# Third pooling layer
# Max pooling with a 2x2 window and stride 2; SAME padding rounds up, so the output is 4x4x128.
with tf.name_scope('pool_layer_03') as pool_03:
    pool_y6 = tf.nn.max_pool(conv_y5, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
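    # Shape trace through the stack so far (SAME padding, so out = ceil(in / stride)):
    #   input 28x28x1 -> conv1 28x28x32 -> pool1 14x14x32 -> conv2 14x14x64
    #   -> pool2 7x7x64 -> conv3 7x7x128 -> pool3 ceil(7/2)=4 -> 4x4x128
    print('pool_y6:', pool_y6.get_shape())  # (?, 4, 4, 128)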

# Fully connected layer
# The 128x4x4 input is flattened to one dimension; the output has 625 units.
with tf.name_scope('full_layer_01') as full_01:
    w4 = tf.Variable(tf.random_normal(shape=[128*4*4, 625], stddev=0.01))
    FC_layer = tf.reshape(pool_y6, [-1, w4.get_shape().as_list()[0]])
    FC_layer = tf.nn.dropout(FC_layer, p_keep)
    # tf.matmul(a, b) computes the matrix product of a and b.
    FC_y7 = tf.matmul(FC_layer, w4)
    FC_y7 = tf.nn.relu(FC_y7)
    FC_y7 = tf.nn.dropout(FC_y7, p_keep)
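    # Flattening check: (?, 4, 4, 128) reshapes to (?, 4*4*128) = (?, 2048),
    # which matches w4's first dimension (128*4*4 == 2048).
    print('FC_y7:', FC_y7.get_shape())  # (?, 625)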

# Output layer; model_Y holds the network's predictions (logits, with no softmax applied here).
# Input size 625, output size 10.
with tf.name_scope('output_layer') as output_layer:
    w5 = tf.Variable(tf.random_normal(shape=[625, num_class]))
    model_Y = tf.matmul(FC_y7, w5, name='output')

# Loss function: softmax_cross_entropy_with_logits applies softmax internally and returns one loss per example.
Y_ = tf.nn.softmax_cross_entropy_with_logits(logits=model_Y, labels=Y)
cost = tf.reduce_mean(Y_)
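
# A small worked example of what the loss computes (numbers are illustrative):
# softmax turns logits into probabilities p, and the cross-entropy for a
# one-hot label y is -sum_i y_i * log(p_i).
# E.g. for p = [0.7, 0.2, 0.1] and y = [1, 0, 0] the loss is -log(0.7) ≈ 0.357.
# softmax_cross_entropy_with_logits returns one such value per example
# (shape [batch]); reduce_mean averages them into a scalar.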

# Accuracy
# tf.equal(A, B) compares the two tensors elementwise, returning True where they match and False elsewhere, in a tensor with the same shape as A.
# tf.argmax() returns the index of the largest value along a given dimension; axis=0 works down the columns, axis=1 across the rows.
correct_prediction = tf.equal(tf.argmax(model_Y, axis=1), tf.argmax(Y, axis=1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
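
# Worked example: if a batch of 2 has model_Y rows peaking at indices [3, 7]
# and the one-hot labels peak at [3, 1], tf.equal yields [True, False];
# casting to float32 and averaging gives an accuracy of 0.5.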

# Optimization
# RMSProp optimizer; the two positional arguments are the global learning rate and the decay rate.
optimizer = tf.train.RMSPropOptimizer(0.001, 0.9).minimize(cost)
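
# For reference, a sketch of the RMSProp update (decay rho = 0.9, learning
# rate lr = 0.001): it keeps a running average of squared gradients,
#   ms <- rho * ms + (1 - rho) * g^2
#   w  <- w - lr * g / sqrt(ms + eps)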

# Write the relevant quantities to TensorBoard
tf.summary.scalar('loss', cost)
tf.summary.scalar('accuracy', accuracy)
tf.summary.histogram('w1', w1)
tf.summary.histogram('w2', w2)
tf.summary.histogram('w3', w3)
tf.summary.histogram('w4', w4)
tf.summary.histogram('w5', w5)
merged_summary = tf.summary.merge_all()
writer = tf.summary.FileWriter('mnist_summary')
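
# Once training has run, the logged summaries can be inspected with the
# TensorBoard CLI (run from a shell):
#   tensorboard --logdir mnist_summary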

# Number of batches per epoch
train_batches_per_epoch = int(len(train_X)/batch_size)
test_batches_per_epoch = int(len(test_X)/batch_size)
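# With the standard split this works out to 55000 / 100 = 550 training batches
# and 10000 / 100 = 100 test batches per epoch.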
# Create the checkpoint directory and a single Saver up front (re-creating the
# Saver every epoch would keep adding duplicate save ops to the graph)
if not os.path.exists('mnist_model'):
    os.makedirs('mnist_model')
saver = tf.train.Saver()
# Build the session
with tf.Session() as sess:
    tf.global_variables_initializer().run()
    writer.add_graph(sess.graph)

    for epoch in range(num_epochs):
        print("{}: Epoch number: {} start".format(datetime.now(), epoch+1))

        # Training
        for step in range(train_batches_per_epoch):
            img_batch, label_batch = mnist.train.next_batch(batch_size)
            # Reshape the flat input batch into a 4-D tensor
            reshaped_img_batch = np.reshape(img_batch, (batch_size, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS))

            opti, summary, loss, accu = sess.run([optimizer, merged_summary, cost, accuracy],
                                                 feed_dict={X: reshaped_img_batch, Y: label_batch,
                                                            p_keep: 0.8})
            # Log the loss every display_step steps
            if step % display_step == 0:
                # Report the loss
                print("{}: step = {}  loss = {}".format(datetime.now(), step, loss))
                # TensorBoard visualization
                writer.add_summary(summary, epoch * train_batches_per_epoch + step)

        # Evaluate accuracy on the test set
        print("{}: Start validation".format(datetime.now()))
        test_accu = 0.
        test_count = 0

        for _ in range(test_batches_per_epoch):
            img_batch, label_batch = mnist.test.next_batch(batch_size)
            reshaped_img_batch = np.reshape(img_batch, (batch_size, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS))
            accu = sess.run(accuracy, feed_dict={X: reshaped_img_batch, Y: label_batch,
                                                 p_keep: 1.0})  # keep_prob = 1.0 disables dropout at evaluation time
            test_accu += accu
            test_count += 1
            
        try:
            test_accu /= test_count
        except ZeroDivisionError:
            print('ZeroDivisionError!')
        print("{}: Validation Accuracy = {:.4f}".format(datetime.now(), test_accu))

        # Save a checkpoint at the end of this epoch
        checkpoint_name = os.path.join('mnist_model', 'model_epoch' + str(epoch + 1))
        path = saver.save(sess, checkpoint_name)
        print("{}: Epoch number: {} end".format(datetime.now(), epoch + 1))
