# Define the forward-propagation process of the neural network
# -*- coding: utf-8 -*-
import tensorflow as tf  # load TensorFlow
INPUT_NODE = 784
# Number of nodes in the input layer x; 784 = 28*28, the pixel count of one MNIST image
OUTPUT_NODE = 10
# Number of nodes in the output layer y (and the width of the ground-truth y_), one per digit 0~9
LAYER1_NODE = 500
# Only one hidden layer is used here, with 500 nodes
def get_weight_variables(shape, regularizer):
    weights = tf.get_variable("weights", shape, initializer=tf.truncated_normal_initializer(stddev=0.1))
    if regularizer is not None:
        tf.add_to_collection('losses', regularizer(weights))
    return weights
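# Note (added): the training script below sums this collection into the total loss via
# tf.add_n(tf.get_collection('losses')), so each call to this helper contributes one L2 term
# per weight matrix.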
#=========================================================================================================================================
#==========================================================inference()====================================================================
#=========================================================================================================================================
# inference() computes the forward-propagation result of the network and is used at prediction time.
# It defines a 3-layer fully connected network with ReLU activations: the hidden layer gives the
# multi-layer structure, and the ReLU activation removes the linearity.
# The helper can also be combined with a class that computes moving averages of the parameters,
# which makes it convenient to use the moving-average model at test time.
def inference(input_tensor, regularizer):
    with tf.variable_scope('layer1'):
        weights = get_weight_variables([INPUT_NODE, LAYER1_NODE], regularizer)
        biases = tf.get_variable("biases", [LAYER1_NODE], initializer=tf.constant_initializer(0.0))
        layer1 = tf.nn.relu(tf.matmul(input_tensor, weights) + biases)
    with tf.variable_scope('layer2'):
        weights = get_weight_variables([LAYER1_NODE, OUTPUT_NODE], regularizer)
        biases = tf.get_variable("biases", [OUTPUT_NODE], initializer=tf.constant_initializer(0.0))
        layer2 = tf.matmul(layer1, weights) + biases
    return layer2
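# Minimal optional self-test (an addition, not part of the original listing): running this file
# directly builds the forward pass on a dummy placeholder and checks that one L2 regularization
# term per weight matrix landed in the 'losses' collection. It has no effect when the module is
# imported by the training/evaluation scripts below.
if __name__ == '__main__':
    with tf.Graph().as_default():
        x = tf.placeholder(tf.float32, [None, INPUT_NODE], name='x-input')
        regularizer = tf.contrib.layers.l2_regularizer(0.0001)
        y = inference(x, regularizer)
        print(y.get_shape())                     # (?, 10)
        print(len(tf.get_collection('losses')))  # 2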
# Define the model training / parameter optimization process of the neural network
# -*- coding: utf-8 -*-
import os
import tensorflow as tf  # load TensorFlow
from tensorflow.examples.tutorials.mnist import input_data  # MNIST dataset loader
import test5_1_mnist_inference
BATCH_SIZE = 100
# Number of training examples in one batch.
# The smaller the batch, the closer the process is to stochastic gradient descent;
# the larger the batch, the closer it is to (full-batch) gradient descent.
LEARNING_RATE_BASE = 0.8
# Base learning rate
LEARNING_RATE_DECAY = 0.99
# Decay rate of the learning rate
REGULARIZATION_RATE = 0.0001
# Weight of the regularization term (which describes model complexity) in the total loss
TRAINING_STEPS = 20002
# Number of training steps
MOVING_AVERAGE_DECAY = 0.99
# Decay rate of the moving average
MODEL_SAVE_PATH = "/Users/deltasimon/Desktop/Mch_Learn/02test/05MNIST_dealing/to/model/"
MODEL_NAME = "model_mnist_001.ckpt"
# Note: each training step processes one batch of samples.
# x          is a (batch_size * 784) matrix: one batch of image inputs
# y_         is a (batch_size * 10 ) matrix: one batch of ground-truth answers
# y          is a (batch_size * 10 ) matrix: the forward-propagation output for one batch (without the moving average)
# average_y  is a (batch_size * 10 ) matrix: the forward-propagation output for one batch (with the moving average)
#=========================================================================================================================================
#==========================================================train(mnist)===================================================================
#=========================================================================================================================================
# train() is used at training time: it trains the model and optimizes the parameters
def train(mnist):
    # Dataset placeholders
    x = tf.placeholder(tf.float32, [None, test5_1_mnist_inference.INPUT_NODE], name='x-input')
    y_ = tf.placeholder(tf.float32, [None, test5_1_mnist_inference.OUTPUT_NODE], name='y-input')
    #
    regularizer = tf.contrib.layers.l2_regularizer(REGULARIZATION_RATE)
    #===============================================================
    #========== Forward-propagation result "y" without the moving average ==========
    #===============================================================
    y = test5_1_mnist_inference.inference(x, regularizer)
    #===============================================================
    #===== Moving-average op variables_averages_op & the corresponding forward result "average_y" =====
    #===============================================================
    global_step = tf.Variable(0, trainable=False)
    # Current training step, counted from 0.
    # This variable does not need a moving average, so it is created as not trainable.
    variable_averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
    # Define the moving-average class instance variable_averages.
    # It provides variable_averages.apply(), variable_averages.average(), etc.
    # variable_averages.apply() returns an operation that updates the moving averages.
    # variable_averages.average() returns the moving-averaged value of a parameter (its shadow value).
    variables_averages_op = variable_averages.apply(tf.trainable_variables())
    # tf.trainable_variables() returns the elements of the GraphKeys.TRAINABLE_VARIABLES collection,
    # i.e. every variable that was not created with trainable=False.
    # variable_averages.apply() therefore builds the op that updates the moving average of every trainable network parameter.
    # average_y = inference(x, variable_averages, weights1, biases1, weights2, biases2)
    # Forward-propagation result computed with the moving-averaged (shadow) values of the parameters.
    # The moving average does not change a parameter's own value; the averaged result is stored separately
    # in a shadow variable, so average() must be called explicitly whenever the averaged value is needed.
    # The forward-propagation result of the network (including the softmax layer) obtained this way is average_y.
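    # Illustrative sketch (added, not from the original listing) of the apply()/average() semantics
    # on a toy variable in a separate graph:
    #   v = tf.Variable(0, dtype=tf.float32)
    #   ema = tf.train.ExponentialMovingAverage(0.99)
    #   maintain_op = ema.apply([v])   # creates the shadow variable and returns its update op
    #   shadow_v = ema.average(v)      # the shadow variable holding the averaged value
    # Each sess.run(maintain_op) performs  shadow = decay * shadow + (1 - decay) * v,
    # where decay = min(0.99, (1 + num_updates) / (10 + num_updates)) when, as above,
    # a num_updates argument (here global_step) is passed to the constructor.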
    #===============================================================
    #============ Cross entropy between y and y_ as the raw loss ============
    #===============================================================
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y, labels=tf.argmax(y_, 1))
    # logits is the unnormalized network output y (batch_size*10), i.e. the input to the softmax layer;
    # the function applies softmax to it internally.
    # labels must be the class indices rather than one-hot vectors; the function does not convert them,
    # so the one-hot y_ (batch_size*10) is turned into a (batch_size*1) vector of digits by tf.argmax(y_, 1).
    # The function then computes the cross entropy between softmax(y) and these labels.
    # TensorFlow fuses the softmax and cross-entropy computations into a single op for speed.
    # The result cross_entropy is a (batch_size*1) vector.
    cross_entropy_mean = tf.reduce_mean(cross_entropy)
    # Average cross entropy over all samples in the current batch.
    # cross_entropy_mean is a 1*1 matrix, i.e. a scalar.
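    # Note (added): because y_ is one-hot here, the same quantity could also be computed with the
    # dense variant; the following two expressions are equivalent for this model:
    #   tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y, labels=tf.argmax(y_, 1))
    #   tf.nn.softmax_cross_entropy_with_logits(logits=y, labels=y_)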
    #===============================================================
    #========== Regularization term (model complexity, prevents overfitting) & the final total loss ==========
    #===============================================================
    # regularizer = tf.contrib.layers.l2_regularizer(REGULARIZATION_RATE)
    # L2 regularization loss
    # regularization = regularizer(weights1) + regularizer(weights2)
    # Usually only the weight terms of the network are used to describe model complexity for the regularization loss.
    loss = cross_entropy_mean + tf.add_n(tf.get_collection('losses'))
    # Total loss = cross entropy + regularization loss
    #===============================================================
    #========== Exponentially decayed learning rate & the training op train_step ==========
    #===============================================================
    learning_rate = tf.train.exponential_decay(LEARNING_RATE_BASE, global_step, mnist.train.num_examples/BATCH_SIZE, LEARNING_RATE_DECAY)
    # Exponentially decayed learning rate:
    # LEARNING_RATE_BASE                     base learning rate
    # global_step                            current training step
    # mnist.train.num_examples/BATCH_SIZE    number of steps needed to go through all training data once
    # LEARNING_RATE_DECAY                    decay rate of the learning rate
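    # Worked example (added, assuming the standard 55,000-image MNIST training split): with the
    # default staircase=False, decay_steps = 55000 / 100 = 550, so after 5500 steps the learning
    # rate is 0.8 * 0.99 ** (5500 / 550) = 0.8 * 0.99 ** 10 ≈ 0.723.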
    train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)
    # One training step: optimize the parameters with gradient descent (global_step is incremented automatically)
    #===============================================================
    #========== Define train_op: the training op + the moving-average op ==========
    #===============================================================
    # Group several operations under one name, train_op, so that they can all be run in a single call
    with tf.control_dependencies([train_step, variables_averages_op]):
        train_op = tf.no_op(name='train')
    # The two lines above have the same effect as:
    # train_op = tf.group(train_step, variables_averages_op)
    # Both tf.control_dependencies and tf.group combine several operations under one name so they can be run at once.
    saver = tf.train.Saver()
    #===============================================================
    #========== Check whether the forward pass of the moving-average model is correct ==========
    #===============================================================
    #correct_prediction = tf.equal(tf.argmax(average_y, 1), tf.argmax(y_, 1))
    # tf.argmax(average_y, 1) returns the predicted answers for one batch under the moving-average model.
    # average_y is a (batch_size * 10) matrix; the argument 1 means the maximum is taken along the second
    # dimension, i.e. over the columns 0~9.
    # tf.argmax(average_y, 1) is therefore a (batch_size * 1) vector whose elements are the predicted digits 0~9.
    # tf.argmax(y_, 1) returns the ground-truth answers of the batch.
    # correct_prediction is a (batch_size * 1) boolean vector.
    #accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    # tf.cast() converts the boolean correct_prediction to float.
    # tf.reduce_mean() averages correct_prediction over the batch.
    # accuracy is a 1*1 matrix, i.e. a scalar: the accuracy on one batch.
    # -------------------------------------------------------------------------
    # -------------------- Create the session and run the graph --------------------
    # -------------------------------------------------------------------------
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    # Initialize all variables
    #validate_feed = {x: mnist.validation.images, y_: mnist.validation.labels}
    # Prepare the validation data. Both training data and validation data take part in the training process.
    # The validation set is used to get a rough stopping criterion and to judge how well training is going.
    #test_feed = {x: mnist.test.images, y_: mnist.test.labels}
    # Prepare the test data. In a real application this data is invisible during training;
    # it is only used at the very end to judge the quality of the model.
    # ======================= Train the network =======================
    for i in range(TRAINING_STEPS):
        xs, ys = mnist.train.next_batch(BATCH_SIZE)
        # Generate one batch of training data for this step
        _, loss_value, step = sess.run([train_op, loss, global_step], feed_dict={x: xs, y_: ys})
        #sess.run(train_op, feed_dict={x: xs, y_: ys})
        # Run one training step on this batch.
        # Every 1000 steps, report progress and save the current model
        if i % 1000 == 1:
            #validate_acc = sess.run(accuracy, feed_dict=validate_feed)
            # Validation accuracy, i.e. how the moving-average model performs on the validation data.
            # Because MNIST is small, all validation data can be processed at once;
            # for simplicity this example does not split the validation data into smaller batches.
            # For complex models or large validation sets, an oversized batch can make the computation
            # very slow or even cause out-of-memory errors.
            #test_acc = sess.run(accuracy, feed_dict=test_feed)
            # Test accuracy, i.e. how the moving-average model performs on the test data.
            # Because MNIST is small, all test data can be processed at once.
            print("After %d training step(s), loss on training batch is %g." % (i, loss_value))
            # Print the loss on the current training batch
            saver.save(sess, os.path.join(MODEL_SAVE_PATH, MODEL_NAME), global_step=global_step)
    # ======================= Training finished =======================
    sess.close()
    # ---------------------------------------------------------------
    # -------------------- Close the session; the graph run is finished --------------------
    # ---------------------------------------------------------------
#=========================================================================================================================================
#=============================================================main()======================================================================
#=========================================================================================================================================
# Main entry point
def main(argv=None):
    mnist = input_data.read_data_sets("/tmp/data", one_hot=True)
    train(mnist)
if __name__ == '__main__':
    tf.app.run()
# Define the evaluation script
# -*- coding: utf-8 -*-
import tensorflow as tf  # load TensorFlow
from tensorflow.examples.tutorials.mnist import input_data  # MNIST dataset loader
import time  # time module
import test5_1_mnist_inference  # load test5_1_mnist_inference.py
import test5_2_mnist_train  # load test5_2_mnist_train.py
# Load the latest model every 10 seconds and compute its accuracy on the validation set
EVAL_INTERVAL_SECS = 10
def evaluate(mnist):
    with tf.Graph().as_default() as g:
        # Define the input and output format
        x = tf.placeholder(tf.float32, [None, test5_1_mnist_inference.INPUT_NODE], name='x-input')
        y_ = tf.placeholder(tf.float32, [None, test5_1_mnist_inference.OUTPUT_NODE], name='y-input')
        # Define the evaluation feed
        validate_feed = {x: mnist.validation.images, y_: mnist.validation.labels}
        # Taken from mnist.validation, i.e. the MNIST validation set
        # Use the forward propagation defined in test5_1_mnist_inference.py to compute the model's predictions
        y = test5_1_mnist_inference.inference(x, None)
        # The regularization loss is irrelevant at evaluation time, so the regularizer argument is set to None
        correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
        # Compare y with y_; correct_prediction is a (batch_size*1) boolean vector
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        # accuracy is the average accuracy over the batch
        variable_averages = tf.train.ExponentialMovingAverage(test5_2_mnist_train.MOVING_AVERAGE_DECAY)
        variables_to_restore = variable_averages.variables_to_restore()
        saver = tf.train.Saver(variables_to_restore)
        # Create the tf.train.Saver object saver.
        # Variable renaming is used here, so that the moving-average (shadow) values in the checkpoint
        # are restored directly into the model's variables.
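        # Illustrative note (added): variables_to_restore() returns a dict mapping shadow-variable
        # names to this graph's variables, roughly
        #   {'layer1/weights/ExponentialMovingAverage': layer1/weights, ...},
        # which is why restoring the training checkpoint through this saver loads the averaged values.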
        # Call the accuracy computation every EVAL_INTERVAL_SECS = 10 seconds
        # to watch how the accuracy changes during training
        while True:
            with tf.Session() as sess:
                ckpt = tf.train.get_checkpoint_state(test5_2_mnist_train.MODEL_SAVE_PATH)
                # get_checkpoint_state() uses the checkpoint file to find the file name of the latest model in the directory
                # If both ckpt and ckpt.model_checkpoint_path are valid:
                if ckpt and ckpt.model_checkpoint_path:
                    # load the model
                    saver.restore(sess, ckpt.model_checkpoint_path)
                    global_step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
                    # Recover the number of training steps at save time from the file name
                    accuracy_score = sess.run(accuracy, feed_dict=validate_feed)
                    # Accuracy on the validation set
                    print("After %s training step(s), validation accuracy = %g" % (global_step, accuracy_score))
                    # Print the validation accuracy
                # If the file or path does not exist:
                else:
                    print('No checkpoint file found')
                    return
            time.sleep(EVAL_INTERVAL_SECS)
def main(argv=None):
    mnist = input_data.read_data_sets("/tmp/data", one_hot=True)
    evaluate(mnist)
if __name__ == '__main__':
    tf.app.run()
# For the program to behave as intended (every 10 seconds, load the latest model in the directory
# and watch how the validation accuracy changes during training), note:
# If train has already been run, first delete all saved models under the /to/model directory;
# otherwise this script would immediately pick up the final model produced after all training steps,
# and the change of the validation accuracy during training could not be observed.
# Then:
# run train again,
# and start eval right afterwards.
# This way the validation accuracy of the model can be watched while training is still running.
# Example output from a normal run:
# After 2002 training step(s), validation accuracy = 0.9816
# After 3002 training step(s), validation accuracy = 0.9832
# After 4002 training step(s), validation accuracy = 0.9834
# After 5002 training step(s), validation accuracy = 0.9842
# After 6002 training step(s), validation accuracy = 0.9838
# After 6002 training step(s), validation accuracy = 0.9838
# After 7002 training step(s), validation accuracy = 0.9844
# After 8002 training step(s), validation accuracy = 0.9838
# After 9002 training step(s), validation accuracy = 0.9838
# After 10002 training step(s), validation accuracy = 0.9842
# After 11002 training step(s), validation accuracy = 0.9844
# After 11002 training step(s), validation accuracy = 0.9844
# After 12002 training step(s), validation accuracy = 0.9854
# After 13002 training step(s), validation accuracy = 0.9848
# After 14002 training step(s), validation accuracy = 0.984
# After 15002 training step(s), validation accuracy = 0.9846
# After 16002 training step(s), validation accuracy = 0.985
# After 17002 training step(s), validation accuracy = 0.985
# After 17002 training step(s), validation accuracy = 0.985
# After 18002 training step(s), validation accuracy = 0.9852
# After 19002 training step(s), validation accuracy = 0.9852
# After 20002 training step(s), validation accuracy = 0.9856
# After 20002 training step(s), validation accuracy = 0.9856
# After 20002 training step(s), validation accuracy = 0.9856
# After 20002 training step(s), validation accuracy = 0.9856
# After 20002 training step(s), validation accuracy = 0.9856
# After 20002 training step(s), validation accuracy = 0.9856
# Press "control" + "c" to stop the program.
# Output:
# [Cancelled]