一个完整的TensorFlow程序来解决MNIST手写体数字识别问题。用到了带指数衰减的学习率设置、正则化避免过拟合,以及滑动平均模型来增加模型鲁棒性。
如果数据集自动下载失败,可以到 http://yann.lecun.com/exdb/mnist/ 手动下载四个 .gz 压缩包,下载后直接放到代码中指定的数据目录(../datasets/MNIST_data/)下即可,不需要解压。
import tensorflow as tf
import os
from tensorflow.examples.tutorials.mnist import input_data
# Load MNIST from the local data directory with one-hot encoded labels, then
# report how many examples each of the three splits contains.
mnist = input_data.read_data_sets('../datasets/MNIST_data/', one_hot=True)
for _caption, _split in (
        ("Training data size: ", mnist.train),
        ("Validating data size: ", mnist.validation),
        ("Testing data size: ", mnist.test),
):
    print(_caption, _split.num_examples)
Training data size: 55000
Validating data size: 5000
Testing data size: 10000
# Show the first training sample: its image vector and its one-hot label.
first_image, first_label = mnist.train.images[0], mnist.train.labels[0]
print("Example training data: ", first_image)
print("Example training data label: ", first_label)
Example training data: [ 0. 0. 0. 0. 0. 0. 0.
·····
0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0. ]
Example training data label: [ 0. 0. 0. 0. 0. 0. 0. 1. 0. 0.]
INPUT_NODE = 784 # Input layer width: one node per pixel of a 28*28*1 image
OUTPUT_NODE = 10 # Output layer width: one node per digit class 0-9
LAYER1_NODE = 500 # Number of nodes in the single hidden layer
BATCH_SIZE = 100 # Examples per training batch; larger is closer to full gradient descent, smaller is closer to stochastic gradient descent
LEARNING_RATE_BASE = 0.8 # Initial learning rate fed to the exponential decay schedule
LEARNING_RATE_DECAY = 0.99 # Decay rate of the learning-rate schedule
REGULARIZATION_RATE = 0.0001 # Lambda coefficient of the L2 regularization term
TRAINING_STEPS = 30000 # Number of training iterations
MOVING_AVERAGE_DECAY = 0.99 # Decay rate of the exponential moving average
def get_weight_variable(shape, regualrizer):
    """Create or fetch the variable named "weights" in the current variable scope.

    The variable is requested through ``tf.get_variable`` with a
    truncated-normal initializer (standard deviation 0.1). When a regularizer
    is supplied, the loss term it produces for the weights is appended to the
    "losses" graph collection so the caller can add it to the total loss.

    Args:
        shape: Shape passed to ``tf.get_variable`` for the weight tensor.
        regualrizer: Callable that maps the weight variable to a loss tensor,
            or ``None`` to skip regularization. The misspelled name is kept on
            purpose so that existing keyword callers keep working.

    Returns:
        The weight variable.
    """
    weights = tf.get_variable(
        "weights", shape,
        initializer=tf.truncated_normal_initializer(stddev=0.1))
    # Test identity against None: `!= None` would go through the argument's
    # own comparison methods instead of simply checking for absence.
    if regualrizer is not None:
        tf.add_to_collection("losses", regualrizer(weights))
    return weights
def inference(x, regularizer):
    """Compute the forward pass of a two-layer fully connected network.

    Inside variable scope "layer1" the input is projected from INPUT_NODE to
    LAYER1_NODE units and passed through ReLU. Inside variable scope "layer2"
    the hidden activations are projected to OUTPUT_NODE units with no
    activation, so the returned tensor holds unnormalized logits. Both scopes
    are opened with ``reuse=False``.

    Args:
        x: Input tensor; it is matrix-multiplied with an
            [INPUT_NODE, LAYER1_NODE] weight matrix.
        regularizer: Forwarded to ``get_weight_variable`` for both weight
            matrices; may be ``None``.

    Returns:
        The output of the second layer (logits).
    """
    def _dense(inputs, scope_name, fan_in, fan_out, activation=None):
        # One affine layer: weights and biases live in `scope_name`, and the
        # optional activation is applied while that scope is still open.
        with tf.variable_scope(scope_name, reuse=False):
            kernel = get_weight_variable([fan_in, fan_out], regularizer)
            offset = tf.get_variable(
                "biases", [fan_out],
                initializer=tf.constant_initializer(0.0))
            pre_activation = tf.matmul(inputs, kernel) + offset
            if activation is None:
                return pre_activation
            return activation(pre_activation)

    hidden = _dense(x, 'layer1', INPUT_NODE, LAYER1_NODE,
                    activation=tf.nn.relu)
    return _dense(hidden, 'layer2', LAYER1_NODE, OUTPUT_NODE)
def train(mnist):
    """Build the training graph and run the optimization loop on MNIST.

    The graph consists of the forward pass from ``inference``, a loss made of
    softmax cross-entropy plus the terms stored in the "losses" collection,
    an exponentially decayed learning rate, a gradient-descent step, and an
    exponential moving average over all trainable variables. The loop runs
    TRAINING_STEPS iterations, prints validation accuracy every 1000
    iterations, and prints test accuracy once at the end.

    Args:
        mnist: Dataset object exposing ``train``, ``validation`` and ``test``
            splits with one-hot labels, such as the value returned by
            ``input_data.read_data_sets(..., one_hot=True)``.

    Returns:
        None. Results are reported through ``print``.
    """
    x = tf.placeholder(tf.float32, shape=[None, INPUT_NODE], name="x-input")
    y_ = tf.placeholder(tf.float32, shape=[None, OUTPUT_NODE], name="y-input")

    # L2 regularizer applied to both weight matrices inside inference().
    regularizer = tf.contrib.layers.l2_regularizer(REGULARIZATION_RATE)
    y = inference(x, regularizer)

    # Step counter; trainable=False keeps it out of tf.trainable_variables(),
    # so it is neither optimized nor averaged.
    global_step = tf.Variable(0, trainable=False)

    # Passing the step counter lets the averager use a smaller effective
    # decay during the first iterations.
    variable_averages = tf.train.ExponentialMovingAverage(
        MOVING_AVERAGE_DECAY, global_step)

    # The labels are one-hot, so argmax over axis 1 recovers the class index
    # expected by the sparse cross-entropy op.
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=y, labels=tf.argmax(y_, 1))
    cross_entropy_mean = tf.reduce_mean(cross_entropy)

    # Total loss = mean cross-entropy + sum of all regularization terms that
    # get_weight_variable() stored in the "losses" collection.
    loss = cross_entropy_mean + tf.add_n(tf.get_collection("losses"))

    # Decay the learning rate once per pass over the training set
    # (staircase=True makes the decay happen in discrete steps).
    learning_rate = tf.train.exponential_decay(
        LEARNING_RATE_BASE,
        global_step,
        mnist.train.num_examples / BATCH_SIZE,
        LEARNING_RATE_DECAY,
        staircase=True,
    )

    # minimize() returns the op that applies the gradients; it also
    # increments global_step each time it runs.
    train_step = tf.train.GradientDescentOptimizer(
        learning_rate).minimize(loss, global_step=global_step)

    # Create the averaging op under a control dependency on the optimizer
    # step so the moving averages are always computed from the freshly
    # updated weights. Merely grouping two independently created ops leaves
    # their relative execution order undefined.
    with tf.control_dependencies([train_step]):
        variables_averages_op = variable_averages.apply(
            tf.trainable_variables())
    # A single op that triggers both the weight update and the averaging.
    with tf.control_dependencies([train_step, variables_averages_op]):
        train_op = tf.no_op(name="train")

    # Accuracy: fraction of examples whose predicted class (argmax of the
    # logits) equals the labelled class.
    # NOTE(review): `y` is computed from the directly trained variables, so
    # the moving averages maintained by train_op are not read here even
    # though the progress message below says "average model".
    correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    with tf.Session() as sess:
        tf.global_variables_initializer().run()

        validate_feed = {x: mnist.validation.images,
                         y_: mnist.validation.labels}
        test_feed = {x: mnist.test.images, y_: mnist.test.labels}

        for i in range(TRAINING_STEPS):
            # Report validation accuracy every 1000 iterations (including
            # iteration 0, before any training has happened).
            if i % 1000 == 0:
                validate_acc = sess.run(accuracy, feed_dict=validate_feed)
                print("After %d traing times, validate accuracy using average model is %g"
                      % (i, validate_acc))

            # Fetch the next mini-batch from the dataset helper and train on it.
            xs, ys = mnist.train.next_batch(BATCH_SIZE)
            sess.run(train_op, feed_dict={x: xs, y_: ys})

        # Final accuracy on the held-out test split.
        test_acc = sess.run(accuracy, feed_dict=test_feed)
        print("最终准确率是", test_acc)
def main(argv=None):
    """Load the MNIST dataset from the local data directory and train on it.

    Args:
        argv: Accepted for entry-point compatibility; not used.
    """
    train(input_data.read_data_sets('../datasets/MNIST_data/', one_hot=True))
# Run training only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Extracting ../datasets/MNIST_data/train-images-idx3-ubyte.gz
Extracting ../datasets/MNIST_data/train-labels-idx1-ubyte.gz
Extracting ../datasets/MNIST_data/t10k-images-idx3-ubyte.gz
Extracting ../datasets/MNIST_data/t10k-labels-idx1-ubyte.gz
After 0 traing times, validate accuracy using average model is 0.0914
After 1000 traing times, validate accuracy using average model is 0.97
After 2000 traing times, validate accuracy using average model is 0.9778
After 3000 traing times, validate accuracy using average model is 0.9806
After 4000 traing times, validate accuracy using average model is 0.9846
·····
After 26000 traing times, validate accuracy using average model is 0.987
After 27000 traing times, validate accuracy using average model is 0.9868
After 28000 traing times, validate accuracy using average model is 0.9864
After 29000 traing times, validate accuracy using average model is 0.987
最终准确率是 0.9845
书中使用的TensorFlow版本太老,参考别人的代码又不够详细,折腾了几个小时才终于跑通,吐血。