Compared with the previous exercise, the main improvements are the following: parameter initialization is changed from a Gaussian with fixed variance to MSRA initialization (a Gaussian whose variance scales with the layer's fan-in), and the learning rate now decays dynamically, once per epoch.
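As a quick sketch of the two changes (the function names here are illustrative, not taken from the script below), MSRA initialization draws weights from a Gaussian whose standard deviation is sqrt(2 / fan_in), and the learning rate is multiplied by a fixed decay factor once per completed epoch:

import numpy as np

def msra_stddev(fan_in):
    # MSRA / He initialization: variance 2/fan_in, so stddev = sqrt(2/fan_in)
    return np.sqrt(2.0 / fan_in)

def decayed_lr(init_lr, global_step, steps_per_epoch, decay_rate=0.575):
    # staircase decay: one multiplication by decay_rate per completed epoch
    epochs_done = global_step // steps_per_epoch
    return init_lr * decay_rate ** epochs_done

# e.g. msra_stddev(784) is about 0.0505, and decayed_lr(1e-2, 1200, 600) == 1e-2 * 0.575**2

The full script below builds the same schedule out of TensorFlow ops, driven by the graph's global_step counter.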
"""A very simple MNIST classifier.
See extensive documentation at
https://www.tensorflow.org/get_started/mnist/beginners
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import argparse
import sys
from tensorflow.examples.tutorials.mnist import input_data
import numpy as np
import tensorflow as tf
data_dir = './data/'
mnist = input_data.read_data_sets(data_dir, one_hot=True)
L1_neure_count = 100 # number of neurons in the first layer
init_learning_rate = tf.placeholder(tf.float32) # learning rate
x = tf.placeholder(tf.float32, [None, 784]) # input
# number of steps per epoch
epoch_steps = tf.to_int64(tf.div(60000, tf.shape(x)[0]))
# create a global step counter
global_step = tf.train.get_or_create_global_step()
# current epoch, computed from global_step
current_epoch = global_step//epoch_steps
decay_times = current_epoch/1 # i.e. decay once per epoch
current_learning_rate = tf.multiply(init_learning_rate, tf.pow(0.575, tf.to_float(decay_times)))
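# Note: assuming staircase behaviour is what's wanted, the schedule above is
# roughly equivalent to:
#   tf.train.exponential_decay(init_learning_rate, global_step, epoch_steps, 0.575, staircase=True)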
# MSRA initialization: Gaussian with stddev sqrt(2/fan_in)
W1 = tf.Variable(tf.random_normal([784, L1_neure_count], stddev=np.sqrt(2/784)))
b1 = tf.Variable(tf.constant(0.001, shape=[L1_neure_count]))
# hidden layer 1
l1 = tf.matmul(x, W1) + b1
o1 = tf.nn.relu(l1) # ReLU activation (assumed; the usual pairing with MSRA init)
L2_neure_count = 10
W2 = tf.Variable(tf.random_normal([L1_neure_count, L2_neure_count], stddev=np.sqrt(2/L1_neure_count)))
b2 = tf.Variable(tf.constant(0.001, shape=[L2_neure_count]))
l2 = tf.matmul(o1, W2) + b2
y = l2
# define loss and optimizer
y_ = tf.placeholder(tf.float32, [None, 10])
cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y))
# add the regularization penalty directly to the loss function,
# rather than penalizing the output of each layer's activation separately
l2_loss = tf.nn.l2_loss(W1) + tf.nn.l2_loss(W2) # total penalty term
total_loss = cross_entropy + 4e-5*l2_loss # total loss
# define the Adam optimizer
optimizer = tf.train.AdamOptimizer(current_learning_rate)
train_step = optimizer.minimize(total_loss, global_step=global_step)
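# Passing global_step to minimize() makes the optimizer increment it on every
# training step, which is what drives current_epoch and the decayed learning rate above.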
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
sess = tf.InteractiveSession()
tf.global_variables_initializer().run()
for step in range(3000):
    batch_xs, batch_ys = mnist.train.next_batch(100)
    lr = 1e-2
    _, loss, l2_loss_value, total_loss_value, current_lr_value = \
        sess.run([train_step, cross_entropy, l2_loss, total_loss, current_learning_rate],
                 feed_dict={x: batch_xs, y_: batch_ys, init_learning_rate: lr})
    if (step+1) % 100 == 0:
        print("step:%d, entropy_loss:%f, l2_loss:%f, total_loss:%f" %
              (step+1, loss, l2_loss_value, total_loss_value))
        print(sess.run(accuracy, feed_dict={x: mnist.test.images, y_: mnist.test.labels}))
# Test trained model
print(sess.run(accuracy, feed_dict={x: mnist.test.images, y_: mnist.test.labels}))
Run output:
step:500, entropy_loss:0.242228, l2_loss:2572.453369, total_loss:0.345127
0.9404
step:1000, entropy_loss:0.085225, l2_loss:2051.850342, total_loss:0.167299
0.9751
step:1500, entropy_loss:0.087876, l2_loss:1579.760864, total_loss:0.151067
0.9772
step:2000, entropy_loss:0.018950, l2_loss:1289.336548, total_loss:0.070524
0.983
step:2500, entropy_loss:0.017045, l2_loss:1099.483887, total_loss:0.061024
0.982
step:3000, entropy_loss:0.007257, l2_loss:973.254883, total_loss:0.046188
0.9826