Training the MNIST Dataset with a DNN (2)

Compared with the previous exercise, the main improvements are:

Parameter initialization changed from a fixed-variance Gaussian to MSRA initialization (a Gaussian whose variance scales with each layer's fan-in).
The optimizer changed from SGD (stochastic gradient descent) to Adam.

The learning rate is decayed dynamically, once per epoch (see the sketch after this list).
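
Both of these tricks also exist as TensorFlow 1.x built-ins, which makes the intent explicit. Below is a minimal sketch, illustrative only (the script further down builds the same things by hand): tf.variance_scaling_initializer with scale=2.0 is MSRA/He initialization, and tf.train.exponential_decay with staircase=True gives the same step-wise per-epoch decay. With a batch size of 100, one epoch over the 60000 training images is 600 steps, so after 5 epochs (3000 steps) the rate is 1e-2 * 0.575^5 ≈ 6.3e-4.

import tensorflow as tf

# MSRA (He) initialization via the built-in initializer: variance = 2 / fan_in
msra_init = tf.variance_scaling_initializer(scale=2.0, mode='fan_in')
W1 = tf.get_variable('W1', shape=[784, 100], initializer=msra_init)

# Step-wise decay, equivalent to init_lr * 0.575**epoch in the script below
global_step = tf.train.get_or_create_global_step()
lr = tf.train.exponential_decay(learning_rate=1e-2,
                                global_step=global_step,
                                decay_steps=600,    # 60000 images / batch size 100
                                decay_rate=0.575,
                                staircase=True)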

"""A very simple MNIST classifier.
See extensive documentation at
https://www.tensorflow.org/get_started/mnist/beginners
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from tensorflow.examples.tutorials.mnist import input_data

import numpy as np
import tensorflow as tf
data_dir = './data/'
mnist = input_data.read_data_sets(data_dir, one_hot=True)
L1_neure_count = 100  # number of neurons in the first (hidden) layer
init_learning_rate = tf.placeholder(tf.float32)  # base learning rate, fed at run time
x = tf.placeholder(tf.float32, [None, 784])  # input: flattened 28x28 images

# steps per epoch: 60000 training images divided by the batch size
epoch_steps = tf.to_int64(tf.div(60000, tf.shape(x)[0]))
# global step counter, incremented once per training step by the optimizer
global_step = tf.train.get_or_create_global_step()
# current epoch, derived from global_step
current_epoch = global_step//epoch_steps
decay_times = current_epoch  # decay once per epoch
current_learning_rate = tf.multiply(init_learning_rate, tf.pow(0.575, tf.to_float(decay_times)))

# MSRA initialization: zero-mean Gaussian with stddev sqrt(2 / fan_in)
W1 = tf.Variable(tf.truncated_normal([784, L1_neure_count], stddev=np.sqrt(2/784)))
b1 = tf.Variable(tf.constant(0.001, shape=[L1_neure_count]))

# hidden layer 1 (ReLU, the activation MSRA initialization is derived for)
l1 = tf.matmul(x, W1) + b1
o1 = tf.nn.relu(l1)

L2_neure_count = 10  # output layer: one logit per digit class
W2 = tf.Variable(tf.truncated_normal([L1_neure_count, L2_neure_count], stddev=np.sqrt(2/L1_neure_count)))
b2 = tf.Variable(tf.constant(0.001, shape=[L2_neure_count]))
l2 = tf.matmul(o1, W2) + b2

y = l2  # logits; softmax is applied inside the loss below

# define the loss and optimizer
y_ = tf.placeholder(tf.float32, [None, 10])
cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y))

# add the L2 penalty (regularization) directly to the loss instead of penalizing each layer's activations
l2_loss = tf.nn.l2_loss(W1) + tf.nn.l2_loss(W2)  # total penalty term
total_loss = cross_entropy + 4e-5*l2_loss  # total loss

# Adam optimizer on the decayed learning rate; passing global_step so the counter advances each step
optimizer = tf.train.AdamOptimizer(current_learning_rate)
train_step = optimizer.minimize(total_loss, global_step)

correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

sess = tf.InteractiveSession()
tf.global_variables_initializer().run()

lr = 1e-2  # base (undecayed) learning rate
for step in range(3000):
    batch_xs, batch_ys = mnist.train.next_batch(100)
    _, loss, l2_loss_value, total_loss_value, current_lr_value = \
        sess.run([train_step, cross_entropy, l2_loss, total_loss, current_learning_rate],
                feed_dict={x: batch_xs, y_: batch_ys, init_learning_rate: lr})
        
    if (step+1)%100 == 0:
        print("step:%d, entropy_loss:%f, l2_loss:%f, total_loss:%f" % 
             (step+1, loss, l2_loss_value, total_loss_value))
        print(sess.run(accuracy, feed_dict={x: mnist.test.images, y_: mnist.test.labels}))

# Test trained model
print(sess.run(accuracy, feed_dict={x: mnist.test.images, y_: mnist.test.labels}))
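
To confirm the schedule behaved as expected, the decayed rate can be read back from the graph after training. A minimal check (an addition, not part of the original run below); x must be fed because epoch_steps is computed from its batch size:

final_lr = sess.run(current_learning_rate,
                    feed_dict={x: batch_xs, init_learning_rate: lr})
print("final learning rate: %g" % final_lr)  # expect 1e-2 * 0.575**5, about 6.3e-4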

Output:

step:500, entropy_loss:0.242228, l2_loss:2572.453369, total_loss:0.345127
0.9404
step:1000, entropy_loss:0.085225, l2_loss:2051.850342, total_loss:0.167299
0.9751
step:1500, entropy_loss:0.087876, l2_loss:1579.760864, total_loss:0.151067
0.9772
step:2000, entropy_loss:0.018950, l2_loss:1289.336548, total_loss:0.070524
0.983
step:2500, entropy_loss:0.017045, l2_loss:1099.483887, total_loss:0.061024
0.982
step:3000, entropy_loss:0.007257, l2_loss:973.254883, total_loss:0.046188
0.9826
