For regression problems, the most common choice is the quadratic (squared-error) cost function:
# quadratic cost
loss = tf.reduce_mean(tf.square(y - prediction))
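For reference, this evaluates the mean squared error over every entry of the batch, C = \frac{1}{N}\sum_i (y_i - a_i)^2, where the a_i are the network's predictions; the extra 1/2 factor used in some textbooks only rescales the gradient.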
For a full example trained with this cost, see: TensorFlow 1.0 Systematic Learning (3) — Training MNIST with a Traditional Neural Network and the Softmax Function.
Training results with the quadratic cost:
epoch: 0,Training Accuracy: 0.81716365,Testing Accuracy: 0.8303
epoch: 1,Training Accuracy: 0.85865456,Testing Accuracy: 0.8706
epoch: 2,Training Accuracy: 0.8716,Testing Accuracy: 0.8813
epoch: 3,Training Accuracy: 0.87887275,Testing Accuracy: 0.8882
epoch: 4,Training Accuracy: 0.8842,Testing Accuracy: 0.8945
epoch: 5,Training Accuracy: 0.88754547,Testing Accuracy: 0.8963
epoch: 6,Training Accuracy: 0.89183635,Testing Accuracy: 0.8992
epoch: 7,Training Accuracy: 0.8936727,Testing Accuracy: 0.902
epoch: 8,Training Accuracy: 0.89616364,Testing Accuracy: 0.9037
epoch: 9,Training Accuracy: 0.8978909,Testing Accuracy: 0.9052
epoch: 10,Training Accuracy: 0.89967275,Testing Accuracy: 0.9066
epoch: 11,Training Accuracy: 0.9012727,Testing Accuracy: 0.9072
epoch: 12,Training Accuracy: 0.9019455,Testing Accuracy: 0.9087
epoch: 13,Training Accuracy: 0.9034727,Testing Accuracy: 0.9101
epoch: 14,Training Accuracy: 0.90425456,Testing Accuracy: 0.9098
epoch: 15,Training Accuracy: 0.9055273,Testing Accuracy: 0.9115
epoch: 16,Training Accuracy: 0.9062182,Testing Accuracy: 0.9118
epoch: 17,Training Accuracy: 0.9066909,Testing Accuracy: 0.9126
epoch: 18,Training Accuracy: 0.90770906,Testing Accuracy: 0.9133
epoch: 19,Training Accuracy: 0.90812725,Testing Accuracy: 0.9139
epoch: 20,Training Accuracy: 0.90967274,Testing Accuracy: 0.9133
For classification with neural networks, we mainly use the cross-entropy cost function instead:
# cross-entropy cost paired with a softmax output layer
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=prediction))
# cross-entropy cost paired with a sigmoid output layer
loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=y, logits=prediction))
Both functions apply the output nonlinearity internally, so logits should be the raw, pre-activation output of the last layer (in later TF 1.x releases, softmax_cross_entropy_with_logits is deprecated in favor of softmax_cross_entropy_with_logits_v2).
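Why does this speed up learning? A one-neuron sketch makes it clear. With a sigmoid output a = \sigma(z), the quadratic cost gives the gradient
\partial C / \partial w = (a - y)\,\sigma'(z)\,x,
and \sigma'(z) \approx 0 whenever the neuron saturates, so a badly wrong but saturated output learns very slowly. With the cross-entropy cost
C = -[\,y \ln a + (1 - y)\ln(1 - a)\,],
the \sigma'(z) factor cancels, leaving
\partial C / \partial w = (a - y)\,x,
so the larger the error, the larger the gradient, which matches the faster convergence seen below.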
Training results after the switch:
epoch: 0,Training Accuracy: 0.81436366,Testing Accuracy: 0.8244
epoch: 1,Training Accuracy: 0.88263637,Testing Accuracy: 0.8917
epoch: 2,Training Accuracy: 0.8939273,Testing Accuracy: 0.9001
epoch: 3,Training Accuracy: 0.89916366,Testing Accuracy: 0.9057
epoch: 4,Training Accuracy: 0.90272725,Testing Accuracy: 0.9087
epoch: 5,Training Accuracy: 0.9051091,Testing Accuracy: 0.91
epoch: 6,Training Accuracy: 0.90714544,Testing Accuracy: 0.911
epoch: 7,Training Accuracy: 0.90845454,Testing Accuracy: 0.9135
epoch: 8,Training Accuracy: 0.9106727,Testing Accuracy: 0.9155
epoch: 9,Training Accuracy: 0.91203636,Testing Accuracy: 0.9162
epoch: 10,Training Accuracy: 0.91276366,Testing Accuracy: 0.9177
epoch: 11,Training Accuracy: 0.91383636,Testing Accuracy: 0.9186
epoch: 12,Training Accuracy: 0.91483635,Testing Accuracy: 0.9187
epoch: 13,Training Accuracy: 0.9154364,Testing Accuracy: 0.9194
epoch: 14,Training Accuracy: 0.9162545,Testing Accuracy: 0.9192
epoch: 15,Training Accuracy: 0.91670907,Testing Accuracy: 0.9202
epoch: 16,Training Accuracy: 0.9174,Testing Accuracy: 0.9203
epoch: 17,Training Accuracy: 0.9181455,Testing Accuracy: 0.9209
epoch: 18,Training Accuracy: 0.9187818,Testing Accuracy: 0.9209
epoch: 19,Training Accuracy: 0.9192182,Testing Accuracy: 0.9217
epoch: 20,Training Accuracy: 0.92,Testing Accuracy: 0.9219
As you can see, after the switch the test accuracy reaches 0.90 in only 2 epochs, whereas the quadratic cost needed 7 epochs to get there.
Note: the next experiment illustrates overfitting and dropout. First, a deliberately oversized network is trained with dropout disabled (keep_prob fed as 1.0):
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
# load the dataset
mnist = input_data.read_data_sets("MNIST_data", one_hot=True)
# size of each batch
batch_size = 100
# number of batches per epoch
n_batch = mnist.train.num_examples // batch_size
# define the placeholders
x = tf.placeholder(tf.float32, [None, 784])
y = tf.placeholder(tf.float32, [None, 10])
keep_prob = tf.placeholder(tf.float32)
# build a network with 3 hidden layers
W1 = tf.Variable(tf.truncated_normal([784, 2000], stddev=0.1))
b1 = tf.Variable(tf.zeros([2000]) + 0.1)
L1 = tf.nn.tanh(tf.matmul(x, W1) + b1)
L1_drop = tf.nn.dropout(L1, keep_prob)
W2 = tf.Variable(tf.truncated_normal([2000, 2000], stddev=0.1))
b2 = tf.Variable(tf.zeros([2000]) + 0.1)
L2 = tf.nn.tanh(tf.matmul(L1_drop, W2) + b2)
L2_drop = tf.nn.dropout(L2, keep_prob)
W3 = tf.Variable(tf.truncated_normal([2000, 1000], stddev=0.1))
b3 = tf.Variable(tf.zeros([1000]) + 0.1)
L3 = tf.nn.tanh(tf.matmul(L2_drop, W3) + b3)
L3_drop = tf.nn.dropout(L3, keep_prob)
W4 = tf.Variable(tf.truncated_normal([1000, 10], stddev=0.1))
b4 = tf.Variable(tf.zeros([10]) + 0.1)
prediction = tf.nn.softmax(tf.matmul(L3_drop, W4) + b4)
# cross-entropy cost; note that softmax_cross_entropy_with_logits expects raw
# logits (tf.matmul(L3_drop, W4) + b4, without the outer softmax) -- kept as
# in the original run
# loss = tf.reduce_mean(tf.square(y - prediction))
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=prediction))
# train with gradient descent
train_step = tf.train.GradientDescentOptimizer(0.2).minimize(loss)
init = tf.global_variables_initializer()
# store the per-example results in a boolean list
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(prediction, 1))  # argmax returns the index of the largest value in a 1-D tensor
# compute the accuracy
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))  # cast converts booleans to floats: True -> 1.0, False -> 0.0
with tf.Session() as sess:
    sess.run(init)
    for epoch in range(21):  # train for 21 epochs
        for batch in range(n_batch):  # one pass over all training images
            batch_xs, batch_ys = mnist.train.next_batch(batch_size)  # fetch a batch of batch_size images
            sess.run(train_step, feed_dict={x: batch_xs, y: batch_ys, keep_prob: 1.0})
        test_acc = sess.run(accuracy, feed_dict={x: mnist.test.images, y: mnist.test.labels, keep_prob: 1.0})
        train_acc = sess.run(accuracy, feed_dict={x: mnist.train.images, y: mnist.train.labels, keep_prob: 1.0})
        print("epoch: " + str(epoch) + ",Training Accuracy: " + str(train_acc) + ",Testing Accuracy: " + str(test_acc))
Result:
epoch: 0,Training Accuracy: 0.93381816,Testing Accuracy: 0.9273
epoch: 1,Training Accuracy: 0.9709455,Testing Accuracy: 0.9559
epoch: 2,Training Accuracy: 0.9824,Testing Accuracy: 0.9616
epoch: 3,Training Accuracy: 0.98654544,Testing Accuracy: 0.9656
epoch: 4,Training Accuracy: 0.9885091,Testing Accuracy: 0.9672
epoch: 5,Training Accuracy: 0.98987275,Testing Accuracy: 0.9691
epoch: 6,Training Accuracy: 0.9908,Testing Accuracy: 0.9694
epoch: 7,Training Accuracy: 0.9918182,Testing Accuracy: 0.9705
epoch: 8,Training Accuracy: 0.9924909,Testing Accuracy: 0.9707
As you can see, this network is far too complex for our relatively small training set: it exhibits high variance (a large gap between training and test accuracy), i.e. overfitting. Dropout helps here; the next run is identical except that keep_prob is fed as 0.7 during training (see the sketch below).
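Before that run, a minimal NumPy sketch (illustrative, not the author's code) of what tf.nn.dropout computes, assuming TensorFlow's standard "inverted dropout":
import numpy as np

def dropout(a, keep_prob):
    # zero each unit with probability 1 - keep_prob ...
    mask = np.random.rand(*a.shape) < keep_prob
    # ... and rescale the survivors by 1/keep_prob so the expected
    # activation is unchanged; feeding keep_prob = 1.0 at test time
    # then needs no extra rescaling
    return a * mask / keep_prob

a = np.ones((2, 4))
print(dropout(a, 0.7))  # ~70% of entries become 1/0.7 ≈ 1.43, the rest 0
The dropout run (keep_prob = 0.7 during training):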
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
# load the dataset
mnist = input_data.read_data_sets("MNIST_data", one_hot=True)
# size of each batch
batch_size = 100
# number of batches per epoch
n_batch = mnist.train.num_examples // batch_size
# define the placeholders
x = tf.placeholder(tf.float32, [None, 784])
y = tf.placeholder(tf.float32, [None, 10])
keep_prob = tf.placeholder(tf.float32)
# build a network with 3 hidden layers
W1 = tf.Variable(tf.truncated_normal([784, 2000], stddev=0.1))
b1 = tf.Variable(tf.zeros([2000]) + 0.1)
L1 = tf.nn.tanh(tf.matmul(x, W1) + b1)
L1_drop = tf.nn.dropout(L1, keep_prob)
W2 = tf.Variable(tf.truncated_normal([2000, 2000], stddev=0.1))
b2 = tf.Variable(tf.zeros([2000]) + 0.1)
L2 = tf.nn.tanh(tf.matmul(L1_drop, W2) + b2)
L2_drop = tf.nn.dropout(L2, keep_prob)
W3 = tf.Variable(tf.truncated_normal([2000, 1000], stddev=0.1))
b3 = tf.Variable(tf.zeros([1000]) + 0.1)
L3 = tf.nn.tanh(tf.matmul(L2_drop, W3) + b3)
L3_drop = tf.nn.dropout(L3, keep_prob)
W4 = tf.Variable(tf.truncated_normal([1000, 10], stddev=0.1))
b4 = tf.Variable(tf.zeros([10]) + 0.1)
prediction = tf.nn.softmax(tf.matmul(L3_drop, W4) + b4)
# cross-entropy cost (same caveat about raw logits as above)
# loss = tf.reduce_mean(tf.square(y - prediction))
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=prediction))
# train with gradient descent
train_step = tf.train.GradientDescentOptimizer(0.2).minimize(loss)
init = tf.global_variables_initializer()
# store the per-example results in a boolean list
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(prediction, 1))  # argmax returns the index of the largest value in a 1-D tensor
# compute the accuracy
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))  # cast converts booleans to floats: True -> 1.0, False -> 0.0
with tf.Session() as sess:
    sess.run(init)
    for epoch in range(21):  # train for 21 epochs
        for batch in range(n_batch):  # one pass over all training images
            batch_xs, batch_ys = mnist.train.next_batch(batch_size)  # fetch a batch of batch_size images
            sess.run(train_step, feed_dict={x: batch_xs, y: batch_ys, keep_prob: 0.7})  # keep 70% of units during training
        test_acc = sess.run(accuracy, feed_dict={x: mnist.test.images, y: mnist.test.labels, keep_prob: 1.0})
        train_acc = sess.run(accuracy, feed_dict={x: mnist.train.images, y: mnist.train.labels, keep_prob: 1.0})
        print("epoch: " + str(epoch) + ",Training Accuracy: " + str(train_acc) + ",Testing Accuracy: " + str(test_acc))
Output:
epoch: 0,Training Accuracy: 0.9118,Testing Accuracy: 0.9178
epoch: 1,Training Accuracy: 0.9277818,Testing Accuracy: 0.9348
epoch: 2,Training Accuracy: 0.9360727,Testing Accuracy: 0.937
epoch: 3,Training Accuracy: 0.94063634,Testing Accuracy: 0.9428
epoch: 4,Training Accuracy: 0.9456546,Testing Accuracy: 0.9469
epoch: 5,Training Accuracy: 0.94754547,Testing Accuracy: 0.9473
epoch: 6,Training Accuracy: 0.9514727,Testing Accuracy: 0.9504
epoch: 7,Training Accuracy: 0.95436364,Testing Accuracy: 0.9504
epoch: 8,Training Accuracy: 0.95585454,Testing Accuracy: 0.9517
epoch: 9,Training Accuracy: 0.95885456,Testing Accuracy: 0.9548
epoch: 10,Training Accuracy: 0.9595091,Testing Accuracy: 0.9554
epoch: 11,Training Accuracy: 0.96205455,Testing Accuracy: 0.9578
epoch: 12,Training Accuracy: 0.9633818,Testing Accuracy: 0.9596
epoch: 13,Training Accuracy: 0.9644727,Testing Accuracy: 0.96
epoch: 14,Training Accuracy: 0.9660182,Testing Accuracy: 0.96
epoch: 15,Training Accuracy: 0.96705455,Testing Accuracy: 0.964
epoch: 16,Training Accuracy: 0.96689093,Testing Accuracy: 0.9628
epoch: 17,Training Accuracy: 0.96834546,Testing Accuracy: 0.964
epoch: 18,Training Accuracy: 0.96874547,Testing Accuracy: 0.9637
epoch: 19,Training Accuracy: 0.97034544,Testing Accuracy: 0.9673
epoch: 20,Training Accuracy: 0.9706,Testing Accuracy: 0.9664
As you can see, training and test accuracy are now close together: some bias remains, but the high variance is gone and the model no longer overfits. The trade-off is that the network converges more slowly. TensorFlow provides a range of built-in optimizers beyond plain gradient descent:
tf.train.GradientDescentOptimizer
tf.train.AdadeltaOptimizer
tf.train.AdagradOptimizer
tf.train.AdagradDAOptimizer
tf.train.MomentumOptimizer
tf.train.AdamOptimizer
tf.train.FtrlOptimizer
tf.train.ProximalGradientDescentOptimizer
tf.train.ProximalAdagradOptimizer
tf.train.RMSPropOptimizer
Comparing the optimizers:
The SGD optimizer has trouble escaping saddle points, i.e. points like the origin of f(x, y) = x² − y², where the gradient vanishes but which are neither minima nor maxima; momentum-based optimizers tend to escape them more easily.
In practice, prototype with whichever optimizer converges fastest, then try several optimizers and compare their accuracy, since you can never know in advance which one will be most accurate on your problem; a sketch of such a comparison follows.
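A minimal sketch of a comparison loop (names and learning rates here are illustrative, not values from this post):
import tensorflow as tf

# candidate optimizers with illustrative (untuned) learning rates
candidates = {
    "sgd": lambda: tf.train.GradientDescentOptimizer(0.2),
    "momentum": lambda: tf.train.MomentumOptimizer(0.01, momentum=0.9),
    "adam": lambda: tf.train.AdamOptimizer(0.001),
    "rmsprop": lambda: tf.train.RMSPropOptimizer(0.001),
}
for name, make_optimizer in candidates.items():
    tf.reset_default_graph()  # build a fresh graph for each candidate
    x = tf.placeholder(tf.float32, [None, 784])
    y = tf.placeholder(tf.float32, [None, 10])
    W = tf.Variable(tf.zeros([784, 10]))
    b = tf.Variable(tf.zeros([10]))
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
        labels=y, logits=tf.matmul(x, W) + b))
    train_step = make_optimizer().minimize(loss)
    # then train exactly as in the runs above and record each
    # optimizer's best test accuracy under `name`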
Using tf.train.AdamOptimizer:
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
# load the dataset
mnist = input_data.read_data_sets("MNIST_data", one_hot=True)
# size of each batch
batch_size = 100
# number of batches per epoch
n_batch = mnist.train.num_examples // batch_size
# define two placeholders
x = tf.placeholder(tf.float32, [None, 784])
y = tf.placeholder(tf.float32, [None, 10])
# build a simple network with no hidden layer
W = tf.Variable(tf.zeros([784, 10]))
b = tf.Variable(tf.zeros([10]))
prediction = tf.nn.softmax(tf.matmul(x, W) + b)
# cross-entropy cost (same caveat about raw logits as above)
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=prediction))
# use Adam instead of plain gradient descent
# train_step = tf.train.GradientDescentOptimizer(0.2).minimize(loss)
train_step = tf.train.AdamOptimizer(0.01).minimize(loss)
init = tf.global_variables_initializer()
# store the per-example results in a boolean list
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(prediction, 1))  # argmax returns the index of the largest value in a 1-D tensor
# compute the accuracy
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))  # cast converts booleans to floats: True -> 1.0, False -> 0.0
with tf.Session() as sess:
    sess.run(init)
    for epoch in range(21):  # train for 21 epochs
        for batch in range(n_batch):  # one pass over all training images
            batch_xs, batch_ys = mnist.train.next_batch(batch_size)  # fetch a batch of batch_size images
            sess.run(train_step, feed_dict={x: batch_xs, y: batch_ys})
        test_acc = sess.run(accuracy, feed_dict={x: mnist.test.images, y: mnist.test.labels})
        train_acc = sess.run(accuracy, feed_dict={x: mnist.train.images, y: mnist.train.labels})
        print("epoch: " + str(epoch) + ",Training Accuracy: " + str(train_acc) + ",Testing Accuracy: " + str(test_acc))
Result:
epoch: 0,Training Accuracy: 0.92152727,Testing Accuracy: 0.9229
epoch: 1,Training Accuracy: 0.9266,Testing Accuracy: 0.9262
epoch: 2,Training Accuracy: 0.9316,Testing Accuracy: 0.9291
epoch: 3,Training Accuracy: 0.934,Testing Accuracy: 0.9305
epoch: 4,Training Accuracy: 0.93456364,Testing Accuracy: 0.9299
epoch: 5,Training Accuracy: 0.93685454,Testing Accuracy: 0.9314
epoch: 6,Training Accuracy: 0.9318909,Testing Accuracy: 0.9263
epoch: 7,Training Accuracy: 0.9387636,Testing Accuracy: 0.9314
epoch: 8,Training Accuracy: 0.93863636,Testing Accuracy: 0.9315
epoch: 9,Training Accuracy: 0.93954545,Testing Accuracy: 0.9291
epoch: 10,Training Accuracy: 0.94143635,Testing Accuracy: 0.9299
epoch: 11,Training Accuracy: 0.94165456,Testing Accuracy: 0.9293
epoch: 12,Training Accuracy: 0.94110906,Testing Accuracy: 0.9297
epoch: 13,Training Accuracy: 0.9412182,Testing Accuracy: 0.9286
epoch: 14,Training Accuracy: 0.9425091,Testing Accuracy: 0.9308
epoch: 15,Training Accuracy: 0.94414544,Testing Accuracy: 0.9306
epoch: 16,Training Accuracy: 0.9446545,Testing Accuracy: 0.9327
epoch: 17,Training Accuracy: 0.94505453,Testing Accuracy: 0.9318
epoch: 18,Training Accuracy: 0.94583637,Testing Accuracy: 0.9302
epoch: 19,Training Accuracy: 0.94692725,Testing Accuracy: 0.9316
epoch: 20,Training Accuracy: 0.94692725,Testing Accuracy: 0.9309
Convergence is clearly much faster than with the stochastic gradient descent optimizer, and accuracy also improves somewhat; a sketch of the update rule responsible follows.
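For intuition, a minimal NumPy sketch of the standard Adam update (the textbook algorithm from Kingma & Ba, not code from this post): Adam keeps exponentially decaying averages of the gradient and of its square, and normalizes each parameter's step by them, which is what lets it outpace plain SGD.
import numpy as np

def adam_step(theta, grad, m, v, t, lr=0.001, beta1=0.9, beta2=0.999, eps=1e-8):
    m = beta1 * m + (1 - beta1) * grad       # first-moment (mean) estimate
    v = beta2 * v + (1 - beta2) * grad ** 2  # second-moment (uncentered variance) estimate
    m_hat = m / (1 - beta1 ** t)             # bias correction for the zero-initialized averages
    v_hat = v / (1 - beta2 ** t)
    theta = theta - lr * m_hat / (np.sqrt(v_hat) + eps)  # per-parameter adaptive step
    return theta, m, v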
Next, let's find a way to push test accuracy above 98%.
# first define the learning rate as a variable
lr = tf.Variable(0.001, dtype=tf.float32)
# decay the learning rate at the start of each epoch
sess.run(tf.assign(lr, 0.001 * (0.95 ** epoch)))
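As an aside, TF 1.x also ships a built-in schedule that can produce the same effect without a manual tf.assign; a sketch, reusing loss and n_batch from the surrounding code (decay parameters chosen to mirror the schedule above):
global_step = tf.Variable(0, trainable=False)
lr = tf.train.exponential_decay(0.001, global_step, decay_steps=n_batch,
                                decay_rate=0.95, staircase=True)  # x0.95 per epoch
train_step = tf.train.AdamOptimizer(lr).minimize(loss, global_step=global_step)
The code below keeps the explicit assign from the snippet above.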
Improving the code accordingly raises test accuracy above 98%:
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
# load the dataset
mnist = input_data.read_data_sets("MNIST_data", one_hot=True)
# size of each batch
batch_size = 100
# number of batches per epoch
n_batch = mnist.train.num_examples // batch_size
# define the placeholders
x = tf.placeholder(tf.float32, [None, 784])
y = tf.placeholder(tf.float32, [None, 10])
lr = tf.Variable(0.001, dtype=tf.float32)
keep_prob = tf.placeholder(tf.float32)
# build a network with 2 hidden layers
W1 = tf.Variable(tf.truncated_normal([784, 500], stddev=0.1))
b1 = tf.Variable(tf.zeros([500]) + 0.1)
L1 = tf.nn.tanh(tf.matmul(x, W1) + b1)
L1_drop = tf.nn.dropout(L1, keep_prob)
W2 = tf.Variable(tf.truncated_normal([500, 300], stddev=0.1))
b2 = tf.Variable(tf.zeros([300]) + 0.1)
L2 = tf.nn.tanh(tf.matmul(L1_drop, W2) + b2)
L2_drop = tf.nn.dropout(L2, keep_prob)
W3 = tf.Variable(tf.truncated_normal([300, 10], stddev=0.1))
b3 = tf.Variable(tf.zeros([10]) + 0.1)
prediction = tf.nn.softmax(tf.matmul(L2_drop, W3) + b3)
# cross-entropy cost (same caveat about raw logits as above)
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=prediction))
# use Adam with the decaying learning rate
train_step = tf.train.AdamOptimizer(lr).minimize(loss)
init = tf.global_variables_initializer()
# store the per-example results in a boolean list
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(prediction, 1))  # argmax returns the index of the largest value in a 1-D tensor
# compute the accuracy
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))  # cast converts booleans to floats: True -> 1.0, False -> 0.0
with tf.Session() as sess:
    sess.run(init)
    for epoch in range(51):  # train for 51 epochs
        sess.run(tf.assign(lr, 0.001 * (0.95 ** epoch)))  # decay the learning rate each epoch
        for batch in range(n_batch):  # one pass over all training images
            batch_xs, batch_ys = mnist.train.next_batch(batch_size)  # fetch a batch of batch_size images
            # keep_prob fed as 1.0, so dropout is effectively disabled in this run
            sess.run(train_step, feed_dict={x: batch_xs, y: batch_ys, keep_prob: 1.0})
        learning_rate = sess.run(lr)
        test_acc = sess.run(accuracy, feed_dict={x: mnist.test.images, y: mnist.test.labels, keep_prob: 1.0})
        train_acc = sess.run(accuracy, feed_dict={x: mnist.train.images, y: mnist.train.labels, keep_prob: 1.0})
        print("epoch: " + str(epoch) + ",Training Accuracy: " + str(train_acc) + ",Testing Accuracy: " + str(test_acc) + ",Learning Rate: " + str(learning_rate))
Result (final epochs shown):
epoch: 43,Training Accuracy: 0.9945273,Testing Accuracy: 0.9797,Learning Rate: 0.00011018311
epoch: 44,Training Accuracy: 0.9945273,Testing Accuracy: 0.9805,Learning Rate: 0.000104673956
epoch: 45,Training Accuracy: 0.99463636,Testing Accuracy: 0.9805,Learning Rate: 9.944026e-05
epoch: 46,Training Accuracy: 0.9947091,Testing Accuracy: 0.98,Learning Rate: 9.446825e-05
epoch: 47,Training Accuracy: 0.9948909,Testing Accuracy: 0.9804,Learning Rate: 8.974483e-05
epoch: 48,Training Accuracy: 0.99496365,Testing Accuracy: 0.9802,Learning Rate: 8.525759e-05
epoch: 49,Training Accuracy: 0.99496365,Testing Accuracy: 0.9808,Learning Rate: 8.099471e-05
epoch: 50,Training Accuracy: 0.995,Testing Accuracy: 0.9803,Learning Rate: 7.6944976e-05