BP(back propagation)神经网络是1986年由Rumelhart和McClelland为首的科学家提出的概念,是一种按照误差逆向传播算法训练的多层前馈神经网络,是目前应用最广泛的神经网络。
20世纪80年代中期,David Rumelhart、Geoffrey Hinton和Ronald Williams、David Parker等人分别独立发现了误差反向传播算法(Error Back Propagation Training),简称BP,系统解决了多层神经网络隐含层连接权学习问题,并在数学上给出了完整推导。人们把采用这种算法进行误差校正的多层前馈网络称为BP网。
BP神经网络具有任意复杂的模式分类能力和优良的多维函数映射能力,解决了简单感知器不能解决的异或(Exclusive OR,XOR)和一些其他问题。从结构上讲,BP网络具有输入层、隐藏层和输出层;从本质上讲,BP算法就是以网络误差平方为目标函数、采用梯度下降法来计算目标函数的最小值。
——引用自百度百科
对于BP网络中的反向传播算法等数学基础知识,我们在此不做详细讨论,读者可以自行查阅相关资料。
# Imports used by this snippet and by the model/training code that follows.
import time

import matplotlib.pyplot as plt
import tensorflow as tf
import tensorflow.examples.tutorials.mnist.input_data as input_data

# Learning rate passed to the optimizer
learning_rate = 0.01
# Number of training iterations
train_steps = 1000

# Load the MNIST data set with one-hot encoded labels.
mnist = input_data.read_data_sets("", one_hot=True)
with tf.name_scope('data'):
    # Draw one batch of 50 training samples; change the batch size as needed.
    x_data, y_data = mnist.train.next_batch(50)
我们构建一个5层的网络,其中每一层的节点数依次为256、10、10、10、10,最后再接一个10个节点的输出层:
# Fully connected network: 784 -> 256 -> 10 -> 10 -> 10 -> 10 -> 10.
# Weights start from small random values so that the units of a layer do not
# begin (and stay) identical; biases start at zero.
# NOTE(review): the hidden layers are purely linear (no activation function),
# as in the original listing; add e.g. tf.nn.relu per layer if desired.
with tf.name_scope('Input'):
    # Input layer: 784 input features, 256 nodes
    input1 = tf.placeholder(tf.float32, [None, 784])
    weight1 = tf.Variable(tf.truncated_normal([784, 256], stddev=0.1))
    bias1 = tf.Variable(tf.zeros([256]))
    output1 = tf.add(tf.matmul(input1, weight1), bias1)
with tf.name_scope('Layer1'):
    # Layer 1 with 10 nodes
    weight2 = tf.Variable(tf.truncated_normal([256, 10], stddev=0.1))
    bias2 = tf.Variable(tf.zeros([10]))
    output2 = tf.add(tf.matmul(output1, weight2), bias2)
with tf.name_scope('Layer2'):
    # Layer 2 with 10 nodes
    weight3 = tf.Variable(tf.truncated_normal([10, 10], stddev=0.1))
    bias3 = tf.Variable(tf.zeros([10]))
    output3 = tf.add(tf.matmul(output2, weight3), bias3)
with tf.name_scope('Layer3'):
    # Layer 3 with 10 nodes
    weight4 = tf.Variable(tf.truncated_normal([10, 10], stddev=0.1))
    bias4 = tf.Variable(tf.zeros([10]))
    output4 = tf.add(tf.matmul(output3, weight4), bias4)
with tf.name_scope('layer4'):
    # Layer 4 with 10 nodes
    weight5 = tf.Variable(tf.truncated_normal([10, 10], stddev=0.1))
    bias5 = tf.Variable(tf.zeros([10]))
    output5 = tf.add(tf.matmul(output4, weight5), bias5)

# Output layer
with tf.name_scope('Prediction'):
    weight6 = tf.Variable(tf.truncated_normal([10, 10], stddev=0.1))
    bias6 = tf.Variable(tf.zeros([10]))
    # The bias belongs inside the softmax: adding it afterwards yields values
    # that no longer sum to 1, which makes the cross-entropy meaningless.
    logits = tf.add(tf.matmul(output5, weight6), bias6)
    output6 = tf.nn.softmax(logits)

# One-hot encoded labels
Target = tf.placeholder(tf.float32, [None, 10])

# Cross-entropy loss, computed from the logits by the fused op so that a
# saturated softmax cannot produce log(0).
with tf.name_scope('Loss'):
    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=Target, logits=logits))

# Training operation
with tf.name_scope('Train'):
    train_op = tf.train.AdamOptimizer(learning_rate).minimize(loss)

with tf.name_scope('Init'):
    init = tf.global_variables_initializer()

loss_history = []
with tf.Session() as sess:
    sess.run(init)
    # The same batch is fed on every iteration.
    feed = {input1: x_data, Target: y_data}
    start_time = time.time()
    for _ in range(train_steps):
        sess.run(train_op, feed_dict=feed)
        lo = sess.run(loss, feed_dict=feed)
        print(lo)
        loss_history.append(lo)
        # Report the wall-clock time spent on this iteration.
        end_time = time.time()
        print('time: ', (end_time - start_time))
        start_time = end_time
    print("Optimization Finished!")

    # Write the graph to a log directory; change the path as needed.
    writer = tf.summary.FileWriter("logs/", sess.graph)
    writer.close()

# Plot the loss recorded after every training step.
plt.plot(loss_history)
plt.xlabel('The sampling point')
plt.ylabel('loss')
plt.title("The variation of the loss")
plt.grid(True)
plt.show()
# Imports used by the complete script.
import time

import matplotlib.pyplot as plt
import tensorflow as tf
import tensorflow.examples.tutorials.mnist.input_data as input_data

# Learning rate passed to the optimizer
learning_rate = 0.01
# Number of training iterations
train_steps = 1000

# Load the MNIST data set with one-hot encoded labels.
mnist = input_data.read_data_sets("", one_hot=True)
with tf.name_scope('data'):
    # Draw one batch of 50 training samples; change the batch size as needed.
    x_data, y_data = mnist.train.next_batch(50)
# Fully connected network: 784 -> 256 -> 10 -> 10 -> 10 -> 10 -> 10.
# Weights start from small random values so that the units of a layer do not
# begin (and stay) identical; biases start at zero.
# NOTE(review): the hidden layers are purely linear (no activation function),
# as in the original listing; add e.g. tf.nn.relu per layer if desired.
with tf.name_scope('Input'):
    # Input layer: 784 input features, 256 nodes
    input1 = tf.placeholder(tf.float32, [None, 784])
    weight1 = tf.Variable(tf.truncated_normal([784, 256], stddev=0.1))
    bias1 = tf.Variable(tf.zeros([256]))
    output1 = tf.add(tf.matmul(input1, weight1), bias1)
with tf.name_scope('Layer1'):
    # 10 nodes
    weight2 = tf.Variable(tf.truncated_normal([256, 10], stddev=0.1))
    bias2 = tf.Variable(tf.zeros([10]))
    output2 = tf.add(tf.matmul(output1, weight2), bias2)
with tf.name_scope('Layer2'):
    # 10 nodes
    weight3 = tf.Variable(tf.truncated_normal([10, 10], stddev=0.1))
    bias3 = tf.Variable(tf.zeros([10]))
    output3 = tf.add(tf.matmul(output2, weight3), bias3)
with tf.name_scope('Layer3'):
    # 10 nodes
    weight4 = tf.Variable(tf.truncated_normal([10, 10], stddev=0.1))
    bias4 = tf.Variable(tf.zeros([10]))
    output4 = tf.add(tf.matmul(output3, weight4), bias4)
with tf.name_scope('layer4'):
    # 10 nodes
    weight5 = tf.Variable(tf.truncated_normal([10, 10], stddev=0.1))
    bias5 = tf.Variable(tf.zeros([10]))
    output5 = tf.add(tf.matmul(output4, weight5), bias5)

# Output layer
with tf.name_scope('Prediction'):
    weight6 = tf.Variable(tf.truncated_normal([10, 10], stddev=0.1))
    bias6 = tf.Variable(tf.zeros([10]))
    # The bias belongs inside the softmax: adding it afterwards yields values
    # that no longer sum to 1, which makes the cross-entropy meaningless.
    logits = tf.add(tf.matmul(output5, weight6), bias6)
    output6 = tf.nn.softmax(logits)

# One-hot encoded labels
Target = tf.placeholder(tf.float32, [None, 10])

# Cross-entropy loss, computed from the logits by the fused op so that a
# saturated softmax cannot produce log(0).
with tf.name_scope('Loss'):
    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=Target, logits=logits))

# Training operation
with tf.name_scope('Train'):
    train_op = tf.train.AdamOptimizer(learning_rate).minimize(loss)

with tf.name_scope('Init'):
    init = tf.global_variables_initializer()

loss_history = []
with tf.Session() as sess:
    sess.run(init)
    # The same batch is fed on every iteration.
    feed = {input1: x_data, Target: y_data}
    start_time = time.time()
    # Run train_steps optimisation steps.
    for _ in range(train_steps):
        sess.run(train_op, feed_dict=feed)
        lo = sess.run(loss, feed_dict=feed)
        print(lo)
        loss_history.append(lo)
        # Report the wall-clock time spent on this iteration.
        end_time = time.time()
        print('time: ', (end_time - start_time))
        start_time = end_time
    print("Optimization Finished!")

    # Write the graph to a log directory; change the path as needed.
    writer = tf.summary.FileWriter("logs/", sess.graph)
    writer.close()

# Plot the loss recorded after every training step.
plt.plot(loss_history)
plt.xlabel('The sampling point')
plt.ylabel('loss')
plt.title("The variation of the loss")
plt.grid(True)
plt.show()
本文介绍了如何使用TensorFlow建立普通的BP神经网络模型,读者可通过修改模型内的隐藏层层数、隐藏层节点数、每一层的激活函数、训练迭代次数、学习率等因素调整模型,有任何的问题请在评论区留言,我会尽快回复,谢谢支持。下一节,我们开始介绍如何使用TensorFlow建立卷积神经网络模型以更好地处理和识别图像数据集。
下一节:TensorFlow实践(10)——卷积神经网络模型LeNet5