Deep Learning: Recurrent Neural Networks (RNN and LSTM) on the MNIST Handwritten Digit Dataset

1. RNN (basic recurrent neural network)
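Each 28×28 MNIST image is fed to the network row by row: one image becomes a sequence of n_steps = 28 time steps, each step a 28-dimensional input vector (one pixel row). A minimal numpy sketch of the reshape the training loop performs (the array contents here are dummy data):

import numpy as np

batch = np.zeros((100, 784))     # a flat mini-batch, as returned by mnist.train.next_batch
seq = batch.reshape(-1, 28, 28)  # [batch, n_steps, n_inputs]: 28 rows of 28 pixels each
print(seq.shape)                 # (100, 28, 28)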

import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data  # MNIST data loader (TF 1.x)
from tensorflow.contrib.layers import fully_connected  # fully connected layer

# one_hot defaults to False, so labels come back as integer class IDs,
# which is what sparse_softmax_cross_entropy_with_logits expects below
mnist = input_data.read_data_sets('mnist_data')
x_data = mnist.train.images
y_data = mnist.train.labels

# Hyperparameters
n_steps = 28    # number of time steps (sequence length: one image row per step)
n_inputs = 28   # input size per step (28 pixels per row)
n_neurons = 100 # hidden state size (number of recurrent units)
n_outputs = 10  # 10 output classes

# 1. Placeholders
x = tf.placeholder(tf.float32, shape=[None, n_steps, n_inputs])
y = tf.placeholder(tf.int32, shape=[None])
# 2. Recurrent layer
basic_cell = tf.nn.rnn_cell.BasicRNNCell(num_units=n_neurons)
outputs, states = tf.nn.dynamic_rnn(basic_cell, x, dtype=tf.float32)
# 3. Fully connected layer on the hidden state of the last time step
logits = fully_connected(outputs[:, -1], n_outputs, activation_fn=None)

# 4. Cost function
cost = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=y))
# 5. Optimizer
optimizer = tf.train.AdamOptimizer(learning_rate=0.000971).minimize(cost)

# 6. Accuracy
correct = tf.nn.in_top_k(logits, y, 1)  # True where the top-1 prediction matches the label
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

# 7. Training (global variables are initialized inside the session below)
train_count = 15  # number of training epochs
batch_size = 100  # mini-batch size
print('Training started')

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for epo in range(train_count):
        total_batch = mnist.train.num_examples // batch_size
        for i in range(total_batch):
            batch_x, batch_y = mnist.train.next_batch(batch_size)
            batch_x = batch_x.reshape([-1, n_steps, n_inputs])  # flat 784 vector -> [batch, 28, 28]
            sess.run(optimizer, feed_dict={x: batch_x, y: batch_y})
        # evaluate on the last mini-batch and on the full test set
        train_acc = accuracy.eval(feed_dict={x: batch_x, y: batch_y})
        test_acc = accuracy.eval(feed_dict={x: mnist.test.images.reshape([-1, n_steps, n_inputs]), y: mnist.test.labels})
        print(epo, "Train accuracy:", train_acc, "Test accuracy:", test_acc)
print('Training finished')

Results:
0 Train accuracy: 0.91 Test accuracy: 0.9147
1 Train accuracy: 0.94 Test accuracy: 0.938
2 Train accuracy: 0.97 Test accuracy: 0.9536
3 Train accuracy: 0.93 Test accuracy: 0.9545
4 Train accuracy: 0.97 Test accuracy: 0.9577
5 Train accuracy: 0.98 Test accuracy: 0.9607
6 Train accuracy: 0.98 Test accuracy: 0.9598
7 Train accuracy: 0.97 Test accuracy: 0.9623
8 Train accuracy: 0.97 Test accuracy: 0.9714
9 Train accuracy: 0.96 Test accuracy: 0.9651
10 Train accuracy: 0.96 Test accuracy: 0.9709
11 Train accuracy: 0.98 Test accuracy: 0.9741
12 Train accuracy: 0.98 Test accuracy: 0.9711
13 Train accuracy: 0.99 Test accuracy: 0.9698
14 Train accuracy: 0.97 Test accuracy: 0.9706
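A point worth making explicit: tf.nn.dynamic_rnn returns outputs of shape [batch, n_steps, n_neurons], and the classifier above reads only the final time step. A minimal shape check, assuming TensorFlow 1.x and the same parameters as above:

import tensorflow as tf

x = tf.placeholder(tf.float32, shape=[None, 28, 28])
cell = tf.nn.rnn_cell.BasicRNNCell(num_units=100)
outputs, states = tf.nn.dynamic_rnn(cell, x, dtype=tf.float32)
print(outputs.shape)         # (?, 28, 100): one hidden state per time step
print(outputs[:, -1].shape)  # (?, 100): hidden state after the last row, fed to the classifier
print(states.shape)          # (?, 100): for BasicRNNCell this equals outputs[:, -1]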

2. LSTM recurrent neural network

import tensorflow as tf
from tensorflow.contrib.layers import fully_connected
from tensorflow.examples.tutorials.mnist import input_data
import matplotlib.pyplot as plt

mnist = input_data.read_data_sets('mnist_data')
x_data = mnist.train.images
y_data = mnist.train.labels
n_layers = 4   # number of stacked LSTM layers
n_inputs = 28  # input size per step (features)
n_steps = 28   # number of time steps (sequence length T)
# 1. Placeholders
# the first 28 (n_steps) is the sequence length, the second 28 (n_inputs) is the per-step input size
x = tf.placeholder(tf.float32, shape=[None, n_steps, n_inputs])
y = tf.placeholder(tf.int32, shape=[None])

# 2. Stacked (multi-layer) LSTM
# the list comprehension builds a distinct LSTMCell per layer, each with 100 hidden units;
# reusing one cell object for every layer would not create independent weights
lstm_cell = [tf.nn.rnn_cell.LSTMCell(num_units=100) for layer in range(n_layers)]
multi_cell = tf.nn.rnn_cell.MultiRNNCell(lstm_cell)
outputs, states = tf.nn.dynamic_rnn(multi_cell, x, dtype=tf.float32)

# 3. Fully connected layer on the top layer's last time step
logits = fully_connected(outputs[:, -1], 10, activation_fn=None)

# 4. Cost function and optimizer
cost = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=y))
optimizer = tf.train.AdamOptimizer(learning_rate=0.001).minimize(cost)

# 5. Accuracy
correct = tf.nn.in_top_k(logits, y, 1)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
# 6. Initialize global variables and train
print('Training started')
train_count = 10
batch_size = 100
ls = []  # average cost per epoch, plotted after training
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for epo in range(train_count):
        avg_cost = 0
        total_batch = int(mnist.train.num_examples / batch_size)
        for i in range(total_batch):
            batch_x, batch_y = mnist.train.next_batch(batch_size)
            batch_x = batch_x.reshape([-1, n_steps, n_inputs])
            cost_val, _ = sess.run([cost, optimizer], feed_dict={x: batch_x, y: batch_y})
            avg_cost += cost_val / total_batch  # accumulate this epoch's mean cost
        ls.append(avg_cost)
        train_acc = accuracy.eval(feed_dict={x: batch_x, y: batch_y})
        test_acc = accuracy.eval(feed_dict={x: mnist.test.images.reshape([-1, n_steps, n_inputs]), y: mnist.test.labels})
        print(epo, 'train_acc', train_acc, 'test_acc', test_acc)
plt.plot(ls)  # training cost curve over epochs
plt.show()
print('Training finished')
Results:
0 train_acc 0.97 test_acc 0.9525
1 train_acc 0.97 test_acc 0.9676
2 train_acc 0.99 test_acc 0.9782
3 train_acc 0.99 test_acc 0.9775
4 train_acc 0.97 test_acc 0.9816
5 train_acc 1.0 test_acc 0.9783
6 train_acc 0.99 test_acc 0.983
7 train_acc 0.98 test_acc 0.9861
8 train_acc 1.0 test_acc 0.9873
9 train_acc 1.0 test_acc 0.9875
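An aside on the return values (not spelled out in the original post): with a MultiRNNCell, the states returned by dynamic_rnn form a tuple holding one LSTMStateTuple(c, h) per layer, while outputs contains the per-step outputs of the top layer only. A minimal sketch, assuming the same four-layer setup:

import tensorflow as tf

cells = [tf.nn.rnn_cell.LSTMCell(num_units=100) for _ in range(4)]
multi_cell = tf.nn.rnn_cell.MultiRNNCell(cells)
x = tf.placeholder(tf.float32, shape=[None, 28, 28])
outputs, states = tf.nn.dynamic_rnn(multi_cell, x, dtype=tf.float32)
print(len(states))         # 4: one LSTMStateTuple(c, h) per layer
print(states[-1].h.shape)  # (?, 100): top layer's final hidden state, equals outputs[:, -1]
print(outputs.shape)       # (?, 28, 100): per-step outputs of the top layer only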

Note: a plain RNN suffers from the vanishing-gradient problem and in practice can only model short sequences; the long short-term memory network (LSTM) improves on it and can handle long sequences, largely mitigating the vanishing gradient.
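For reference (the standard textbook formulation, which tf.nn.rnn_cell.LSTMCell implements up to details such as peephole connections being off by default), one LSTM step computes:

f_t = \sigma(W_f [h_{t-1}, x_t] + b_f)            (forget gate)
i_t = \sigma(W_i [h_{t-1}, x_t] + b_i)            (input gate)
\tilde{c}_t = \tanh(W_c [h_{t-1}, x_t] + b_c)     (candidate cell state)
c_t = f_t \odot c_{t-1} + i_t \odot \tilde{c}_t   (additive cell-state update)
o_t = \sigma(W_o [h_{t-1}, x_t] + b_o)            (output gate)
h_t = o_t \odot \tanh(c_t)                        (hidden state)

Because the cell state is updated additively, \partial c_t / \partial c_{t-1} = \mathrm{diag}(f_t) rather than a repeated product with a recurrent weight matrix, so gradients decay far more slowly over long time spans.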
