import tensorflow as tf
import numpy as np
from tensorflow.contrib import rnn
from tensorflow.examples.tutorials.mnist import input_data
Load the data
mnist = input_data.read_data_sets('/tmp/data',one_hot=True)
Extracting /tmp/data/train-images-idx3-ubyte.gz
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz
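For reference, read_data_sets returns flattened 784-pixel images, and one_hot=True makes the labels one-hot vectors. A quick shape check (assuming the standard 55000/10000 train/test split):
print(mnist.train.images.shape)   # (55000, 784): flattened 28*28 images
print(mnist.train.labels.shape)   # (55000, 10): one-hot labels
print(mnist.test.images.shape)    # (10000, 784)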
Parameters
# Training parameters
learning_rate = 0.001
training_steps = 10000
batch_size = 128
display_step = 200
# Network parameters
# Each input image is 28*28 pixels
num_input = 28
timesteps = 28
num_hidden = 256 # dimension of the LSTM hidden state
num_classes = 10
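The LSTM reads each image as a sequence: each of the 28 rows is one timestep with 28 pixel values. A minimal sketch of the reshape that the training loop below performs (the dummy batch here is only for illustration):
# dummy batch of flattened images, just to show the reshape
dummy = np.zeros((batch_size, 784), dtype=np.float32)
seq = dummy.reshape((batch_size, timesteps, num_input))
print(seq.shape)   # (128, 28, 28): the rows of each image become its timesteps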
Define the computation graph
# Inputs
X = tf.placeholder(tf.float32,[None,timesteps,num_input])
Y = tf.placeholder(tf.float32,[None,num_classes])
# Weights
weights = {'out':tf.Variable(tf.random_normal([num_hidden,num_classes]))}
biases = {'out':tf.Variable(tf.random_normal([num_classes]))}
def LSTM(x,weights,biases):
    # tf.unstack(x,timesteps,1): split x into timesteps slices along axis=1
    # before unstack, x.shape = (batch_size,timesteps,num_input)
    # after unstack, x is a list of length timesteps,
    # each element of shape (batch_size,num_input)
    x = tf.unstack(x,timesteps,1)
    lstm_cell = tf.nn.rnn_cell.LSTMCell(num_hidden,forget_bias=1.0)
    # outputs holds the output at every timestep, so it is a list of length
    # timesteps, and each element has dimension num_hidden
    # final_states[0] is the cell state, final_states[1] is the hidden state
    outputs,final_states = rnn.static_rnn(lstm_cell,x,dtype=tf.float32)
    # classify using the output of the last timestep
    return tf.matmul(outputs[-1],weights['out'])+biases['out']
logits = LSTM(X,weights,biases)
prediction = tf.nn.softmax(logits)
# Define the loss and the optimizer
loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits,labels=Y))
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
train_op = optimizer.minimize(loss_op)
# Evaluate the model
correct_pred = tf.equal(tf.argmax(prediction,1),tf.argmax(Y,1))
accuracy = tf.reduce_mean(tf.cast(correct_pred,tf.float32))
init = tf.global_variables_initializer()
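As an aside, the same graph can be built without tf.unstack: tf.nn.dynamic_rnn consumes the (batch_size, timesteps, num_input) tensor directly. A minimal equivalent sketch (not the code used above; LSTM_dynamic is just an illustrative name):
def LSTM_dynamic(x, weights, biases):
    # x keeps its (batch_size, timesteps, num_input) shape; no unstack needed
    lstm_cell = tf.nn.rnn_cell.LSTMCell(num_hidden, forget_bias=1.0)
    # outputs has shape (batch_size, timesteps, num_hidden)
    outputs, final_states = tf.nn.dynamic_rnn(lstm_cell, x, dtype=tf.float32)
    # classify from the output at the last timestep
    return tf.matmul(outputs[:, -1, :], weights['out']) + biases['out']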
Training
with tf.Session() as sess:
    sess.run(init)
    for step in range(1,training_steps+1):
        batch_x,batch_y = mnist.train.next_batch(batch_size)
        # reshape each flattened image into a (timesteps,num_input) sequence
        batch_x = batch_x.reshape((batch_size,timesteps,num_input))
        sess.run(train_op,feed_dict={X:batch_x,Y:batch_y})
        if step % display_step == 0 or step == 1:
            loss,acc = sess.run([loss_op,accuracy],feed_dict={X:batch_x,Y:batch_y})
            print("Step " + str(step) + ", Minibatch Loss= " + "{:.4f}".format(loss) + ", Training Accuracy= " + "{:.3f}".format(acc))
    print("Optimization Finished!")
    # evaluate on the first 128 test images
    test_len = 128
    test_data = mnist.test.images[:test_len].reshape((-1, timesteps, num_input))
    test_label = mnist.test.labels[:test_len]
    print("Testing Accuracy:", sess.run(accuracy, feed_dict={X: test_data, Y: test_label}))
Step 1, Minibatch Loss= 9.4089, Training Accuracy= 0.133
Step 200, Minibatch Loss= 1.7707, Training Accuracy= 0.375
......
Step 9800, Minibatch Loss= 0.0012, Training Accuracy= 1.000
Step 10000, Minibatch Loss= 0.0048, Training Accuracy= 1.000
Optimization Finished!
Testing Accuracy: 0.984375
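Note that the accuracy above is measured on only the first 128 test images. A sketch of evaluating the full 10,000-image test set instead (this would have to run inside the session above, before it closes):
# evaluate on all 10,000 test images; the batch is small enough to feed at once
full_test_data = mnist.test.images.reshape((-1, timesteps, num_input))
full_test_label = mnist.test.labels
print("Full Testing Accuracy:", sess.run(accuracy, feed_dict={X: full_test_data, Y: full_test_label}))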