import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
HIDDEN_SIZE = 30          # number of hidden units in each LSTM cell
NUM_LAYERS = 2            # number of stacked LSTM layers
TIMESTEPS = 10            # truncation length of the recurrent network
TRAINING_STEPS = 10000    # number of training steps
BATCH_SIZE = 32           # batch size
TRAINING_EXAMPLES = 10000 # number of training examples
TESTING_EXAMPLES = 1000   # number of test examples
SAMPLE_GAP = 0.01         # sampling interval
# Generate the sine-wave dataset.
def generate_data(seq):
    X = []
    Y = []
    # Item i through item i + TIMESTEPS - 1 of the sequence together form one
    # input window, and item i + TIMESTEPS is the corresponding output: the
    # TIMESTEPS points preceding position i + TIMESTEPS are used to predict
    # the sine value at that position. Since each window needs TIMESTEPS
    # points plus one label, exactly len(seq) - TIMESTEPS windows fit.
    for i in range(len(seq) - TIMESTEPS):
        X.append([seq[i:i + TIMESTEPS]])
        Y.append([seq[i + TIMESTEPS]])
    return np.array(X, dtype=np.float32), np.array(Y, dtype=np.float32)
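# For reference (a quick sanity check, not part of the original text): with
# TIMESTEPS = 10, generate_data(np.sin(np.linspace(0, 1, 50))) returns X of
# shape (40, 1, 10) and Y of shape (40, 1); X[k, 0] holds the 10 points
# preceding position k + TIMESTEPS, and Y[k, 0] the value at that position.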
# Define the LSTM model.
def lstm(X, Y, training):
    # Single-layer LSTM:
    # cell = tf.nn.rnn_cell.LSTMCell(HIDDEN_SIZE)
    # Multi-layer LSTM:
    cell = tf.nn.rnn_cell.MultiRNNCell(
        [tf.nn.rnn_cell.BasicLSTMCell(HIDDEN_SIZE) for _ in range(NUM_LAYERS)])
    outputs, _ = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)
    # outputs holds the top LSTM layer's output at every time step, with shape
    # [batch_size, time, HIDDEN_SIZE]; this problem only needs the output at
    # the last time step.
    output = outputs[:, -1, :]
    pred = tf.contrib.layers.fully_connected(output, 1, activation_fn=None)
    # During training, compute the loss and an optimization op; at test time,
    # return the prediction directly.
    if not training:
        return pred, None, None
    # Loss function.
    loss = tf.losses.mean_squared_error(labels=Y, predictions=pred)
    # Optimization op.
    op = tf.contrib.layers.optimize_loss(
        loss, tf.train.get_global_step(), optimizer="Adagrad", learning_rate=0.1)
    return pred, loss, op
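# Note: train() below builds the model inside variable_scope("prediction_sin"),
# and run_eval() reopens that scope with reuse=True, so evaluation runs against
# the same trained weights rather than a freshly initialized copy.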
def train(sess, train_x, train_y):
    # Feed the training data into the computation graph via a Dataset.
    ds = tf.data.Dataset.from_tensor_slices((train_x, train_y))
    # Repeat, shuffle, and batch the data.
    ds = ds.repeat().shuffle(1000).batch(BATCH_SIZE)
    X, Y = ds.make_one_shot_iterator().get_next()
    # Build the LSTM network and obtain the prediction, loss, and train op.
    with tf.variable_scope("prediction_sin"):
        pred, loss, op = lstm(X, Y, True)
    # Initialize the variables.
    sess.run(tf.global_variables_initializer())
    for i in range(TRAINING_STEPS):
        _, l = sess.run([op, loss])
        if i % 200 == 0:
            print("train step: " + str(i) + ", loss: " + str(l))
def run_eval(sess, test_x, test_y):
    ds = tf.data.Dataset.from_tensor_slices((test_x, test_y))
    ds = ds.batch(1)
    X, Y = ds.make_one_shot_iterator().get_next()
    # Run the LSTM network to obtain predictions.
    with tf.variable_scope("prediction_sin", reuse=True):
        pred, _, _ = lstm(X, [0.0], False)
    # Collect the predictions and the true labels.
    predictions = []
    labels = []
    for i in range(TESTING_EXAMPLES):
        p, l = sess.run([pred, Y])
        predictions.append(p)
        labels.append(l)
    predictions = np.array(predictions).squeeze()
    labels = np.array(labels).squeeze()
    # Prediction error: the square root of the mean squared error, i.e. RMSE.
    rmse = np.sqrt(((predictions - labels) ** 2).mean(axis=0))
    print("RMSE is: %f" % rmse)
    # Plot the predictions against the true values.
    plt.figure()
    plt.plot(predictions, label='predictions')
    plt.plot(labels, label='labels')
    plt.legend()
    plt.show()
# Generate the sine series and build the training and test sets.
start = (TRAINING_EXAMPLES + TIMESTEPS) * SAMPLE_GAP
end = start + (TESTING_EXAMPLES + TIMESTEPS) * SAMPLE_GAP
# Training set: TRAINING_EXAMPLES examples.
train_x, train_y = generate_data(
    np.sin(np.linspace(0, start, TRAINING_EXAMPLES + TIMESTEPS, dtype=np.float32)))
# Test set: TESTING_EXAMPLES examples.
test_x, test_y = generate_data(
    np.sin(np.linspace(start, end, TESTING_EXAMPLES + TIMESTEPS, dtype=np.float32)))

with tf.Session() as sess:
    # Train the model.
    train(sess, train_x, train_y)
    # Evaluate on the test set.
    run_eval(sess, test_x, test_y)
Running the code above prints the training loss every 200 steps, followed by the RMSE on the test set, and then shows a plot of the predictions against the true labels. As the plot makes clear, the LSTM network predicts the sine function well.
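For readers on TensorFlow 2.x, where tf.nn.rnn_cell, tf.contrib, and tf.Session no longer exist, a minimal tf.keras sketch of the same two-layer LSTM regressor is given below. It is an illustrative equivalent under the data layout used above (input windows of shape [N, 1, TIMESTEPS]) and the same Adagrad settings, not the code from the text.

import tensorflow as tf

HIDDEN_SIZE = 30   # same hyperparameters as above
TIMESTEPS = 10
BATCH_SIZE = 32

# Two stacked LSTM layers followed by a linear output unit.
model = tf.keras.Sequential([
    tf.keras.layers.LSTM(HIDDEN_SIZE, return_sequences=True,
                         input_shape=(1, TIMESTEPS)),
    tf.keras.layers.LSTM(HIDDEN_SIZE),
    tf.keras.layers.Dense(1),
])
model.compile(optimizer=tf.keras.optimizers.Adagrad(learning_rate=0.1),
              loss="mse")
# model.fit(train_x, train_y, batch_size=BATCH_SIZE, epochs=5)  # example call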