In this exercise, I want to take a first look at some of Paddle's details by building an LSTM.
Original project for this article:
https://aistudio.baidu.com/aistudio/projectdetail/174237
Dataset for this article:
https://aistudio.baidu.com/aistudio/datasetdetail/3580
# Import the required libraries
from __future__ import print_function
from paddle.utils.plot import Ploter
import numpy as np
import math
import matplotlib.pyplot as plt
import paddle
import paddle.fluid as fluid
SAVE_DIRNAME = 'model'
f = open('E:/testdata/stock_LSTM_fluid/datasets/stock_dataset.txt')
df = f.readlines()
f.close()
data = []
for line in df:
    data_raw = line.strip('\n').strip('\r').split('\t')
    data.append(data_raw)
data = np.array(data, dtype='float32')
print('Data type:', type(data))
print('Number of samples:', data.shape[0])
print('Data shape:', data.shape)
print('First row:', data[0])
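# Side note (my own suggestion, not from the original project): the same tab-separated
# file can also be loaded in a single call, assuming every row has the same number of
# columns:
#   data = np.loadtxt('E:/testdata/stock_LSTM_fluid/datasets/stock_dataset.txt',
#                     dtype='float32', delimiter='\t')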
# Split into training and test sets, 60/40
ratio = 0.6
DATA_NUM = len(data)
train_len = int(DATA_NUM * ratio)
test_len = DATA_NUM - train_len
train_data = data[:train_len]
test_data = data[train_len:]
# Normalization: result_data = (data - avg) / (max_ - min_)
def normalization(in_data):
    avg = np.mean(in_data, axis=0)
    max_ = np.max(in_data, axis=0)
    min_ = np.min(in_data, axis=0)
    result_data = (in_data - avg) / (max_ - min_)
    return result_data
train_data = normalization(train_data)
test_data = normalization(test_data)
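# A hedged sketch (my addition, not in the original project): normalization() discards
# avg/max_/min_, so the predictions later stay in the normalized scale, and the test set
# is normalized with its own statistics rather than the training set's. One way to keep
# the statistics around, so you can reuse the training-set stats and map predictions
# back to price values:
def normalization_with_stats(in_data, stats=None):
    if stats is None:
        stats = (np.mean(in_data, axis=0),
                 np.max(in_data, axis=0),
                 np.min(in_data, axis=0))
    avg, max_, min_ = stats
    return (in_data - avg) / (max_ - min_), stats

def denormalize(norm_data, stats):
    avg, max_, min_ = stats
    return norm_data * (max_ - min_) + avg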
# Build the PaddlePaddle readers
def my_train_reader():
    def reader():
        for temp in train_data:
            yield temp[:-1], temp[-1]
    return reader

def my_test_reader():
    def reader():
        for temp in test_data:
            yield temp[:-1], temp[-1]
    return reader
# Define the batch reader
train_reader = paddle.batch(
    my_train_reader(),
    batch_size=10)
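# Optional sanity check (my addition): peek at one mini-batch to confirm each element is
# a (features, label) pair and that a batch holds 10 samples. Each call to train_reader()
# creates a fresh generator, so this does not consume data needed for training.
_batch = next(train_reader())
print('samples in batch:', len(_batch))
print('features in first sample:', len(_batch[0][0]))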
# Build the LSTM model
DIM = 1
hid_dim2 = 1
x = fluid.layers.data(name='x', shape=[DIM], dtype='float32', lod_level=1)
label = fluid.layers.data(name='y', shape=[1], dtype='float32')
fc0 = fluid.layers.fc(input=x, size=DIM * 4)
# dynamic_lstm takes size = 4 * hidden size and expects its input to have that same
# width (one slice per gate), which is why the fc layer above projects to DIM * 4
lstm_h, c = fluid.layers.dynamic_lstm(
    input=fc0, size=DIM * 4, is_reverse=False)
# Max pooling over the sequence
lstm_max = fluid.layers.sequence_pool(input=lstm_h, pool_type='max')
# Activation
lstm_max_tanh = fluid.layers.tanh(lstm_max)
# Fully connected output layer
prediction = fluid.layers.fc(input=lstm_max_tanh, size=hid_dim2, act='tanh')
# Loss function
cost = fluid.layers.square_error_cost(input=prediction, label=label)
avg_cost = fluid.layers.mean(x=cost)
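# For reference (my own illustration, not part of the original code): the loss above is
# just the mean squared error, avg_cost = mean((prediction - label) ** 2). A plain NumPy
# equivalent on toy values:
_pred = np.array([0.1, 0.3], dtype='float32')
_true = np.array([0.2, 0.2], dtype='float32')
print(np.mean((_pred - _true) ** 2))  # 0.01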
# acc = fluid.layers.accuracy(input=prediction, label=label)
train_title = "Train cost"
test_title = "Test cost"
plot_cost = Ploter(train_title, test_title)
# Define the optimizer
adam_optimizer = fluid.optimizer.Adam(learning_rate=0.005)
adam_optimizer.minimize(avg_cost)
# Use the GPU (switch to fluid.CPUPlace() if no GPU is available)
place = fluid.CUDAPlace(0)
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
feeder = fluid.DataFeeder(place=place, feed_list=[x, label])
def train_loop():
    step = 0  # step counter for plotting
    PASS_NUM = 100
    for pass_id in range(PASS_NUM):
        total_loss_pass = 0  # reset the accumulated loss at the start of each pass (epoch)
        for data in train_reader():  # data is one mini-batch of samples
            avg_loss_value, = exe.run(
                fluid.default_main_program(),
                feed=feeder.feed(data),
                fetch_list=[avg_cost])
            total_loss_pass += avg_loss_value
            # record the training cost for plotting
            plot_cost.append(train_title, step, avg_loss_value)
            step += 1
    plot_cost.plot()
    fluid.io.save_inference_model(SAVE_DIRNAME, ['x'], [prediction], exe)
train_loop()
def convert2LODTensor(temp_arr, len_list):
    # Flatten a batch of sequences into a single (N, 1) array and wrap it as a LoD
    # tensor; len_list records how many steps belong to each sequence.
    temp_arr = np.array(temp_arr)
    temp_arr = temp_arr.flatten().reshape((-1, 1))
    print(temp_arr.shape)
    return fluid.create_lod_tensor(
        data=temp_arr,
        recursive_seq_lens=[len_list],
        place=fluid.CPUPlace()
    )
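# A tiny illustration (my addition) of what convert2LODTensor produces: two "sequences"
# of lengths 3 and 2 are stored as one flat (5, 1) array plus the length list [3, 2].
_demo = fluid.create_lod_tensor(
    data=np.arange(5, dtype='float32').reshape((-1, 1)),
    recursive_seq_lens=[[3, 2]],
    place=fluid.CPUPlace())
print(_demo.recursive_sequence_lengths())  # [[3, 2]]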
def get_tensor_label(mini_batch):
    # Split a mini-batch into a LoD tensor of features and a plain list of labels.
    tensor = None
    labels = []
    temp_arr = []
    len_list = []
    for sample in mini_batch:
        labels.append(sample[1])
        temp_arr.append(sample[0])
        len_list.append(len(sample[0]))
    tensor = convert2LODTensor(temp_arr, len_list)
    return tensor, labels
my_tensor = None
labels = None
# Define the test batch reader (one large batch is used for evaluation)
test_reader = paddle.batch(
    my_test_reader(),
    batch_size=325)
for mini_batch in test_reader():
    my_tensor, labels = get_tensor_label(mini_batch)  # just x and y converted to LoD-tensor / list form
    break  # only the first batch is needed
place = fluid.CUDAPlace(0)
exe = fluid.Executor(place)
inference_scope = fluid.core.Scope()
with fluid.scope_guard(inference_scope):
    [inference_program, feed_target_names, fetch_targets] = (
        fluid.io.load_inference_model(SAVE_DIRNAME, exe))
    results = exe.run(inference_program,
                      feed={'x': my_tensor},
                      fetch_list=fetch_targets)
result_print = results[0].flatten()
plt.figure()
plt.plot(list(range(len(labels))), labels, color='b')  # blue line: ground truth
plt.plot(list(range(len(result_print))), result_print, color='r')  # red line: predictions
plt.show()
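# Optional follow-up (my addition, not in the original project): put a number on the fit
# with a simple RMSE between the normalized test labels and predictions.
_labels = np.array(labels, dtype='float32').flatten()
print('test RMSE (normalized scale):', np.sqrt(np.mean((_labels - result_print) ** 2)))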