Paddle飞桨学习日记: LSTM股票预测

Paddle飞桨学习日记: LSTM股票预测

    • 本项目代码基于PaddleHub的开源代码。
    • 下面是项目完整代码
    • 预测结果

本次学习打算以LSTM股票预测为例,初步了解Paddle的一些使用细节。

本项目代码基于PaddleHub的开源代码。

本文原始项目地址:
https://aistudio.baidu.com/aistudio/projectdetail/174237

本文数据:
https://aistudio.baidu.com/aistudio/datasetdetail/3580

下面是项目完整代码

#引用库文件
from __future__ import print_function
from paddle.utils.plot import Ploter
import numpy as np
import math
import matplotlib.pyplot as plt
import paddle
import paddle.fluid as fluid


# Directory the trained inference model is saved to and later loaded from
SAVE_DIRNAME = 'model'

# Read the raw dataset: one sample per line, tab-separated numeric fields.
# The context manager closes the file even if reading fails.
with open('E:/testdata/stock_LSTM_fluid/datasets/stock_dataset.txt') as f:
    df = f.readlines()

# Skip blank lines (e.g. a trailing newline at the end of the file); an empty
# row would make the rows ragged and break the float32 conversion below.
data = [
    line.strip('\n').strip('\r').split('\t')
    for line in df
    if line.strip()
]
data = np.array(data, dtype='float32')

print('数据类型:', type(data))
print('数据个数:', data.shape[0])
print('数据形状:', data.shape)
print('数据第一行:', data[0])

# Split into training and test sets, 60% / 40%
ratio = 0.6
DATA_NUM = len(data)

train_len = int(DATA_NUM * ratio)
test_len = DATA_NUM - train_len

# The test set is everything after the training rows, so it must be sliced
# from train_len (slicing from test_len would overlap the training rows).
train_data = data[:train_len]
test_data = data[train_len:]


# Mean normalization: result_data = (data - avg) / (max_ - min_)
def normalization(in_data):
    """Scale every column of ``in_data`` by mean normalization.

    Each column has its mean subtracted and is then divided by its range
    (max - min), all computed along axis 0.

    Args:
        in_data: Array-like of numbers; statistics are taken along axis 0.

    Returns:
        An array with the same shape as ``in_data``. A column whose values
        are all equal has a range of 0; it is returned as all zeros instead
        of NaN (0 / 0).
    """
    in_data = np.asarray(in_data)
    avg = np.mean(in_data, axis=0)
    value_range = np.max(in_data, axis=0) - np.min(in_data, axis=0)
    # Treat a zero range as 1 so constant columns do not divide by zero;
    # their centred values are already 0, so the result stays 0.
    safe_range = np.where(value_range == 0,
                          np.ones_like(value_range),
                          value_range)
    return (in_data - avg) / safe_range


# Scale both splits column-wise with normalization().
# NOTE(review): the test split is scaled with its own mean/range rather than
# the statistics of the training split, so the two splits are not guaranteed
# to be on the same scale — confirm this is intended.
train_data = normalization(train_data)
test_data = normalization(test_data)


# Build the PaddlePaddle readers
def my_train_reader():
    """Return a reader (generator function) over the training samples.

    Each item produced by the reader is a ``(features, target)`` pair taken
    from one row of the module-level ``train_data``: every value except the
    last is the feature part, and the last value is the target.
    """
    def reader():
        for row in train_data:
            features = row[:-1]
            target = row[-1]
            yield features, target
    return reader


def my_test_reader():
    """Return a reader (generator function) over the test samples.

    Each item produced by the reader is a ``(features, target)`` pair taken
    from one row of the module-level ``test_data``: every value except the
    last is the feature part, and the last value is the target.
    """
    def reader():
        for row in test_data:
            features = row[:-1]
            target = row[-1]
            yield features, target
    return reader


# Group the training samples into mini-batches of 10
train_reader = paddle.batch(
    my_train_reader(),
    batch_size=10)


# Build the LSTM model
DIM = 1
hid_dim2 = 1
# 'x' is declared with lod_level=1 (a sequence input) and DIM values per step;
# 'y' is the single-value regression target.
x = fluid.layers.data(name='x', shape=[DIM], dtype='float32', lod_level=1)
label = fluid.layers.data(name='y', shape=[1], dtype='float32')
# Project the input to DIM * 4 so its width matches the `size` passed to
# dynamic_lstm below.
fc0 = fluid.layers.fc(input=x, size=DIM * 4)
lstm_h, c = fluid.layers.dynamic_lstm(
    input=fc0, size=DIM * 4, is_reverse=False)

# Max pooling over each sequence
lstm_max = fluid.layers.sequence_pool(input=lstm_h, pool_type='max')
# Activation function
lstm_max_tanh = fluid.layers.tanh(lstm_max)
# Fully connected output layer
prediction = fluid.layers.fc(input=lstm_max_tanh, size=hid_dim2, act='tanh')
# Cost function: squared error between prediction and label, averaged
cost = fluid.layers.square_error_cost(input=prediction, label=label)
avg_cost = fluid.layers.mean(x=cost)
# acc = fluid.layers.accuracy(input=prediction, label=label)


# Plotter for the cost curves; only train_title is appended to in train_loop
train_title = "Train cost"
test_title = "Test cost"
plot_cost = Ploter(train_title, test_title)

# Define the optimizer
adam_optimizer = fluid.optimizer.Adam(learning_rate=0.005)
adam_optimizer.minimize(avg_cost)

# Run on the GPU
# NOTE(review): CUDAPlace(0) is hard-coded, so this presumably fails on a
# CPU-only PaddlePaddle install — confirm whether a CPUPlace fallback is wanted.
place = fluid.CUDAPlace(0)
exe = fluid.Executor(place)
# Initialize the parameters before training
exe.run(fluid.default_startup_program())
# Converts reader mini-batches into the feed dict for [x, label]
feeder = fluid.DataFeeder(place=place, feed_list=[x, label])


def train_loop(pass_num=100):
    """Train the network and save it as an inference model.

    Runs ``pass_num`` passes over ``train_reader``, feeding every mini-batch
    through ``exe`` and fetching ``avg_cost``. After each pass the mean of
    the per-batch losses is appended to ``plot_cost`` under ``train_title``
    and the plot is redrawn. When training finishes, the model (feed ``'x'``,
    fetch ``prediction``) is saved to ``SAVE_DIRNAME``.

    Args:
        pass_num: Number of passes (epochs) over the training data.
            Defaults to 100, the value that used to be hard-coded.
    """
    # The main program does not change between iterations; look it up once.
    main_program = fluid.default_main_program()
    for pass_id in range(pass_num):
        total_loss_pass = 0  # sum of the per-batch losses in this pass
        batch_count = 0
        for batch in train_reader():  # one mini-batch of samples
            avg_loss_value, = exe.run(
                main_program,
                feed=feeder.feed(batch),
                fetch_list=[avg_cost])
            total_loss_pass += avg_loss_value
            batch_count += 1
        if batch_count == 0:
            # Nothing was trained in this pass, so there is no loss to plot.
            continue
        # Plot the mean loss of the whole pass rather than only the loss of
        # the last mini-batch, which is what the accumulator was meant for.
        plot_cost.append(train_title, pass_id, total_loss_pass / batch_count)
        plot_cost.plot()
    fluid.io.save_inference_model(SAVE_DIRNAME, ['x'], [prediction], exe)


# Run training; train_loop() saves the inference model to SAVE_DIRNAME
train_loop()


def convert2LODTensor(temp_arr, len_list):
    """Pack a batch of sequences into one LoD tensor placed on the CPU.

    Args:
        temp_arr: Nested values of the batch; they are flattened into a
            single column of shape (N, 1).
        len_list: Length of each sequence, passed to
            ``fluid.create_lod_tensor`` as the only level of
            ``recursive_seq_lens``.

    Returns:
        The tensor created by ``fluid.create_lod_tensor``.

    Side effects:
        Prints the shape of the flattened column.
    """
    flat_values = np.array(temp_arr).flatten()
    column = flat_values.reshape((-1, 1))
    print(column.shape)
    lod_tensor = fluid.create_lod_tensor(
        data=column,
        recursive_seq_lens=[len_list],
        place=fluid.CPUPlace()
    )
    return lod_tensor


def get_tensor_label(mini_batch):
    """Split a mini-batch into an input LoD tensor and a list of labels.

    Args:
        mini_batch: Iterable of samples where item 0 is the feature sequence
            and item 1 is the label.

    Returns:
        A ``(tensor, labels)`` tuple: ``tensor`` is built by
        ``convert2LODTensor`` from all feature sequences and their lengths,
        ``labels`` is the list of labels in batch order.
    """
    samples = list(mini_batch)
    labels = [sample[1] for sample in samples]
    features = [sample[0] for sample in samples]
    seq_lens = [len(sequence) for sequence in features]
    return convert2LODTensor(features, seq_lens), labels


# Filled in below from the first mini-batch of the test reader
my_tensor = None
labels = None

# Group the test samples into mini-batches of 325
test_reader = paddle.batch(
    my_test_reader(),
    batch_size=325)

# Only the first mini-batch is used for prediction
for mini_batch in test_reader():
    my_tensor, labels = get_tensor_label(mini_batch)  # x as a LoD tensor, y as a list of labels
    break

# Load the saved inference model inside its own scope and run it on GPU 0
place = fluid.CUDAPlace(0)
exe = fluid.Executor(place)
inference_scope = fluid.core.Scope()
with fluid.scope_guard(inference_scope):
    [inference_program, feed_target_names, fetch_targets] = (
        fluid.io.load_inference_model(SAVE_DIRNAME, exe))
    results = exe.run(inference_program,
                      feed= {'x': my_tensor},
                      fetch_list=fetch_targets)

# Flatten the first fetched result to 1-D and plot it against the labels
result_print = results[0].flatten()
plt.figure()
plt.plot(list(range(len(labels))), labels, color='b')  # blue line: true values
plt.plot(list(range(len(result_print))), result_print, color='r')  # red line: predicted values
plt.show()

预测结果

下图为在RTX 2080上训练约5分钟后得到的预测结果(蓝线为真实值,红线为预测值)。
Paddle飞桨学习日记: LSTM股票预测_第1张图片

你可能感兴趣的:(PaddlePaddle,深度学习,机器学习,lstm,paddlepaddle)