利用LSTM进行股票预测分析

需要使用的库:

  • tushare
  • pytorch
  • pandas
  • numpy
  • matplotlib

1. 数据导入

import tushare as ts 

ts.set_token('98edbe9c3e444002decb09646838a02f0307e41bd5ce51bb5b2a99c8')
ts_pro = ts.pro_api()
help(ts)
Help on package tushare:

NAME
    tushare - # -*- coding:utf-8 -*-

PACKAGE CONTENTS
    bond (package)
    coins (package)
    data (package)
    fund (package)
    futures (package)
    internet (package)
    pro (package)
    stock (package)
    subs (package)
    trader (package)
    util (package)

VERSION
    1.2.62

AUTHOR
    Jimmy Liu

FILE
    e:\anaconda3\lib\site-packages\tushare\__init__.py
# 股票代码
ts_code = '002069.SZ'

# 开始时间
start_date = '2006-01-01'

# 结束时间
end_date = '2020-01-01'

df = ts_pro.daily(
        ts_code=ts_code,
        start_date=start_date,
        end_date=end_date)

# 将获取的文件存储在本地
df.to_csv('002069.csv', index=0)

import pandas as pd 

df = pd.read_csv('002069.csv')

# 按照时间进行排序,方便预测处理
df = df.sort_values(['trade_date'], ascending=True)

# 简要查看文件信息
df.head()
ts_code trade_date open high low close pre_close change pct_chg vol amount
3034 002069.SZ 20060928 60.89 64.48 58.00 62.11 25.00 37.11 148.44 169301.73 1.028809e+06
3033 002069.SZ 20060929 64.10 67.00 62.48 62.99 62.11 0.88 1.42 49290.44 3.182028e+05
3032 002069.SZ 20061009 63.00 66.15 62.16 65.00 62.99 2.01 3.19 28449.04 1.827514e+05
3031 002069.SZ 20061010 64.89 71.50 64.00 71.49 65.00 6.49 9.98 34935.76 2.381439e+05
3030 002069.SZ 20061011 70.20 71.80 69.22 69.99 71.49 -1.50 -2.10 15807.24 1.109196e+05

2. 数据预处理

# 选取这几个属性作为输入特征
use_cols = ['open', 'high', 'low', 'close', 'pre_close', 'vol', 'amount']
df = df[use_cols]
df.head()
open high low close pre_close vol amount
3034 60.89 64.48 58.00 62.11 25.00 169301.73 1.028809e+06
3033 64.10 67.00 62.48 62.99 62.11 49290.44 3.182028e+05
3032 63.00 66.15 62.16 65.00 62.99 28449.04 1.827514e+05
3031 64.89 71.50 64.00 71.49 65.00 34935.76 2.381439e+05
3030 70.20 71.80 69.22 69.99 71.49 15807.24 1.109196e+05
# 为了归一化后复现原来数据
close_min = df['close'].min()
close_max = df['close'].max()
# 归一化处理(0,1)
df=df.apply(lambda x:(x-min(x))/(max(x)-min(x)))
df.head()
open high low close pre_close vol amount
3034 0.401512 0.416768 0.404243 0.401345 0.151699 0.166571 0.800115
3033 0.423566 0.433710 0.436792 0.407265 0.401345 0.047720 0.247007
3032 0.416008 0.427995 0.434467 0.420787 0.407265 0.027080 0.141577
3031 0.428993 0.463964 0.447835 0.464447 0.420787 0.033504 0.184693
3030 0.465476 0.465981 0.485760 0.454356 0.464447 0.014561 0.085666
import numpy as np 
# 序列长度为30,即用前一个月的数据预测之后一天的数据
sequence = 30

X = []
Y = []

for i in range(df.shape[0]-sequence):
    # 选择use_cols作为特征
    X.append(np.array(df.iloc[i:(i+sequence),].values, dtype=np.float))
    # 选择close作为标签输出
    Y.append(np.array(df.iloc[(i+sequence),3],dtype=np.float))

# 划分训练集和测试集
trainx, trainy = X[:int(0.8*df.shape[0])], Y[:int(0.8*df.shape[0])]
testx, testy = X[int(0.8*df.shape[0]):], Y[int(0.8*df.shape[0]):]
print(len(trainx))
print(len(testx))
2428
577
import torch
import torch.utils.data as Data 
torch.manual_seed(1)

# list -> numpy
trainx = np.array(trainx)
trainy = np.array(trainy)
testx = np.array(testx)
testy = np.array(testy)

# numpy -> torch
trainx = torch.from_numpy(trainx)
trainy = torch.from_numpy(trainy)
testx = torch.from_numpy(testx)
testy = torch.from_numpy(testy)

print('trainx size: ', trainx.size())
print('trainy size: ', trainy.size())
print('testx size: ', testx.size())
print('testy size: ', testy.size())
trainx size:  torch.Size([2428, 30, 7])
trainy size:  torch.Size([2428])
testx size:  torch.Size([577, 30, 7])
testy size:  torch.Size([577])
#  批处理 batch的大小为32
train_dataset = Data.TensorDataset(trainx, trainy)
test_dataset = Data.TensorDataset(testx, testy)

train_loader = Data.DataLoader(
    dataset=train_dataset,
    batch_size=32,
    shuffle=True,
    num_workers=2
)

test_loader = Data.DataLoader(
    dataset=test_dataset,
    batch_size=32,
    shuffle=True,
    num_workers=2
)

3. 定义网络模型

input_size = 7
seq_len = 30
hidden_size = 32
output_size = 1

import torch.nn as nn
import torch.nn.functional as F 
from torch.autograd import Variable

class MyNet(nn.Module):

    def __init__(self, input_size=input_size, hidden_size=hidden_size, output_size=output_size):
        super(MyNet, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size, batch_first=True)
        self.fc = nn.Linear(self.hidden_size*seq_len, self.output_size)
    
    def forward(self, input):
        out,_ = self.lstm(input)
        b, s, h = out.size()
        out = self.fc(out.reshape(b, s*h))
        return out 

net = MyNet()
print(net)

MyNet(
  (lstm): LSTM(7, 32, batch_first=True)
  (fc): Linear(in_features=960, out_features=1, bias=True)
)

4. 选择损失函数及优化器

import torch.optim as optim 
from tqdm import tqdm

loss_function = nn.MSELoss()
optimizer = optim.Adam(net.parameters(), lr=0.001)

for epoch in tqdm(range(100)):
    total_loss = 0
    for _,(data, label) in enumerate(train_loader):
        data = Variable(data).float()
        pred = net(data)
        label = Variable(label).float()
        label = label.unsqueeze(1)
        loss = loss_function(pred, label)
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        total_loss += loss.item()
        
    if (epoch+1) % 10 == 0:
        print('Epoch: ', epoch+1, ' loss: ', total_loss)
 10%|█         | 10/100 [00:17<02:32,  1.70s/it]Epoch:  10  loss:  0.03518655754669453
 20%|██        | 20/100 [00:34<02:14,  1.68s/it]Epoch:  20  loss:  0.023873500191257335
 30%|███       | 30/100 [00:53<02:11,  1.88s/it]Epoch:  30  loss:  0.02149457185441861
 40%|████      | 40/100 [01:14<02:07,  2.13s/it]Epoch:  40  loss:  0.017912164659719565
 50%|█████     | 50/100 [01:31<01:25,  1.72s/it]Epoch:  50  loss:  0.013031252356086043
 60%|██████    | 60/100 [01:51<01:14,  1.87s/it]Epoch:  60  loss:  0.014165752041662927
 70%|███████   | 70/100 [02:10<01:02,  2.09s/it]Epoch:  70  loss:  0.011516284157551127
 80%|████████  | 80/100 [02:27<00:35,  1.79s/it]Epoch:  80  loss:  0.01033103184090578
 90%|█████████ | 90/100 [02:47<00:17,  1.74s/it]Epoch:  90  loss:  0.010540407126427453
100%|██████████| 100/100 [03:07<00:00,  1.88s/it]Epoch:  100  loss:  0.01101792572899285

5. 测试模型

pred_list = []
label_list = []

for _, (data, label) in enumerate(test_loader):
    data = Variable(data).float()
    pred = net(data)
    pred_list.extend(pred.data.squeeze(1).tolist())
    label_list.extend(label.tolist())

pred_list[:5]
[0.003661651164293289,
 0.004493666812777519,
 0.017206232994794846,
 0.004013188183307648,
 -0.003268543630838394]
label_list[:5]
[0.005449041372351158,
 0.007938109653548601,
 0.015136226034308779,
 0.00538176925664312,
 0.0014127144298688192]

简单查看预测结果与真实值,发现相差不是特别明显

import matplotlib.pyplot as plt 


plt.plot([i*(close_max-close_min)+close_min for i in pred_list[:100]] , label='pred')
plt.plot([i*(close_max-close_min)+close_min for i in label_list[:100]], label='real')
plt.title('Stock Forecast')
plt.legend()
plt.show()

利用LSTM进行股票预测分析_第1张图片

你可能感兴趣的:(python,深度学习,LSTM,股票预测)