Note: this article is only meant as a beginner's reference for learning tensorflow; please don't take the results too seriously.
Import the required packages
import tensorflow as tf
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.utils import shuffle
Read the stock data file
data = pd.read_csv('000001.SZ.CSV',encoding='gbk',usecols = [4,5,6,7,20,21])
data.head(5)
| | 开盘价(元) | 最高价(元) | 最低价(元) | 收盘价(元) | 市盈率 | 市净率 |
|---|---|---|---|---|---|---|
| 0 | 0.5698 | 0.5698 | 0.5698 | 0.5698 | 33.5310 | 10.0482 |
| 1 | 0.5670 | 0.5670 | 0.5670 | 0.5670 | 33.3667 | 9.9990 |
| 2 | 0.5642 | 0.5642 | 0.5642 | 0.5642 | 33.2025 | 9.9498 |
| 3 | 0.5586 | 0.5586 | 0.5586 | 0.5586 | 32.8740 | 9.8514 |
| 4 | 0.5559 | 0.5559 | 0.5559 | 0.5559 | 32.7098 | 9.8022 |
Use the next day's opening price as today's prediction target y_pred
y_pred = np.zeros(data.shape[0])
for i in range(len(data)-1):
    y_pred[i] = round(data['开盘价(元)'][i+1], 4)
data['y_pred'] = y_pred
data.tail(5)
| | 开盘价(元) | 最高价(元) | 最低价(元) | 收盘价(元) | 市盈率 | 市净率 | y_pred |
|---|---|---|---|---|---|---|---|
| 6139 | 10.47 | 10.48 | 10.42 | 10.46 | 6.8451 | 0.9267 | 10.47 |
| 6140 | 10.47 | 10.52 | 10.41 | 10.50 | 6.8713 | 0.9303 | 10.51 |
| 6141 | 10.51 | 10.53 | 10.45 | 10.51 | 6.8778 | 0.9312 | 10.52 |
| 6142 | 10.52 | 10.53 | 10.48 | 10.52 | 6.8844 | 0.9321 | 10.54 |
| 6143 | 10.54 | 10.54 | 10.46 | 10.50 | 6.8713 | 0.9303 | 0.00 |
The last row has no y_pred of its own, so we simply drop that entire row.
Our goal is to feed the first six columns into the model and have it output a value as close to y_pred as possible.
In other words, we use today's 开盘价(元), 最高价(元), 最低价(元), 收盘价(元), 市盈率 and 市净率 (open, high, low, close, P/E ratio and P/B ratio) to predict tomorrow's opening price.
stockdatas = data.drop(data.index[-1]).values
x_data = stockdatas[:,:6]
y_data = stockdatas[:,6]
# Print the values of the last row
print(x_data[-1])
print(y_data[-1])
[10.52 10.53 10.48 10.52 6.8844 0.9321]
10.54
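As an aside, the shifted label can also be built directly with pandas; here is a minimal sketch (not part of the original notebook) that should produce the same y_pred column and drop the unlabeled last row:

# Sketch of an equivalent label construction using pandas' shift:
# shift(-1) moves the next day's 开盘价(元) up by one row; the final row becomes NaN and is dropped.
data['y_pred'] = data['开盘价(元)'].shift(-1).round(4)
data = data.dropna(subset=['y_pred'])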
Normalize the training samples x_data
scaler = MinMaxScaler()
scaler.fit(x_data)
X_data = scaler.transform(x_data)
X_data[-1]
array([0.59144014, 0.59188214, 0.61441079, 0.59880055, 0.02325307,
0.00445284])
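For reference, MinMaxScaler rescales each column into [0, 1] using that column's own minimum and maximum. A quick manual check (a sketch added here, not in the original) should reproduce the values above:

# x_scaled = (x - column_min) / (column_max - column_min), computed per column
col_min = x_data.min(axis=0)
col_max = x_data.max(axis=0)
print((x_data[-1] - col_min) / (col_max - col_min))   # should match X_data[-1]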
Define the model y_pred = x * w + b. Note that the number of columns of the first matrix must equal the number of rows of the second.
x = tf.placeholder(tf.float32, [None,6], name='X')
y = tf.placeholder(tf.float32, [None,1], name='Y')
with tf.name_scope('Model'):
    w = tf.Variable(tf.random_normal([6,1], stddev=0.01), name='W')
    b = tf.Variable(1.0, name='b')
    def model(x, w, b):
        return tf.matmul(x, w) + b
    pred = model(x, w, b)
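To double-check the shape rule mentioned above, an illustration (added here, assuming TF1.x graph mode):

# x is [None, 6] and w is [6, 1], so tf.matmul(x, w) is [None, 1], matching placeholder y;
# the scalar bias b is broadcast over that shape.
print(pred.shape)   # expected: (?, 1)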
Define the loss function and the optimizer
train_epochs = 3
learning_rate = 0.01
with tf.name_scope('lossfunc'):
    loss_function = tf.reduce_mean(tf.pow(y-pred, 2))
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss_function)
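For intuition, minimize() computes the gradients of the loss with respect to w and b and applies one gradient-descent step per call. A hand-rolled equivalent, shown only as a sketch for illustration (it is never run below):

# One explicit SGD step: w <- w - learning_rate * dL/dw, b <- b - learning_rate * dL/db
grads = tf.gradients(loss_function, [w, b])
manual_step = [tf.assign_sub(w, learning_rate * grads[0]),
               tf.assign_sub(b, learning_rate * grads[1])]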
Start training
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)
for e in range(train_epochs):
    loss_sum = 0.0
    # one SGD update per sample
    for xs, ys in zip(X_data, y_data):
        xs = xs.reshape(1, 6)
        ys = ys.reshape(1, 1)
        _, loss = sess.run([optimizer, loss_function], feed_dict={x: xs, y: ys})
        loss_sum = loss_sum + loss
    # reshuffle the samples between epochs
    X_data, y_data = shuffle(X_data, y_data)
    b0 = sess.run(b)
    w0 = sess.run(w)
    loss_avg = loss_sum / len(y_data)
    print('epoch={},lossavg={},w={},b={}'.format(e, loss_avg, w0, b0))
epoch=0,lossavg=0.13093365134630774,w=[[2.2813892]
[2.410216 ]
[2.3408403]
[2.4081025]
[2.2087398]
[2.3941853]],b=4.765807628631592
epoch=1,lossavg=0.19175580245865206,w=[[4.057511 ]
[4.264938 ]
[4.190086 ]
[4.2901344 ]
[0.01645006]
[0.01836737]],b=0.41750502586364746
epoch=2,lossavg=0.012530410840827653,w=[[4.0354447 ]
[4.2829742 ]
[4.2025666 ]
[4.34924 ]
[0.01506332]
[0.01617897]],b=0.4302012026309967
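Before testing a single row, it may be worth checking the fit on the whole (already shuffled) dataset. A minimal evaluation sketch, not part of the original:

# Run the model on all samples at once and report the mean absolute error in yuan
all_pred = sess.run(pred, feed_dict={x: X_data})
mae = np.mean(np.abs(all_pred.reshape(-1) - y_data))
print('MAE = {:.4f}'.format(mae))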
Training is done; feed in a row of data to see how the model performs
xtest = X_data[1050].reshape(1,-1)
pred_val = sess.run(pred, feed_dict={x:xtest})
pred_val
array([[16.004889]], dtype=float32)
Compare with the actual value
y_data[1050]
15.51
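If you want to predict from raw, unscaled prices instead of an already normalized row, remember to pass them through the same fitted scaler first. A small usage sketch (the numbers are simply the last training row printed earlier):

# Normalize a raw feature row with the fitted scaler before feeding it to the model
raw = np.array([[10.52, 10.53, 10.48, 10.52, 6.8844, 0.9321]])
print(sess.run(pred, feed_dict={x: scaler.transform(raw)}))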