#%%
# Required packages
import pandas as pd
import numpy as np
import tensorflow as tf
from matplotlib import pyplot as plt
%matplotlib inline
from sklearn.preprocessing import MinMaxScaler
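#%%
# Note: this notebook uses the TensorFlow 1.x graph API (tf.placeholder,
# tf.get_variable, tf.Session). On TensorFlow 2.x these calls are gone; a
# common workaround (a sketch, assuming TF 2.x is installed) is the v1
# compatibility layer:
#   import tensorflow.compat.v1 as tf
#   tf.disable_v2_behavior()
# tf.contrib was removed entirely in TF 2.x, so the contrib Xavier initializer
# used below would also need a substitute such as tf.glorot_uniform_initializer.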
#%%
# Load the data
data = pd.read_csv('data_stocks.csv')
data.info()
#%%
# Preview the first few rows
data.head()
#%%
# Plot the S&P 500 index as a line chart
plt.plot(data['SP500'])
#%%
# Split the dataset into training and test data.
# The split ratio is 80:20, i.e. the training set holds 80% of the rows.
# Drop the irrelevant DATE column first.
data.drop('DATE', axis=1, inplace=True)
data_train = data.iloc[:int(data.shape[0] * 0.8), :]
data_test = data.iloc[int(data.shape[0] * 0.8):, :]
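#%%
# The split is positional rather than random, preserving chronological order,
# which is what you want for time-series data. A quick sanity check of the
# split sizes (minimal sketch, no new assumptions):
split = int(data.shape[0] * 0.8)
print('train rows:', split, 'test rows:', data.shape[0] - split)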
#%%
data_train.shape
#%%
data_test.shape
#%%
# Scale each feature to a fixed range, here (-1, 1),
# via sklearn.preprocessing.MinMaxScaler.
# The scaler is fitted on the training data only, to avoid test-set leakage.
scaler = MinMaxScaler(feature_range=(-1, 1))
scaler.fit(data_train)
data_train = scaler.transform(data_train)
data_test = scaler.transform(data_test)
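#%%
# With feature_range=(-1, 1), each column x is mapped to
#   x_scaled = 2 * (x - min(x)) / (max(x) - min(x)) - 1
# using the training-set min/max. A minimal sanity check on the first
# training column (illustrative only; the training data should span [-1, 1],
# while test values may fall slightly outside):
print('scaled range of column 0:', data_train[:, 0].min(), data_train[:, 0].max())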
#%%
data_train
#%%
data_test
#%%
# Separate features and target: column 0 (SP500) is the value to predict
X_train = data_train[:, 1:]
y_train = data_train[:, 0]
X_test = data_test[:, 1:]
y_test = data_test[:, 0]
#%%
X_train.shape
#%%
y_train.shape
#%%
# Hyperparameters
input_dim = X_train.shape[1]  # input dimension (number of features)
output_dim = 1                # output dimension (single regression target)
# Four hidden layers
hidden_1 = 1024  # first hidden layer: 1024 neurons
hidden_2 = 512   # second hidden layer: 512 neurons
hidden_3 = 256   # third hidden layer: 256 neurons
hidden_4 = 128   # fourth hidden layer: 128 neurons
batch_size = 510  # number of samples per training batch
epochs = 10       # number of training epochs
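#%%
# Each epoch performs y_train.shape[0] // batch_size gradient updates; any
# leftover rows beyond the last full batch are skipped in that epoch. A
# minimal check of the resulting batch count:
print('batches per epoch:', data_train.shape[0] // batch_size)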
#%%
# Placeholders for the network inputs (X) and regression targets (Y)
X = tf.placeholder(shape=[None, input_dim], dtype=tf.float32)
Y = tf.placeholder(shape=[None], dtype=tf.float32)
#%%
# Layer 1: Xavier-initialized weights, zero-initialized biases
W1 = tf.get_variable('W1', [input_dim, hidden_1], initializer=tf.contrib.layers.xavier_initializer(seed=1))
b1 = tf.get_variable('b1', [hidden_1], initializer=tf.zeros_initializer())
# Layer 2
W2 = tf.get_variable('W2', [hidden_1, hidden_2], initializer=tf.contrib.layers.xavier_initializer(seed=1))
b2 = tf.get_variable('b2', [hidden_2], initializer=tf.zeros_initializer())
# Layer 3
W3 = tf.get_variable('W3', [hidden_2, hidden_3], initializer=tf.contrib.layers.xavier_initializer(seed=1))
b3 = tf.get_variable('b3', [hidden_3], initializer=tf.zeros_initializer())
# Layer 4
W4 = tf.get_variable('W4', [hidden_3, hidden_4], initializer=tf.contrib.layers.xavier_initializer(seed=1))
b4 = tf.get_variable('b4', [hidden_4], initializer=tf.zeros_initializer())
# Output layer
W5 = tf.get_variable('W5', [hidden_4, output_dim], initializer=tf.contrib.layers.xavier_initializer(seed=1))
b5 = tf.get_variable('b5', [output_dim], initializer=tf.zeros_initializer())
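#%%
# For reference: the Xavier (Glorot) uniform initializer used above draws
# weights from U(-limit, limit) with limit = sqrt(6 / (fan_in + fan_out)),
# chosen to keep activation variance roughly constant across layers.
# A minimal numpy sketch of the bound for the first layer (illustrative only):
print('W1 init bound: +/-', np.sqrt(6.0 / (input_dim + hidden_1)))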
#%%
# Network architecture: four ReLU hidden layers feeding a linear output
h1 = tf.nn.relu(tf.add(tf.matmul(X, W1), b1))
h2 = tf.nn.relu(tf.add(tf.matmul(h1, W2), b2))
h3 = tf.nn.relu(tf.add(tf.matmul(h2, W3), b3))
h4 = tf.nn.relu(tf.add(tf.matmul(h3, W4), b4))
out = tf.transpose(tf.add(tf.matmul(h4, W5), b5))  # transpose to [1, batch] so it broadcasts against Y
#%%
# Loss function (mean squared error) and optimizer (Adam)
loss = tf.reduce_mean(tf.squared_difference(out, Y))
optimizer = tf.train.AdamOptimizer().minimize(loss)
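#%%
# tf.reduce_mean(tf.squared_difference(out, Y)) is the mean squared error,
# MSE = (1/n) * sum_i (pred_i - true_i)^2. A minimal numpy equivalent of the
# same computation (illustrative sketch with made-up values):
demo_pred = np.array([0.1, -0.2, 0.3])
demo_true = np.array([0.0, -0.1, 0.5])
print('MSE:', np.mean((demo_pred - demo_true) ** 2))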
#%%
with tf.Session() as sess:
    # Initialize all variables
    sess.run(tf.global_variables_initializer())
    for e in range(epochs):
        # Shuffle the training data at the start of each epoch
        shuffle_indices = np.random.permutation(np.arange(y_train.shape[0]))
        X_train = X_train[shuffle_indices]
        y_train = y_train[shuffle_indices]
        for i in range(y_train.shape[0] // batch_size):
            start = i * batch_size
            batch_x = X_train[start:start + batch_size]
            batch_y = y_train[start:start + batch_size]
            sess.run(optimizer, feed_dict={X: batch_x, Y: batch_y})
            # Every 50 batches, report train/test MSE and plot predictions vs. targets
            if i % 50 == 0:
                print('MSE Train:', sess.run(loss, feed_dict={X: X_train, Y: y_train}))
                print('MSE Test:', sess.run(loss, feed_dict={X: X_test, Y: y_test}))
                y_pred = sess.run(out, feed_dict={X: X_test})
                y_pred = np.squeeze(y_pred)
                plt.plot(y_test, label='test')
                plt.plot(y_pred, label='pred')
                plt.title('Epoch ' + str(e) + ', Batch ' + str(i))
                plt.legend()
                plt.show()
#%%
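# The curves above are plotted in the scaled (-1, 1) space. To report results
# in original SP500 index points, invert the target column's scaling with the
# fitted scaler's per-column parameters (scale_ and min_ are standard
# MinMaxScaler attributes; column 0 is the SP500 target, matching the slicing
# above). A minimal sketch, assuming y_pred from the final batch is still in scope:
y_pred_price = (y_pred - scaler.min_[0]) / scaler.scale_[0]
y_test_price = (y_test - scaler.min_[0]) / scaler.scale_[0]
plt.plot(y_test_price, label='test (price)')
plt.plot(y_pred_price, label='pred (price)')
plt.legend()
plt.show()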