9-32seq2seqstock.py



# -*- coding: utf-8 -*-
"""
Created on Fri Jul  6 09:22:35 2018

@author: lWX379138
"""
import random
import math

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
pd.options.mode.chained_assignment = None   #default = 'warn'

csv_fn = '600000.csv'

# Load the stock data from the CSV file
def loadstock(window_size):
    names = ['date','code','name','Close','top_price','low_price','opening_price','bef_price','floor_price','floor','exchange','Volume','amount','total_market_value','circulating_market_value']
    data = pd.read_csv(csv_fn,names=names,header = None,encoding='gbk')
   
    predictor_names = ['Close']
    training_features = np.asarray(data[predictor_names],dtype = 'float32')
    kept_values = training_features[1000:]
   
    X = []
    Y = []
    for i in range(len(kept_values) - window_size * 2):
        # X holds the previous window_size values; Y the following window_size values
        X.append(kept_values[i:i+window_size])
        Y.append(kept_values[i+window_size:i+window_size*2])
        pass
   
    X = np.reshape(X,[-1,window_size,len(predictor_names)])
    Y = np.reshape(Y,[-1,window_size,len(predictor_names)])
    print(X.shape)
   
    return X,Y
pass
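# A hypothetical walk-through of the windowing above, assuming window_size = 3
# and closes [10., 11., 12., 13., 14., 15., 16., 17.]: the first pair is
# X = [10., 11., 12.] -> Y = [13., 14., 15.], then the window slides by one to
# X = [11., 12., 13.] -> Y = [14., 15., 16.], and so on.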

def do_generate_x_y(X,Y,batch_size):
    assert X.shape == Y.shape,(X.shape,Y.shape)
    idxes = np.random.randint(X.shape[0],size = batch_size)
    X_out = np.array(X[idxes]).transpose((1,0,2))
    Y_out = np.array(Y[idxes]).transpose((1,0,2))
    return X_out,Y_out
pass
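# Shape note: X[idxes] is batch-major, (batch_size, window_size, 1); the
# transpose makes it time-major, (window_size, batch_size, 1), the layout the
# legacy seq2seq API used below expects (one tensor per timestep).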

# The train/test splits are loaded lazily on the first call
X_train, Y_train = [], []
X_test, Y_test = [], []

# Generate a batch of sequences from the stock data
def generate_data(isTrain,batch_size):
    # Use the previous 40 samples to predict the next 40 samples
   
    seq_length = 40
    seq_length_test = 40
   
    global Y_train
    global X_train
    global X_test
    global Y_test
   
    # Load into memory
    if len(Y_train) == 0:
        X,Y = loadstock(window_size=seq_length)
       
        #split 80-20
        X_train = X[:int(len(X) * 0.8)]
        Y_train = Y[:int(len(Y) * 0.8)]
        pass
   
    if len(Y_test) == 0:
        X,Y = loadstock( window_size= seq_length_test)
       
        #split 80-20
        X_test = X[int(len(X) * 0.8 ):]
        Y_test = Y[int(len(Y) * 0.8 ):]
        pass
   
   
    if isTrain:
        return do_generate_x_y(X_train,Y_train,batch_size)
    else:
        return do_generate_x_y(X_test,Y_test,batch_size)
    pass
pass

sample_now ,sample_f = generate_data(isTrain= True,batch_size=3)
print('training examples:')
print(sample_now.shape)
print('(seq_length,batch_size,output_dim)')

seq_length = sample_now.shape[0]
batch_size = 100        # Stock data has no fixed pattern and there is a lot of it, so use a larger batch size

output_dim = input_dim = sample_now.shape[-1]
hidden_dim = 12
layers_stacked_count = 2

# Learning rate
learning_rate = 0.04
nb_iters = 100000           # increase the number of iterations for this larger dataset

lambda_l2_reg = 0.003       # L2 regularization weight

tf.reset_default_graph()

encoder_input = []
expected_output = []
decode_input = []

for i in range(seq_length):
    encoder_input.append(tf.placeholder(tf.float32,shape=(None,input_dim)) )
    expected_output.append(tf.placeholder(tf.float32,shape=(None,output_dim)) )
    decode_input.append(tf.placeholder(tf.float32,shape=(None,input_dim)) )
    pass
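# The legacy seq2seq API is time-major: each of these lists holds seq_length
# placeholders, one (batch_size, dim) tensor per timestep.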

tcells = []
for i in range(layers_stacked_count):
    tcells.append(tf.contrib.rnn.GRUCell(hidden_dim))
    pass
Mcell = tf.contrib.rnn.MultiRNNCell(tcells)

dec_outputs,dec_memory = tf.contrib.legacy_seq2seq.basic_rnn_seq2seq(encoder_input,decode_input,Mcell)
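# basic_rnn_seq2seq runs Mcell over encoder_input, then feeds the final
# encoder state into an RNN decoder driven by decode_input; dec_outputs is a
# list of seq_length tensors of shape (batch_size, hidden_dim), so each one
# is projected down to output_dim below.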

reshaped_outputs = []
for ii in dec_outputs:
    reshaped_outputs.append(tf.contrib.layers.fully_connected(ii,output_dim,activation_fn = None) )
    pass

# Squared-error output loss, accumulated over all timesteps
output_loss = 0
for _y,_Y in zip(reshaped_outputs,expected_output):
    output_loss += tf.reduce_mean( tf.pow (_y - _Y , 2 ) )
    pass

# Regularization loss: L2 on all trainable variables except the fully connected output layer
reg_loss = 0
for tf_var in tf.trainable_variables():
    if not ('fully_connected' in tf_var.name ):
        reg_loss += tf.reduce_mean(tf.nn.l2_loss(tf_var))
        pass
    pass

loss = output_loss + lambda_l2_reg * reg_loss
train_op = tf.train.AdamOptimizer(learning_rate).minimize(loss)
sess = tf.InteractiveSession()

def train_batch(batch_size):
    X, Y = generate_data(isTrain=True,batch_size=batch_size)
    feed_dict = {encoder_input[t] : X[t] for t in range(len(encoder_input))}
    feed_dict.update({expected_output[t]:Y[t] for t in range(len(expected_output))})
   
    c = np.concatenate(([np.zeros_like(Y[0])] , Y[:-1] ),axis = 0)
   
    feed_dict.update({decode_input[t]:c[t] for t in range(len(c)) })
   
    _,loss_t = sess.run([train_op,loss],feed_dict)
    return loss_t
pass
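# Note on c above: the decoder input is the target sequence shifted right by
# one step behind a zero "GO" frame, i.e. teacher forcing -- at each step the
# decoder sees the previous ground-truth value rather than its own prediction.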

def test_batch(batch_size):
    X, Y = generate_data(isTrain=False,batch_size=batch_size)   # evaluate on the test split, not the training data
    feed_dict = {encoder_input[t] : X[t] for t in range(len(encoder_input))}
    feed_dict.update({expected_output[t]:Y[t] for t in range(len(expected_output))})
   
    c = np.concatenate(([np.zeros_like(Y[0])] , Y[:-1] ),axis = 0)
   
    feed_dict.update({decode_input[t]:c[t] for t in range(len(c)) })
    output_lossv ,reg_lossv ,loss_t = sess.run([output_loss,reg_loss,loss],feed_dict)
    print('--------------------')
    print(output_lossv,reg_lossv)
    return loss_t
pass

# Training
train_losses = []
test_losses = []

sess.run(tf.global_variables_initializer())

for t in range(nb_iters + 1):
    train_loss = train_batch(batch_size)
    train_losses.append(train_loss)
    if t % 50 == 0:
        test_loss = test_batch(batch_size)
        test_losses.append(test_loss)
        print("Step {}/{} ,train loss :{},\tTEST loss:{}".format(t,nb_iters,train_loss,test_loss))
        pass
    pass

print("Fin.train loss: {},\tTEST loss :{}".format(train_loss,test_loss))

# Plot the loss curves; the test curve (recorded every 50 iterations) is
# rescaled along the x-axis so it lines up with the per-iteration train curve
plt.figure(figsize = (12,6))
plt.plot(np.array(range(0,len(test_losses))) / float(len(test_losses) -1) * (len(train_losses) -1 ),np.log(test_losses),label="Test loss" )
plt.plot(np.log(train_losses) , label = "Train loss")
plt.title("Training errors over time(on a logarithmic scale)")
plt.xlabel("Iteration")
plt.ylabel("log(LOSS)")
plt.legend(loc='best')
plt.show()

# Test
nb_predictions = 4
print("visualize {} predictions data:".format(nb_predictions))

preout = []
X,Y = generate_data(isTrain=False,batch_size= nb_predictions)
print(np.shape(X),np.shape(Y))
# Rolling prediction: slide the encoder window one step at a time and keep
# the last decoder output of each window as that step's prediction.
# X and Y are consecutive windows of the same samples, so together they give
# 2*seq_length known timesteps (indexing X alone would run out of bounds).
XY = np.concatenate((X,Y),axis = 0)
for tt in range(seq_length):
    feed_dict = {encoder_input[t]:XY[t+tt] for t in range(seq_length)}
    # expected_output is not fed here: the fetched outputs do not depend on it
    # Decoder input: a zero "GO" frame followed by the known values,
    # zero-padded once the ground truth runs out
    c = np.concatenate(([np.zeros_like(Y[0])],XY[seq_length+tt:seq_length*2+tt-1]),axis = 0)
    if len(c) < seq_length:
        c = np.concatenate((c,np.zeros_like(XY[:seq_length-len(c)])),axis = 0)
    feed_dict.update({decode_input[t] :c[t] for t in range(seq_length)})
    outputs = np.array(sess.run([reshaped_outputs],feed_dict)[0] )
    preout.append(outputs[-1])      # prediction for the last step of this window
    pass
print(np.shape(preout))  # shape of the collected predictions, ready for display
preout = np.reshape(preout,[seq_length,nb_predictions,output_dim])

for j in range(nb_predictions):
    plt.figure(figsize=(12,3))
   
    for k in range(output_dim):
        past = X[:,j,k]
        expected = Y[seq_length-1:,j,k]
       
        pred = preout[:,j,k]
       
        label1 = "past" if k == 0 else "_nolegend_"
        label2 = 'future' if k ==0 else '_nolegend_'
        label3 = 'Pred' if k ==0 else '_nolegend_'
        plt.plot(range(len(past)),past,'o--b',label = label1)
        plt.plot(range(len(past),len(expected) + len(past)),expected,'x--b',label =label2)
        plt.plot(range(len(past),len(pred) + len(past)),pred,'o--y',label = label3)
        pass
   
    plt.legend(loc = 'best')
    plt.title("Predictions vs. future")
    plt.show()
   
    pass
pass

