XGBoost预测

import pandas as pd
import matplotlib.pyplot as plt
import math
import xgboost_regression
import numpy as np
from numpy import loadtxt
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import mean_squared_error, mean_absolute_error

def xgboostre(df):
    """Fit an XGBoost regressor on indicator columns to predict log returns.

    The target for sample ``i`` is ``log(CLOSE[i + 1] / CLOSE[i])`` and the
    features are the indicator columns of row ``i``. Rows are taken in their
    existing order (no shuffling): the first ``floor(0.9 * (rows - 1))``
    samples form the training set, the remainder the test set.

    Args:
        df: pandas DataFrame containing a ``'CLOSE'`` column and the feature
            columns ``'ATR'``, ``'MTM'``, ``'RSI'``, ``'MA'``, ``'MACD'``,
            ``'CCI'`` and ``'KDJ'``.

    Returns:
        A tuple ``(data, mae_train, mae_test)`` where ``data`` is an array of
        shape ``(n_test, 2)`` holding the actual test targets in column 0 and
        the model predictions in column 1, and ``mae_train`` / ``mae_test``
        are the mean absolute errors on the training and test sets.

    Raises:
        ValueError: if ``df`` has too few rows to leave at least one
            training sample (fewer than 3 rows).
        KeyError: if a required column is missing (raised by pandas).
    """
    import numpy as np

    feature_columns = ['ATR', 'MTM', 'RSI', 'MA', 'MACD', 'CCI', 'KDJ']

    # One sample per consecutive pair of rows, split 90% / 10% in row order.
    n_rows = df.shape[0]
    n_samples = n_rows - 1
    n_train = int(np.floor(0.9 * n_samples))
    if n_train < 1:
        raise ValueError(
            'xgboostre needs at least 3 rows to build a train/test split, '
            'got %d' % n_rows
        )

    # Log returns, computed positionally so the frame's index labels do not
    # matter; kept as a column vector so it can be stacked with predictions.
    close = np.asarray(df['CLOSE'].values, dtype=float)
    y = np.log(close[1:] / close[:-1]).reshape(-1, 1)

    # `.values` replaces DataFrame.as_matrix(), which pandas 1.0 removed.
    # The last row has no following price, hence no target, so it is dropped.
    x = df[feature_columns].values[:n_samples]

    X_train, X_test = x[:n_train], x[n_train:]
    y_train, y_test = y[:n_train], y[n_train:]

    # Imported only here: everything above runs without xgboost installed.
    import xgboost as xgb

    # Initial (untuned) hyper-parameters.
    n_estimators = 100
    max_depth = 4
    learning_rate = 0.1
    min_child_weight = 1
    model_seed = 100

    model = xgb.XGBRegressor(
        random_state=model_seed,
        n_estimators=n_estimators,
        max_depth=max_depth,
        learning_rate=learning_rate,
        min_child_weight=min_child_weight,
    )
    model.fit(X_train, y_train.ravel())

    # Training error (MAE).
    pred_on_train = model.predict(X_train).reshape(-1, 1)
    mae_train = np.mean(np.abs(pred_on_train - y_train))

    # Test error (MAE) and the actual-vs-predicted table.
    pred_on_test = model.predict(X_test).reshape(-1, 1)
    data = np.hstack((y_test, pred_on_test))
    mae_test = np.mean(np.abs(y_test - pred_on_test))

    return (data, mae_train, mae_test)
# Batch driver: load one Excel sheet per ticker, fit a model on each with
# xgboostre, and export the per-ticker training/test MAE as CSV files.

# Raw string: the Windows path contains backslashes that must not be read as
# escape sequences.
DATA_DIR = r'D:\PycharmProject\jupyter\data'
TICKERS = [
    'AEE', 'BHLB', 'CC', 'CCL', 'DHT', 'DOOR', 'EC',
    'EDU', 'GD', 'IBA', 'IDT', 'JNJ', 'KFY', 'LADR',
    'MFA', 'RACE', 'AHS', 'VRS', 'WRK', 'XPO', 'YUM',
]

# NOTE(review): the original calls passed skip_header=1, which is not a
# pandas.read_excel parameter (current pandas rejects unknown keywords).
# It is dropped here, so the first sheet row is used as the header; if the
# files carry an extra leading row, pass header=1 instead - confirm against
# the data files.
df = [pd.read_excel(DATA_DIR + '\\' + ticker + '.N.xls') for ticker in TICKERS]

Data = dict()
MAE_train = np.zeros(shape=(len(df), 1))
MAE_test = np.zeros(shape=(len(df), 1))

# Row i of each MAE table corresponds to TICKERS[i].
for i, frame in enumerate(df):
    Data[i], MAE_train[i], MAE_test[i] = xgboostre(frame)

np.savetxt('XGboost_MAE_train.csv', MAE_train, delimiter=',')
np.savetxt('XGboost_MAE_test.csv', MAE_test, delimiter=',')

你可能感兴趣的:(python,深度学习,深度学习,python)