import pandas as pd
import matplotlib.pyplot as plt
import math
import xgboost_regression
import numpy as np
from numpy import loadtxt
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import mean_squared_error, mean_absolute_error
def xgboostre(df):
    """Fit an XGBoost regressor mapping technical indicators to log returns.

    Row ``i`` of the indicator columns is paired with the log return of
    ``CLOSE`` from row ``i`` to row ``i + 1``; the last row has no following
    close and is therefore dropped. The first ``floor(0.9 * (rows - 1))``
    samples are used for training and the remaining ones for testing, in
    row order (no shuffling).

    Args:
        df: pandas DataFrame with the columns ``CLOSE``, ``ATR``, ``MTM``,
            ``RSI``, ``MA``, ``MACD``, ``CCI`` and ``KDJ``.

    Returns:
        A tuple ``(data, mae_train, mae_test)``. ``data`` is a two-column
        array whose columns are the actual and the predicted test-set log
        returns; ``mae_train`` and ``mae_test`` are the mean absolute errors
        on the training and test samples.

    Raises:
        ValueError: if ``df`` has too few rows to form a non-empty
            training set.
    """
    import numpy as np
    import xgboost as xgb

    feature_columns = ['ATR', 'MTM', 'RSI', 'MA', 'MACD', 'CCI', 'KDJ']

    # Work on plain arrays so that everything below is positional and does
    # not depend on the DataFrame's index labels.
    close = df['CLOSE'].to_numpy(dtype=float)
    # DataFrame.as_matrix() no longer exists in pandas >= 1.0.
    features = df[feature_columns].to_numpy(dtype=float)

    # One sample per consecutive pair of closes.
    n_samples = len(close) - 1
    n_train = int(np.floor(0.9 * n_samples))
    if n_train < 1:
        raise ValueError(
            'need at least 3 rows to build a train/test split, got %d'
            % len(close)
        )

    # Log return from row i to row i + 1.
    log_returns = np.log(close[1:] / close[:-1])
    # The last row has no target, so its features are discarded.
    features = features[:n_samples]

    X_train, X_test = features[:n_train], features[n_train:]
    y_train, y_test = log_returns[:n_train], log_returns[n_train:]

    # Initial (untuned) hyper-parameters; the library defaults are
    # n_estimators=100, max_depth=3, learning_rate=0.1, min_child_weight=1.
    n_estimators = 100
    max_depth = 4
    learning_rate = 0.1
    min_child_weight = 1
    model_seed = 100

    model = xgb.XGBRegressor(seed=model_seed,
                             n_estimators=n_estimators,
                             max_depth=max_depth,
                             learning_rate=learning_rate,
                             min_child_weight=min_child_weight)
    model.fit(X_train, y_train)

    # Training error (MAE).
    pred_on_train = model.predict(X_train)
    mae_train = np.average(np.abs(pred_on_train - y_train))

    # Test error (MAE) and the actual-vs-predicted table for the test set.
    pred_on_test = model.predict(X_test)
    data = np.column_stack((y_test, pred_on_test))
    mae_test = np.average(np.abs(y_test - pred_on_test))

    return (data, mae_train, mae_test)
# Folder holding one Excel workbook per ticker. A raw string keeps the
# backslashes literal instead of relying on invalid escape sequences.
DATA_DIR = r'D:\PycharmProject\jupyter\data'

# Tickers in the order their results are stored in the output files.
TICKERS = [
    'AEE', 'BHLB', 'CC', 'CCL', 'DHT', 'DOOR', 'EC',
    'EDU', 'GD', 'IBA', 'IDT', 'JNJ', 'KFY', 'LADR',
    'MFA', 'RACE', 'AHS', 'VRS', 'WRK', 'XPO', 'YUM',
]

# NOTE: the previous `skip_header=1` argument is a numpy.genfromtxt option,
# not a pandas.read_excel one: older pandas silently ignored it and current
# pandas rejects it with a TypeError. It is dropped so the first sheet row
# is still used as the header. If the workbooks really carry an extra
# leading row, pass `header=1` here instead - TODO confirm against the data.
df = [pd.read_excel(DATA_DIR + '\\' + ticker + '.N.xls') for ticker in TICKERS]

# Per-ticker results: the actual/predicted table and the two MAE values.
Data = dict()
MAE_train = np.zeros(shape=(len(df), 1))
MAE_test = np.zeros(shape=(len(df), 1))
for i, frame in enumerate(df):
    Data[i], MAE_train[i], MAE_test[i] = xgboostre(frame)

# One MAE per row, in TICKERS order.
np.savetxt('XGboost_MAE_train.csv', MAE_train, delimiter=',')
np.savetxt('XGboost_MAE_test.csv', MAE_test, delimiter=',')