scikit-learn回归算法总结

import pandas as pd
import tushare as ts
import random
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import f1_score,accuracy_score,r2_score
from sklearn import preprocessing
from sklearn.decomposition import  PCA

# Disable numpy's default scientific-notation display (suppress=True turns it off).
np.set_printoptions(suppress=True)

# SECURITY NOTE(review): hardcoded API token committed to source — load it from
# an environment variable or a config file instead.
ts.set_token('462fc78ba2417e9a79a5ac00d8b71b2959b2a8875a0457952921ade4')
pro = ts.pro_api()

# Load the first sheet of the source workbook and sort rows chronologically.
# NOTE(review): hardcoded local path — parameterize before reuse.
df = pd.read_excel(r'E:\权限管理\wq\works\datafile\Datas000001SZ.xlsx')
df = df.sort_values('trade_date')

# Total number of rows; used to judge how sparse each column is.
# BUGFIX: the original loop referenced `counts` and `a_avg_counts` without ever
# defining them, which raises NameError on the first iteration. They are
# defined here; half the rows is assumed as the sparsity threshold —
# NOTE(review): confirm this threshold matches the original intent.
counts = len(df)
a_avg_counts = counts / 2

# All column names.
columns_list = df.columns.values.tolist()
# Columns marked for removal because they carry no information (all zeros).
cancel_factor = []

for col in columns_list:
    values = df[col].values.tolist()
    zero_count = values.count(0)
    if zero_count == counts:
        # Entirely-zero column: mark it for removal.
        cancel_factor.append(col)
    elif a_avg_counts < zero_count < counts:
        # Mostly-zero column: overwrite every value with the column mean
        # (zeros included in the average, mirroring the original formula).
        df[col] = sum(values) / counts

# Identifier/date columns excluded from the training features.
x_cancel_factor = ['Unnamed: 0', 'ts_code', 'close', 'trade_date', 'ann_date_x',
                   'f_ann_date_x', 'end_date_x', 'ann_date_y', 'f_ann_date_y',
                   'end_date_y', 'ann_date', 'f_ann_date', 'end_date']
x_cancel_factor.extend(cancel_factor)
# Identifier/date columns excluded from the target data.
y_cancel_factor = ['Unnamed: 0', 'trade_date', 'ann_date_x', 'f_ann_date_x',
                   'end_date_x', 'ann_date_y', 'f_ann_date_y', 'end_date_y',
                   'ann_date', 'f_ann_date', 'end_date']
y_cancel_factor.extend(cancel_factor)
# Feature matrix: explicitly selected price/volume columns.
# (Selection by dropping x_cancel_factor was superseded by this explicit list.)
data_x = df[['open', 'high', 'low', 'pre_close', 'change', 'pct_chg', 'vol', 'amount']]

# Pair each feature row with the NEXT day's close: drop the last two feature
# rows, shift the target forward by one, then standardize the features to
# zero mean / unit variance for regression.
x = preprocessing.scale(data_x.iloc[:len(data_x) - 2].values)
y = df['close'].values[1:len(df) - 1]

###########2.回归部分##########
###########2. Regression harness ##########
def try_different_method(model, features=None, targets=None, train_size=1800):
    """Fit `model` on the leading samples and report its out-of-sample R^2.

    Parameters
    ----------
    model : object
        Any estimator exposing the sklearn ``fit``/``predict`` interface.
    features : array-like, optional
        Feature matrix; defaults to the module-level ``x``.
    targets : array-like, optional
        Target vector; defaults to the module-level ``y``.
    train_size : int, optional
        Number of leading samples used for training; everything after them
        forms the test set.

    Returns
    -------
    tuple
        ``(result, score)`` — predictions on the test set and the R^2 score.
    """
    X = x if features is None else features
    Y = y if targets is None else targets
    print('开始训练')
    model.fit(X[:train_size], Y[:train_size])
    print('开始预测')
    # BUGFIX: the original sliced [1801:len-1], silently skipping the first
    # and last holdout samples; evaluate on the full holdout instead.
    result = model.predict(X[train_size:])
    print('开始计算准确率')
    score = r2_score(Y[train_size:], result)
    print('返回结果')
    return result, score


###########3. Model selection ##########
###3.1 Decision tree regression ####
from sklearn import tree
model_DecisionTreeRegressor = tree.DecisionTreeRegressor()
####3.2 Linear regression ####
from sklearn import linear_model
model_LinearRegression = linear_model.LinearRegression()
####3.3 SVM regression ####
from sklearn import svm
model_SVR = svm.SVR()
####3.4 KNN regression ####
from sklearn import neighbors
model_KNeighborsRegressor = neighbors.KNeighborsRegressor()
####3.5 Random forest regression ####
from sklearn import ensemble
model_RandomForestRegressor = ensemble.RandomForestRegressor(n_estimators=20)# 20 decision trees
####3.6 AdaBoost regression ####
from sklearn import ensemble  # NOTE: duplicate import, kept as-is
model_AdaBoostRegressor = ensemble.AdaBoostRegressor(n_estimators=50)# 50 base estimators
####3.7 GBRT (gradient boosting) regression ####
from sklearn import ensemble  # NOTE: duplicate import, kept as-is
model_GradientBoostingRegressor = ensemble.GradientBoostingRegressor(n_estimators=100)# 100 boosting stages
####3.8 Bagging regression ####
from sklearn.ensemble import BaggingRegressor
model_BaggingRegressor = BaggingRegressor()
####3.9 ExtraTree (extremely randomized tree) regression ####
from sklearn.tree import ExtraTreeRegressor
model_ExtraTreeRegressor = ExtraTreeRegressor()

score_list = []
###########4. Evaluate every model ##########
# (label, model) pairs, evaluated in the original call order.
_models = [
    ('线性回归', model_LinearRegression),
    ('决策树回归', model_DecisionTreeRegressor),
    ('SVM回归', model_SVR),
    ('KNN回归', model_KNeighborsRegressor),
    ('随机森林回归', model_RandomForestRegressor),
    ('Adaboost回归', model_AdaBoostRegressor),
    ('GBRT回归', model_GradientBoostingRegressor),
    ('Bagging回归', model_BaggingRegressor),
    ('ExtraTree极端随机树回归', model_ExtraTreeRegressor),
]
# Predictions and scores keyed by model label.
results = {}
for _label, _model in _models:
    _result, _score = try_different_method(_model)
    results[_label] = (_result, _score)
    score_list.append(_score)  # BUGFIX: score_list was declared but never filled

# Report in the original print order (random forest promoted above SVM/KNN).
for _label in ('线性回归', '决策树回归', '随机森林回归', 'SVM回归', 'KNN回归',
               'Adaboost回归', 'GBRT回归', 'Bagging回归', 'ExtraTree极端随机树回归'):
    print('%s准确率:%s' % (_label, results[_label][1]))

你可能感兴趣的:(scikit-learn回归算法总结)