# 回归算法总结

from sklearn import svm
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import tushare as ts
from sklearn import tree

# F1 score是精确率和召回率的调和平均值
from sklearn.metrics import f1_score,accuracy_score
from sklearn.decomposition import  PCA

# Print floats in plain decimal form: suppress=True switches off NumPy's
# scientific notation for small values.
np.set_printoptions(suppress=True)

# Credentials should not live in source control, so the TUSHARE_TOKEN
# environment variable takes precedence when it is set and non-empty.
# The literal is kept only as a fallback so existing setups keep working.
# NOTE(review): rotate this token and drop the fallback once the variable is
# configured everywhere.
import os

ts.set_token(
    os.environ.get('TUSHARE_TOKEN')
    or '462fc78ba2417e9a79a5ac00d8b71b2959b2a8875a0457952921ade4'
)
pro = ts.pro_api()

# Load the merged history workbook (read_excel without a sheet argument reads
# the first sheet) and order the rows by trade date.
df = pd.read_excel(r'E:\权限管理\wq\works\HData\HDatas000001SZ.xlsx')
df = df.sort_values('trade_date')

# Total number of rows, and half of that (threshold for "mostly zero").
counts = len(df)
avg_counts = counts / 2
# Every column name in the sheet.
columns_list = df.columns.values.tolist()
# Columns that contain nothing but zeros; they are dropped further down.
cancel_factor = []

for column in columns_list:
    value_list = list(df[column].values)
    zero_total = value_list.count(0)
    if zero_total == counts:
        cancel_factor.append(column)
    elif avg_counts < zero_total < counts:
        # More than half (but not all) of the entries are zero: every row of
        # the column is set to the column mean.
        # NOTE(review): this also overwrites the non-zero entries - confirm
        # that replacing the whole column is intended.
        df[column] = sum(value_list) / counts

# Date columns excluded from both frames.
_date_columns = [
    'trade_date', 'ann_date_x', 'f_ann_date_x', 'end_date_x',
    'ann_date_y', 'f_ann_date_y', 'end_date_y',
    'ann_date', 'f_ann_date', 'end_date',
]
# The feature frame additionally drops 'ts_code'; the label frame keeps it.
x_cancel_factor = ['Unnamed: 0', 'ts_code', *_date_columns, *cancel_factor]
y_cancel_factor = ['Unnamed: 0', *_date_columns, *cancel_factor]
print(cancel_factor)
data_x = df.drop(x_cancel_factor, axis='columns')
data_y = df.drop(y_cancel_factor, axis='columns')
# data_x = df[['open', 'high', 'low', 'pre_close', 'change', 'pct_chg', 'vol', 'amount','close']]
# Features come from every row except the last two; the label for each
# feature row is the 'change' value of the following row, so both sequences
# cover the same number of rows.
x = data_x.iloc[:-2].values
y_list = data_y['change'].iloc[1:-1].values
# Encode direction: -1 for a negative change, +1 for zero or positive.
# Values that fail both comparisons (e.g. NaN) produce no label at all.
y = [-1 if delta < 0 else 1 for delta in y_list if delta < 0 or delta >= 0]

# Reduce the features to 5 principal components. The projection is learned
# from the training rows only (the first 1800); fit_transform fits and
# projects in one pass, so no separate fit() call is needed beforehand.
estimator = PCA(n_components=5)
print('降维训练数据')
pca_x_train = estimator.fit_transform(x[:1800])
print('降维测试数据')
# NOTE(review): the test slice starts at 1801 and stops one row short of the
# end, so row 1800 and the final row are used by neither split - confirm.
pca_x_test = estimator.transform(x[1801:len(x)-1])

# Standardise the components; mean and variance are learned from the training
# split and then applied unchanged to the test split.
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(pca_x_train)
X_test_scaled = scaler.transform(pca_x_test)

# Support-vector classifier with an RBF kernel, trained on the first 1800
# scaled samples (fit returns the fitted estimator itself).
clf = svm.SVC(
    kernel='rbf',
    C=0.8,
    gamma=20,
    decision_function_shape='ovr',
).fit(X_train_scaled, y[:1800])

print("预测...")
# SVM predictions for the scaled test samples.
res = clf.predict(X_test_scaled)
# Reference labels that the predictions are compared against.
a = y[1801:-1]

# Decision tree using entropy as the split criterion.
clf1 = tree.DecisionTreeClassifier(criterion='entropy')
clf1.fit(X_train_scaled,y[:1800])
answer = clf1.predict(X_test_scaled)

# Random forest: an actual ensemble of trees, so the score reported under the
# random-forest label is not simply a second single decision tree.
from sklearn.ensemble import RandomForestClassifier
clf2 = RandomForestClassifier()
clf2.fit(X_train_scaled,y[:1800])
answer2 = clf2.predict(X_test_scaled)

# AdaBoost Boosting
from sklearn.ensemble import AdaBoostClassifier  # For Classification
from sklearn.ensemble import AdaBoostRegressor  # For Regression
from sklearn.tree import DecisionTreeClassifier

# A decision tree is the weak learner here; any learner that accepts sample
# weights can be used instead.
dt = DecisionTreeClassifier()
# The weak learner is passed positionally: its keyword was renamed from
# `base_estimator` to `estimator` in scikit-learn 1.2 and the old name was
# removed in 1.4, while the first positional slot works on every version.
clf3 = AdaBoostClassifier(dt, n_estimators=100, learning_rate=1)
clf3.fit(X_train_scaled,y[:1800])
answer3 = clf3.predict(X_test_scaled)


#根据训练出的模型绘制样本点,用训练数据训练模型
# for i in pca_x_train:
#     res=clf1.predict(np.array(i).reshape(1, -1))
#     # print(i,res)
#     if res > 0:
#         plt.scatter(i[0],i[1],c='r',marker='*')
#     else :
#         plt.scatter(i[0],i[1],c='g',marker='*')
# #回执实验数据点
# for i in pca_x_test:
#     # 数据最终显示图形,对测试数据进行预测
#     res=clf1.predict(np.array(i).reshape(1, -1))
#     b.append(res[0])
#     # print(i,res)
#     if res > 0:
#         # X轴靠近数据显示图形
#         plt.scatter(i[0],i[1],c='r',marker='.')
#     else :
#         plt.scatter(i[0],i[1],c='g',marker='.')

from sklearn.ensemble import GradientBoostingClassifier  # For Classification
from sklearn.ensemble import GradientBoostingRegressor  # For Regression
# Gradient boosting: 100 boosting stages of depth-1 trees with learning rate
# 1.0, trained on the first 1800 scaled samples (fit returns the estimator).
clf4 = GradientBoostingClassifier(
    max_depth=1,
    learning_rate=1.0,
    n_estimators=100,
).fit(X_train_scaled, y[:1800])
answer4 = clf4.predict(X_test_scaled)


# Binary classification with scikit-learn's LogisticRegression.
from sklearn.linear_model import LogisticRegression
model = LogisticRegression()
model.fit(X_train_scaled,y[:1800])
answer5 = model.predict(X_test_scaled)
# Gaussian naive Bayes.
from sklearn.naive_bayes import GaussianNB
model1 = GaussianNB()
model1.fit(X_train_scaled,y[:1800])
# Predict with the naive Bayes model itself (model1); using `model` here
# would only repeat the logistic-regression predictions.
answer6 = model1.predict(X_test_scaled)


# SVM: accuracy of the stored predictions, then the estimator's own score on
# the training split and on the test split.
print('svm结果集 score:', accuracy_score(a, res))
print('SVM训练集 score:', clf.score(X_train_scaled, y[:1800]))
print('SVM测试集 score:', clf.score(X_test_scaled, y[1801:-1]))

# Accuracy of every other classifier against the same reference labels,
# reported in the order the models were trained.
for label, predicted in (
    ('决策树 score:', answer),
    ('随机森林 score:', answer2),
    ('AdaBoost score:', answer3),
    ('Gradient Boosting score:', answer4),
    ('LogisticRegression score:', answer5),
    ('贝叶斯 score:', answer6),
):
    print(label, accuracy_score(a, predicted))

# 你可能感兴趣的:(回归算法总结)