用你目前评分最高的模型作为基准模型,和其他模型进行 stacking 融合,得到最终模型及评分结果。
import pandas as pd
import numpy as np
import sys
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
import xgboost as xgb
from xgboost import XGBClassifier
import lightgbm as lgb
from lightgbm import LGBMClassifier
from sklearn import metrics
import matplotlib.pyplot as plt
from sklearn.model_selection import KFold
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score
from sklearn.metrics import make_scorer
from sklearn.model_selection import cross_validate
from mlxtend.classifier import StackingClassifier
# Load the full dataset; the CSV is decoded as GBK.
data_all = pd.read_csv('./data_all.csv', encoding='gbk')
# Bare expression kept from the interactive session; it only previews rows.
data_all.head()

# Separate the feature columns from the 'status' label and hold out 30%
# of the rows as a test set (fixed seed for reproducibility).
from sklearn.model_selection import train_test_split

features = [col for col in data_all.columns if col != 'status']
X = data_all[features]
y = data_all['status']
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=2018
)

# Standardize the features. The scaler is fitted on the training split only
# and then reused for the test split, so no test statistics leak into it.
scaler = StandardScaler()
X_train_stand = scaler.fit_transform(X_train)
X_test_stand = scaler.transform(X_test)
# Bare expression kept from the interactive session; it only previews the array.
X_test_stand
# Base learners, all seeded with random_state=2018.
lr = LogisticRegression(C=0.1, random_state=2018)
# NOTE(review): this rebinds `lgb`, which the import section binds to the
# lightgbm module; after this line `lgb` refers to the classifier instance.
lgb = LGBMClassifier(
    boosting_type='GBDT',
    random_state=2018,
    silent=0,
)
gbdt = GradientBoostingClassifier(
    n_estimators=50,
    max_depth=3,
    random_state=2018,
)
xgbc = XGBClassifier(
    max_depth=3,
    eta=0.1,
    subsample=0.6,
    random_state=2018,
)
rf = RandomForestClassifier(
    n_estimators=500,
    oob_score=True,
    random_state=2018,
)
svm = SVC(tol=0.01, random_state=2018)

# Stacking ensembles: each combines a set of first-level classifiers with a
# meta-classifier trained on their outputs.
sclf = StackingClassifier(
    classifiers=[lr, gbdt, xgbc, rf, svm],
    meta_classifier=lgb,
)
sclf1 = StackingClassifier(
    classifiers=[gbdt, xgbc, svm],
    meta_classifier=lgb,
)
sclf2 = StackingClassifier(
    classifiers=[gbdt, xgbc, svm],
    meta_classifier=lr,
)
sclf3 = StackingClassifier(
    classifiers=[svm],
    meta_classifier=lr,
)
def _continuous_scores(model, X):
    """Return one continuous score per row of X for ranking metrics (AUC/ROC).

    Prefers ``decision_function`` and falls back to the second column of
    ``predict_proba`` when the former is missing or cannot be served.
    """
    if hasattr(model, "decision_function"):
        try:
            return model.decision_function(X)
        except AttributeError:
            # Some wrapper estimators define decision_function unconditionally
            # and only fail when delegating to an inner estimator that lacks
            # it; use probabilities instead in that case.
            pass
    return model.predict_proba(X)[:, 1]


def get_scores(model, X_train, X_test, y_train, y_test):
    """Fit ``model`` and report train/test classification metrics.

    The model is fitted on ``(X_train, y_train)``. Accuracy, precision,
    recall and F1 are computed from hard predictions; AUC and the ROC curves
    are computed from continuous scores. The ROC curves for both splits are
    plotted with matplotlib and all scores are printed.

    Parameters
    ----------
    model : estimator exposing ``fit``, ``predict`` and either
        ``decision_function`` or ``predict_proba``.
    X_train, X_test : feature matrices for the training and test splits.
    y_train, y_test : labels for the training and test splits.

    Returns
    -------
    None
    """
    model.fit(X_train, y_train)

    # Hard label predictions for the threshold-based metrics.
    y_train_pred = model.predict(X_train)
    y_test_pred = model.predict(X_test)

    # Continuous scores for AUC and the ROC curves.
    y_train_proba = _continuous_scores(model, X_train)
    y_test_proba = _continuous_scores(model, X_test)

    # accuracy, precision, recall, f1-score: (label, train value, test value)
    label_metrics = [
        ("准确率", metrics.accuracy_score),
        ("精准率", metrics.precision_score),
        ("召回率", metrics.recall_score),
        ("F1-score", metrics.f1_score),
    ]
    results = [
        (name, score(y_train, y_train_pred), score(y_test, y_test_pred))
        for name, score in label_metrics
    ]

    # auc
    train_auc = metrics.roc_auc_score(y_train, y_train_proba)
    test_auc = metrics.roc_auc_score(y_test, y_test_proba)
    results.append(("AUC", train_auc, test_auc))

    # ROC curves for both splits, plus the chance diagonal for reference.
    train_fprs, train_tprs, _ = metrics.roc_curve(y_train, y_train_proba)
    test_fprs, test_tprs, _ = metrics.roc_curve(y_test, y_test_proba)
    plt.plot(train_fprs, train_tprs)
    plt.plot(test_fprs, test_tprs)
    plt.plot([0, 1], [0, 1], "--")
    plt.title("ROC curve")
    plt.xlabel("FPR")
    plt.ylabel("TPR")
    plt.legend(
        labels=[
            "Train AUC:" + str(round(train_auc, 5)),
            "Test AUC:" + str(round(test_auc, 5)),
        ],
        loc="lower right",
    )
    plt.show()

    # Print every score, with a separator line between metric groups.
    for index, (name, train_value, test_value) in enumerate(results):
        if index:
            print("==================================")
        print("训练集" + name + ":", train_value)
        print("测试集" + name + ":", test_value)
# Evaluate the stacked model on the standardized features: the scaler fitted
# earlier in this script produced X_train_stand / X_test_stand, and the stack
# contains scale-sensitive learners (LogisticRegression, SVC).
get_scores(sclf, X_train_stand, X_test_stand, y_train, y_test)
1. 代码本身没问题,但运行时 kernel 都挂掉了(kernel dead),下次改用 Google Colab 试一下。