DataWhale一周算法进阶3---模型融合

文章目录

      • 一 任务
      • 二 代码
      • 问题

一 任务

用你目前评分最高的模型作为基准模型,和其他模型进行 stacking 融合,得到最终模型及评分结果。

二 代码

import pandas as pd 
import numpy as np
import sys
from sklearn.preprocessing import StandardScaler

from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
import xgboost as xgb
from xgboost import XGBClassifier
import lightgbm as lgb
from lightgbm import LGBMClassifier

from sklearn import metrics
import matplotlib.pyplot as plt

from sklearn.model_selection import KFold 
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score
from sklearn.metrics import make_scorer
from sklearn.model_selection import cross_validate
from mlxtend.classifier import StackingClassifier
# Load the data set (the CSV is GBK-encoded).
data_all = pd.read_csv('./data_all.csv', encoding='gbk')
data_all.head()
# Split into training and test sets.
from sklearn.model_selection import train_test_split
features = [col for col in data_all.columns if col != 'status']  # every column except the label
X = data_all[features]  # feature matrix
y = data_all['status']  # labels
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=2018)

# Standardize the features. The scaler is fitted on the training split only
# and then applied to both splits, so no test-set statistics leak into training.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_stand = scaler.transform(X_train)
X_test_stand = scaler.transform(X_test)
X_test_stand

# Base learners and meta-learners used by the stacking ensembles below.
lr = LogisticRegression(random_state=2018, C=0.1)
# Named `lgbc` (not `lgb`) so the `lightgbm as lgb` module import is not
# shadowed by the classifier instance; 'gbdt' is the documented spelling.
lgbc = LGBMClassifier(boosting_type='gbdt', random_state=2018, silent=0)
gbdt = GradientBoostingClassifier(random_state=2018, max_depth=3, n_estimators=50)
xgbc = XGBClassifier(random_state=2018, max_depth=3, eta=0.1, subsample=0.6)
rf = RandomForestClassifier(n_estimators=500, oob_score=True, random_state=2018)
svm = SVC(random_state=2018, tol=0.01)

# Stacking variants: different sets of first-level classifiers combined with
# either LightGBM or logistic regression as the meta-classifier.
sclf = StackingClassifier(classifiers=[lr, gbdt, xgbc, rf, svm], meta_classifier=lgbc)
sclf1 = StackingClassifier(classifiers=[gbdt, xgbc, svm], meta_classifier=lgbc)
sclf2 = StackingClassifier(classifiers=[gbdt, xgbc, svm], meta_classifier=lr)
sclf3 = StackingClassifier(classifiers=[svm], meta_classifier=lr)
def _continuous_scores(model, X):
    """Return one continuous positive-class score per row of ``X``.

    ``decision_function`` is used when the model exposes it; otherwise, or
    when calling it raises ``AttributeError``, column 1 of ``predict_proba``
    is used instead.
    """
    if hasattr(model, "decision_function"):
        try:
            return model.decision_function(X)
        except AttributeError:
            # NOTE(review): a wrapper estimator may expose decision_function
            # while delegating to an inner estimator that lacks it
            # (presumably a stacker with a tree-based meta-classifier --
            # confirm); fall back to probabilities in that case.
            pass
    return model.predict_proba(X)[:, 1]


def get_scores(model, X_train, X_test, y_train, y_test):
    """Fit ``model`` and report train/test classification metrics.

    The model is fitted on the training split. Accuracy, precision, recall,
    F1 and ROC AUC are then computed for both splits, both ROC curves are
    plotted in one figure, and every metric is printed.

    Args:
        model: Unfitted estimator providing ``fit``, ``predict`` and either
            ``decision_function`` or ``predict_proba``.
        X_train: Training features.
        X_test: Test features.
        y_train: Training labels.
        y_test: Test labels.

    Returns:
        None. Results are printed and shown in a matplotlib figure.
    """
    # NOTE(review): precision/recall/F1 are called with their defaults and
    # column 1 of predict_proba is taken as the score, so this presumably
    # expects binary labels -- confirm against the data.
    model.fit(X_train, y_train)
    y_train_pred = model.predict(X_train)
    y_test_pred = model.predict(X_test)
    y_train_proba = _continuous_scores(model, X_train)
    y_test_proba = _continuous_scores(model, X_test)

    # accuracy
    train_accuracy = metrics.accuracy_score(y_train, y_train_pred)
    test_accuracy = metrics.accuracy_score(y_test, y_test_pred)
    # precision
    train_precision = metrics.precision_score(y_train, y_train_pred)
    test_precision = metrics.precision_score(y_test, y_test_pred)
    # recall
    train_recall = metrics.recall_score(y_train, y_train_pred)
    test_recall = metrics.recall_score(y_test, y_test_pred)
    # f1-score
    train_f1 = metrics.f1_score(y_train, y_train_pred)
    test_f1 = metrics.f1_score(y_test, y_test_pred)
    # auc (computed from the continuous scores, not the hard predictions)
    train_auc = metrics.roc_auc_score(y_train, y_train_proba)
    test_auc = metrics.roc_auc_score(y_test, y_test_proba)

    # ROC curves for both splits, plus the diagonal as a chance-level reference.
    train_fprs, train_tprs, _ = metrics.roc_curve(y_train, y_train_proba)
    test_fprs, test_tprs, _ = metrics.roc_curve(y_test, y_test_proba)
    plt.plot(train_fprs, train_tprs)
    plt.plot(test_fprs, test_tprs)
    plt.plot([0, 1], [0, 1], "--")
    plt.title("ROC curve")
    plt.xlabel("FPR")
    plt.ylabel("TPR")
    plt.legend(labels=["Train AUC:"+str(round(train_auc, 5)),"Test AUC:"+str(round(test_auc,5))], loc="lower right")
    plt.show()

    # Print every score, train first and then test for each metric.
    print("训练集准确率:", train_accuracy)
    print("测试集准确率:", test_accuracy)
    print("==================================")
    print("训练集精准率:", train_precision)
    print("测试集精准率:", test_precision)
    print("==================================")
    print("训练集召回率:", train_recall)
    print("测试集召回率:", test_recall)
    print("==================================")
    print("训练集F1-score:", train_f1)
    print("测试集F1-score:", test_f1)
    print("==================================")
    print("训练集AUC:", train_auc)
    print("测试集AUC:", test_auc)
# Evaluate the full stacking ensemble on the standardized features
# (X_train_stand / X_test_stand) rather than the raw ones: the stack contains
# scale-sensitive learners (SVC, LogisticRegression), and the scaled matrices
# were otherwise computed but never used.
get_scores(sclf, X_train_stand, X_test_stand, y_train, y_test)

问题

1. 代码本身没有问题,但每次运行 kernel 都挂掉了(kernel dead),下次用 Google Colab 试一下。

你可能感兴趣的:(算法项目)