算法介绍
- 随机森林
- GBDT
- XGBoost
- LightGBM
导入包
import pandas as pd
import warnings
from sklearn.preprocessing import scale
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from xgboost.sklearn import XGBClassifier
import lightgbm as lgb
读取数据
# Load the modelling table (GBK-encoded CSV). 'status' is the binary
# target column; every remaining column is treated as a feature.
data_all = pd.read_csv('data_all.csv', encoding='gbk')
df_y = data_all['status']
# Standardize each feature column (zero mean, unit variance); note that
# scale() returns a plain ndarray, so df_X is no longer a DataFrame.
df_X = scale(data_all.drop(columns=['status']), axis=0)
模型构建
# Candidate classifiers, all seeded with the same random_state for
# reproducibility. The variable names below are significant: the run
# loop later looks each model up by this exact name.
lr = LogisticRegression(tol=1e-6, random_state=2018)
tree = DecisionTreeClassifier(random_state=2018)
# probability=True enables predict_proba, which the 'roc_auc' scorer needs.
svm = SVC(tol=1e-6, probability=True, random_state=2018)
forest = RandomForestClassifier(random_state=2018, n_estimators=100)
Gbdt = GradientBoostingClassifier(random_state=2018)
Xgbc = XGBClassifier(random_state=2018)
gbm = lgb.LGBMClassifier(random_state=2018)
五折交叉验证
def muti_score(model, X=None, y=None, cv=5):
    """Print cross-validated accuracy/precision/recall/F1/AUC for *model*.

    Parameters
    ----------
    model : estimator
        Any scikit-learn-compatible classifier.
    X : array-like, optional
        Feature matrix; defaults to the module-level ``df_X``.
    y : array-like, optional
        Target vector; defaults to the module-level ``df_y``.
    cv : int, optional
        Number of cross-validation folds (default 5, matching the
        original five-fold setup).
    """
    if X is None:
        X = df_X
    if y is None:
        y = df_y
    # (label, scorer) pairs; labels are the exact strings printed before.
    metrics = [
        ("准确率:", 'accuracy'),
        ("精确率:", 'precision'),
        ("召回率:", 'recall'),
        ("F1_score:", 'f1'),
        ("AUC:", 'roc_auc'),
    ]
    # Suppress convergence/deprecation noise only while scoring, instead
    # of disabling warnings for the whole process as the bare
    # filterwarnings('ignore') call did.
    with warnings.catch_warnings():
        warnings.filterwarnings('ignore')
        for label, scoring in metrics:
            scores = cross_val_score(model, X, y, scoring=scoring, cv=cv)
            print(label, scores.mean())
运行
# Map display names to the estimator objects explicitly instead of
# resolving them with eval(), which is fragile (silently breaks on a
# rename) and an unsafe pattern in general.
models = {
    "lr": lr,
    "tree": tree,
    "svm": svm,
    "forest": forest,
    "Gbdt": Gbdt,
    "Xgbc": Xgbc,
    "gbm": gbm,
}
# Kept for backward compatibility with any code that reads this list.
model_name = list(models)
for name, model in models.items():
    print(name)
    muti_score(model)