# LightGBM 分类模型

import os
import warnings

import lightgbm as lgb
import numpy as np
from sklearn.metrics import roc_auc_score, accuracy_score, f1_score, precision_score, recall_score
from sklearn.model_selection import StratifiedKFold


# Silence every warning category for the remainder of the script.
warnings.filterwarnings('ignore')

# Rows that carry a label form the training set; rows whose label is missing
# form the test set.
label_missing = data['label'].isna()
X_train = data[~label_missing]
X_test = data[label_missing]
y = X_train['label']

# 5-fold stratified splitter, shuffled with a fixed seed so the folds are
# reproducible between runs.
KF = StratifiedKFold(n_splits=5, shuffle=True, random_state=2021)
# Disabled alternative configuration, kept for reference only. It uses a lower
# learning rate (0.01) together with L1/L2 regularisation.
# params = {
#     'verbose': -1,
#     'num_leaves':64,
#     'max_depth':10,
#     'learning_rate':0.01,
#     'n_estimators':10000,
#     'subsample':0.8,
#     'feature_fraction':0.8,
#     'reg_alpha':0.5,
#     'reg_lambda':0.5,
#     'random_state':100,
#     'metric':'auc'
# }

# Active LightGBM configuration for binary classification scored by AUC.
parameters = {
    # Core boosting setup.
    "learning_rate": 0.05,
    "boosting_type": "gbdt",
    "objective": "binary",
    "metric": "auc",
    # Tree complexity.
    "num_leaves": 32,
    # Column and row subsampling (rows are re-sampled every 5 iterations).
    "feature_fraction": 0.8,
    "bagging_fraction": 0.8,
    "bagging_freq": 5,
    # Fixed seeds for reproducibility.
    "seed": 2020,
    "bagging_seed": 1,
    "feature_fraction_seed": 7,
    # Minimum number of samples allowed in a leaf.
    "min_data_in_leaf": 20,
    # Thread count; -1 is presumably meant as "use all cores" - confirm
    # against the installed LightGBM version.
    "n_jobs": -1,
    # Negative verbosity keeps LightGBM's own logging quiet.
    "verbose": -1,
}
# Out-of-fold predictions: every training row is scored once, by the model of
# the fold in which that row was held out for validation.
oof_lgb = np.zeros(len(X_train))

# Upper bound on boosting rounds; early stopping is expected to end training
# well before this limit.
num_round = 10000

# Create the output folder up front so a missing directory cannot make
# save_model fail after a fold has already been trained.
os.makedirs('model', exist_ok=True)

for fold_, (trn_idx, val_idx) in enumerate(KF.split(X_train.values, y.values)):
    print("fold n°{}".format(fold_))

    # Slice the feature columns once per fold; the validation slice is reused
    # for both the Dataset and the out-of-fold prediction below.
    trn_features = X_train.iloc[trn_idx][features]
    val_features = X_train.iloc[val_idx][features]
    trn_data = lgb.Dataset(trn_features, label=y.iloc[trn_idx])
    val_data = lgb.Dataset(val_features, label=y.iloc[val_idx])

    # NOTE(review): `verbose_eval` and `early_stopping_rounds` are accepted by
    # lgb.train only in LightGBM < 4.0. On 4.x pass
    # callbacks=[lgb.early_stopping(100), lgb.log_evaluation(500)] instead.
    clf = lgb.train(
        parameters,  # the active configuration dict defined above
        trn_data,
        num_round,
        valid_sets=[trn_data, val_data],
        verbose_eval=500,
        early_stopping_rounds=100,
    )

    # Predict with the best iteration found by early stopping, then persist
    # this fold's model.
    oof_lgb[val_idx] = clf.predict(val_features, num_iteration=clf.best_iteration)
    clf.save_model(f'model/model_{fold_}.txt')
    
# AUC is computed on the raw out-of-fold probabilities.
print("AUC score: {}".format(roc_auc_score(y, oof_lgb)))

# The remaining metrics need hard labels: binarise the probabilities once at a
# 0.5 cut-off and reuse the result for each metric.
oof_labels = [1 if prob >= 0.5 else 0 for prob in oof_lgb]
for template, metric in (
    ("F1 score: {}", f1_score),
    ("Precision score: {}", precision_score),
    ("Recall score: {}", recall_score),
):
    print(template.format(metric(y, oof_labels)))

# 你可能感兴趣的:(机器学习与深度学习算法,python,二分类)