# LightGBM simple grid search (original title: "lightgbm简单网格搜索")

# Grid search over LightGBM hyper-parameters with 5-fold CV.
# Relies on names defined earlier in the file: train, test, target, params,
# categorical_feats, eval_f, lgb, KFold, r2_score, np, pd.
folds = KFold(n_splits=5, shuffle=True, random_state=1333)

# Results of the BEST combination found so far (the original accumulated
# predictions across all combinations, corrupting both arrays).
oof_lgb = np.zeros(len(train))
predictions_lgb = np.zeros(len(test))
feature_importance_data = pd.DataFrame()

best_score = float("-inf")  # was 0: a combo with negative R^2 would never be recorded
best_parameters = {}        # initialize so the final prints cannot raise NameError
learning_rate = [0.01, 0.05, 0.1]
reg_lambda = [0.01, 0.1, 1, 5]
reg_alpha = [0.01, 0.1, 1, 5]

try_count = 0  # running attempt counter (was the meaningless (i+1)*(j+1)*(k+1))
for lr in learning_rate:
    for lam in reg_lambda:
        for alpha in reg_alpha:
            try_count += 1

            print('the learning_rate is:', lr)
            print('the reg_lambda is:', lam)
            print('the reg_alpha is:', alpha)
            print('\n')

            params['learning_rate'] = lr
            params['reg_lambda'] = lam
            params['reg_alpha'] = alpha

            # Fresh buffers for THIS combination only.
            oof = np.zeros(len(train))
            preds = np.zeros(len(test))

            for fold_, (trn_idx, val_idx) in enumerate(folds.split(train.values, target.values)):
                print("fold {}".format(fold_))
                trn_data = lgb.Dataset(train.iloc[trn_idx], label=target.iloc[trn_idx], categorical_feature=categorical_feats)
                val_data = lgb.Dataset(train.iloc[val_idx], label=target.iloc[val_idx], categorical_feature=categorical_feats)

                num_round = 30000
                clf = lgb.train(params, trn_data, num_round,
                                valid_sets=[trn_data, val_data],
                                verbose_eval=500, early_stopping_rounds=200,
                                feval=eval_f)

                oof[val_idx] = clf.predict(train.iloc[val_idx], num_iteration=clf.best_iteration)
                preds += clf.predict(test, num_iteration=clf.best_iteration) / folds.n_splits

                # Per-fold feature importances, stacked across folds and combos.
                fold_importance_data = pd.DataFrame()
                fold_importance_data["feature"] = train.columns
                fold_importance_data["importance"] = clf.feature_importance()
                fold_importance_data["fold"] = fold_ + 1
                feature_importance_data = pd.concat([feature_importance_data, fold_importance_data], axis=0)

            # Score ONCE, after every fold has filled its slice of `oof`.
            # (The original scored inside the fold loop on a partially filled
            # array, biasing the comparison between combinations.)
            score = r2_score(target, oof)

            if score > best_score:  # keep the best-performing parameters
                best_score = score
                best_parameters = {'learning_rate': lr, 'reg_lambda': lam, 'reg_alpha': alpha}
                oof_lgb = oof            # remember the best combo's OOF predictions
                predictions_lgb = preds  # ...and its averaged test predictions

            print("Best score:{:.9f}".format(best_score))
            print('\n\n')
            print('Try time:', try_count)
            print("the now's  Best parameters:{}".format(best_parameters))
print('\n')

print("Best score:{:.9f}".format(best_score))
print('*' * 30)
print("The final Best parameters:{}".format(best_parameters))
print('*' * 30)
print("CV Score: {:<8.5f}".format(r2_score(target, oof_lgb)))  # 0.892424 0.893725 0.898585 0.904263 0.914873

# Scraped blog footer (not code): "You may also be interested in: (data mining)"