LightGBM hyperparameter tuning
import numpy as np
import lightgbm as lgb
from sklearn.model_selection import train_test_split
from hyperopt import fmin, tpe, hp, space_eval, Trials, STATUS_OK

# Hold out 30% of the data as a validation set
x_train, x_val, y_train, y_val = train_test_split(X_data, Y_data, test_size=0.3, random_state=3)

N_FOLDS = 3
# Create the LightGBM training dataset
train_set = lgb.Dataset(x_train, label=y_train)
def objective(params, n_folds=N_FOLDS):
    '''Objective function for LightGBM hyperparameter tuning'''
    # Perform n-fold cross validation with the sampled hyperparameters,
    # using early stopping and evaluating with MAE (reported by LightGBM as 'l1')
    cv_results = lgb.cv(params, train_set, nfold=n_folds, num_boost_round=2000,
                        early_stopping_rounds=50, metrics='mae', seed=50)
    # Extract the best (lowest) mean MAE across the folds
    best_score = min(cv_results['l1-mean'])
    # Hyperopt minimizes the loss; MAE is already "lower is better",
    # so it can be returned directly
    return {'loss': best_score, 'params': params, 'status': STATUS_OK}
space = {
    # 'class_weight': hp.choice('class_weight', [None, 'balanced']),
    'n_estimators': hp.choice('n_estimators', range(50, 300)),
    # 'boosting_type': hp.choice('boosting_type',
    #                            [{'boosting_type': 'gbdt',
    #                              'subsample': hp.uniform('gdbt_subsample', 0.5, 1)},
    #                             {'boosting_type': 'dart',
    #                              'subsample': hp.uniform('dart_subsample', 0.5, 1)},
    #                             {'boosting_type': 'goss'}]),
    'num_leaves': hp.choice('num_leaves', range(31, 161)),
    'learning_rate': hp.loguniform('learning_rate', np.log(0.01), np.log(0.2)),
    # 'subsample_for_bin': hp.choice('subsample_for_bin', range(20000, 300000, 20000)),
    'min_child_samples': hp.choice('min_child_samples', range(20, 500, 5)),
    'reg_alpha': hp.uniform('reg_alpha', 0.0, 1.0),
    'reg_lambda': hp.uniform('reg_lambda', 0.0, 1.0),
    # 'colsample_bytree' is an alias of 'feature_fraction' in LightGBM,
    # so only feature_fraction is sampled to avoid conflicting values
    'feature_fraction': hp.uniform('feature_fraction', 0.5, 1.0),
    'bagging_fraction': hp.uniform('bagging_fraction', 0.5, 1.0),
    'max_depth': hp.choice('max_depth', range(6, 11)),
}
# Optimization algorithm
tpe_algorithm = tpe.suggest
# Trials object to track progress
bayes_trials = Trials()
MAX_EVALS = 500
# Optimize
best = fmin(fn=objective, space=space, algo=tpe_algorithm,
            max_evals=MAX_EVALS, trials=bayes_trials)
print(best)
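Note that fmin reports hp.choice parameters as indices into their candidate lists, so they should be decoded before reuse. A minimal sketch for decoding best with space_eval and fitting a final model on the 30% validation split held out above (scoring with sklearn's mean_absolute_error is just one reasonable choice):

from hyperopt import space_eval
from sklearn.metrics import mean_absolute_error

# Map the index-valued hp.choice entries in `best` back to real parameter values
best_params = space_eval(space, best)
print(best_params)

# In LightGBM, 'n_estimators' is an alias for the number of boosting rounds,
# so use it directly as num_boost_round for the final fit
n_rounds = best_params.pop('n_estimators')
final_model = lgb.train(best_params, train_set, num_boost_round=n_rounds)
val_pred = final_model.predict(x_val)
print('validation MAE:', mean_absolute_error(y_val, val_pred))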
XGBoost hyperparameter tuning
import xgboost as xgb

N_FOLDS = 3
# Create the XGBoost DMatrix
train_set = xgb.DMatrix(x_train, label=y_train)
def objective(params, n_folds=N_FOLDS):
    '''Objective function for XGBoost hyperparameter tuning'''
    # Perform n-fold cross validation with the sampled hyperparameters,
    # using early stopping and evaluating with MAE
    cv_results = xgb.cv(params, train_set, nfold=n_folds, num_boost_round=2000,
                        early_stopping_rounds=50, metrics='mae', seed=50)
    # Extract the best (lowest) mean test MAE across the folds
    best_score = min(cv_results['test-mae-mean'])
    # Hyperopt minimizes the loss; MAE is already "lower is better",
    # so it can be returned directly
    return {'loss': best_score, 'params': params, 'status': STATUS_OK}
space = {
    # 'class_weight': hp.choice('class_weight', [None, 'balanced']),
    # 'n_estimators' belongs to the scikit-learn wrapper; with the native API the
    # number of rounds is set by num_boost_round / early stopping in xgb.cv above
    # 'n_estimators': hp.choice('n_estimators', range(50, 300)),
    # 'num_leaves' is a LightGBM name; the XGBoost equivalent is 'max_leaves'
    # (effective with grow_policy='lossguide')
    # 'num_leaves': hp.choice('num_leaves', range(31, 161)),
    'learning_rate': hp.loguniform('learning_rate', np.log(0.01), np.log(0.2)),
    'min_child_weight': hp.choice('min_child_weight', range(1, 9)),
    'reg_alpha': hp.uniform('reg_alpha', 0.0, 1.0),
    'reg_lambda': hp.uniform('reg_lambda', 0.0, 1.0),
    'gamma': hp.uniform('gamma', 0.0, 0.5),
    'max_depth': hp.choice('max_depth', range(5, 11)),
    'subsample': hp.uniform('subsample', 0.5, 1.0),
    'colsample_bytree': hp.uniform('colsample_bytree', 0.5, 1.0),
}
# Optimization algorithm
tpe_algorithm = tpe.suggest
# Trials object to track progress
bayes_trials = Trials()
MAX_EVALS = 150
# Optimize
best = fmin(fn=objective, space=space, algo=tpe_algorithm,
            max_evals=MAX_EVALS, trials=bayes_trials)
print(best)
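As with LightGBM, the hp.choice entries in best come back as indices. A minimal sketch for decoding them and refitting a final booster, assuming the x_val/y_val split from the LightGBM section and again scoring with mean_absolute_error:

from hyperopt import space_eval
from sklearn.metrics import mean_absolute_error

# Recover the actual hyperparameter values chosen by the search
best_params = space_eval(space, best)
print(best_params)

# Refit a final booster with the recovered hyperparameters; 2000 rounds matches
# the cap used in xgb.cv, though in practice one might rerun early stopping
# against an evaluation set instead of training the full 2000 rounds
final_model = xgb.train(best_params, train_set, num_boost_round=2000)
val_pred = final_model.predict(xgb.DMatrix(x_val))
print('validation MAE:', mean_absolute_error(y_val, val_pred))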