Bayesian Search for LightGBM Parameters

First, define the objective function to optimize:

import lightgbm as lgb
from sklearn import metrics

# Define the objective function to optimize
def LGB_L1_bayesian(num_leaves, learning_rate, feature_fraction,
                    lambda_l1, lambda_l2, max_depth, bagging_fraction, bagging_freq):

    # LightGBM expects the next three parameters to be integers, so cast them
    num_leaves = int(num_leaves)
    max_depth = int(max_depth)
    bagging_freq = int(bagging_freq)
    
    assert type(num_leaves) == int
    assert type(max_depth) == int
    assert type(bagging_freq) == int
    
    param = {
        'num_leaves': num_leaves,
        'learning_rate': learning_rate,
        'bagging_fraction': bagging_fraction,
        'bagging_freq': bagging_freq,
        'feature_fraction': feature_fraction,
        'lambda_l1': lambda_l1,
        'lambda_l2': lambda_l2,
        'max_depth': max_depth,
        'save_binary': True, 
        'seed': 1337,
        'feature_fraction_seed': 1337,
        'bagging_seed': 1337,
        'drop_seed': 1337,
        'data_random_seed': 1337,
        'objective': 'regression_l1',  # L1 (MAE) regression objective
        'boosting_type': 'gbdt',
        'verbose': 1,
        'metric': 'mae',
        'boost_from_average': False,
    }
    
    
    xg_train = lgb.Dataset(train_data.drop(['uid'],axis=1).iloc[bayesian_tr_index].values,
                           label=train_label.iloc[bayesian_tr_index].values
                           )
    xg_valid = lgb.Dataset(train_data.drop(['uid'],axis=1).iloc[bayesian_val_index].values,
                           label=train_label.iloc[bayesian_val_index].values
                           )   

    num_round = 10000
    clf = lgb.train(param, xg_train, num_round, valid_sets = [xg_valid], verbose_eval=500, early_stopping_rounds = 50)
    
    predictions = clf.predict(train_data.drop(['uid'],axis=1).iloc[bayesian_val_index].values, num_iteration=clf.best_iteration)   
    
    score = 1 / ( 1 + metrics.mean_absolute_error(train_label.iloc[bayesian_val_index].values, predictions) )
    
    return score
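The function returns 1 / (1 + MAE), so maximizing the score is equivalent to minimizing the validation MAE. It also assumes that train_data, train_label, bayesian_tr_index, and bayesian_val_index already exist. A minimal sketch of one way to prepare them, assuming train_data is a DataFrame with a uid column and train_label is the matching target Series (the synthetic data below is only for illustration):

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

# Hypothetical stand-ins: in practice train_data / train_label come from your own dataset
rng = np.random.RandomState(1337)
train_data = pd.DataFrame({
    'uid': np.arange(1000),
    'f1': rng.randn(1000),
    'f2': rng.randn(1000),
})
train_label = pd.Series(rng.rand(1000), name='label')

# A single hold-out split used only during the Bayesian search (positional indices for .iloc)
bayesian_tr_index, bayesian_val_index = train_test_split(
    np.arange(len(train_data)), test_size=0.2, random_state=1337)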

With the objective function in place, we can set up the optimizer and start the search.

# Install bayes_opt first: pip install bayesian-optimization
from bayes_opt import BayesianOptimization 

# Parameter search ranges (bounds)
bounds_LGB_L1 = {
    'num_leaves': (20, 50), 
    'learning_rate': (0.005, 0.1),   
    'feature_fraction': (0.1, 1),
    'lambda_l1': (0, 10.0), 
    'lambda_l2': (0, 10.0), 
    'max_depth':(3,15),
    'bagging_fraction':(0.2,1),
    'bagging_freq':(1,10),
}

# The optimizer
LGB_BO = BayesianOptimization(LGB_L1_bayesian, bounds_LGB_L1, random_state=13)

init_points = 100  # initial random exploration steps
n_iter = 100       # Bayesian optimization steps
# Start the optimization
LGB_BO.maximize(init_points=init_points, n_iter=n_iter, acq='ucb', xi=0.0, alpha=1e-6)

# Probe with a known, reasonable set of parameters; this may further improve the result
LGB_BO.probe(
    params={'feature_fraction': 0.6,
            'bagging_fraction': 0.8,
            'bagging_freq': 2,
            'lambda_l2': 5, 
            'lambda_l1': 0,
            'learning_rate': 0.01,
            'max_depth': 5, 
            'num_leaves': 31},
    lazy=True,  # queue the point; it is evaluated on the next maximize() call
)

# Note: set both init_points and n_iter to 0 here so that only the queued probe point is evaluated
LGB_BO.maximize(init_points=0, n_iter=0)
# Best score found
LGB_BO.max['target']

# Parameters that produced the best score
LGB_BO.max['params']

Then simply plug the resulting parameters into LightGBM (see the sketch after the note below).

Note: LightGBM parameters such as bagging_freq, max_depth, and num_leaves must be integers, so round the values returned by the optimizer before using them.
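For example, a minimal sketch of taking the search result, rounding the integer parameters, and training the final model (the fixed keys such as objective, metric, and seed simply mirror those used in the objective function above):

best_params = LGB_BO.max['params'].copy()

# Round the parameters that LightGBM requires to be integers
for key in ['num_leaves', 'max_depth', 'bagging_freq']:
    best_params[key] = int(round(best_params[key]))

# Add back the fixed (non-searched) parameters used during the search
best_params.update({
    'objective': 'regression_l1',
    'boosting_type': 'gbdt',
    'metric': 'mae',
    'seed': 1337,
})

full_train = lgb.Dataset(train_data.drop(['uid'], axis=1).values,
                         label=train_label.values)
final_model = lgb.train(best_params, full_train, num_boost_round=1000)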

 

Postscript: I'm not sure whether the bounds I set were just poorly chosen, but after all that searching, the best result was still the probed point...
