First, define the objective function to optimize:
# Define the objective to optimize
import lightgbm as lgb
from sklearn import metrics

def LGB_L1_bayesian(num_leaves, learning_rate, feature_fraction,
                    lambda_l1, lambda_l2, max_depth, bagging_fraction, bagging_freq):
    # LightGBM requires these three parameters to be integers,
    # but the optimizer proposes floats, so cast them here
    num_leaves = int(num_leaves)
    max_depth = int(max_depth)
    bagging_freq = int(bagging_freq)
    param = {
        'num_leaves': num_leaves,
        'learning_rate': learning_rate,
        'bagging_fraction': bagging_fraction,
        'bagging_freq': bagging_freq,
        'feature_fraction': feature_fraction,
        'lambda_l1': lambda_l1,
        'lambda_l2': lambda_l2,
        'max_depth': max_depth,
        'save_binary': True,
        'seed': 1337,
        'feature_fraction_seed': 1337,
        'bagging_seed': 1337,
        'drop_seed': 1337,
        'data_random_seed': 1337,
        'boosting_type': 'gbdt',
        'verbose': 1,
        'metric': 'mae',
        'boost_from_average': False,
        # the original dict set 'objective' twice ('binary', then 'regression_l1');
        # only the later key takes effect, so keep the L1 regression objective and
        # drop 'is_unbalance', which applies only to binary objectives
        'objective': 'regression_l1',
    }
    xg_train = lgb.Dataset(train_data.drop(['uid'], axis=1).iloc[bayesian_tr_index].values,
                           label=train_label.iloc[bayesian_tr_index].values)
    xg_valid = lgb.Dataset(train_data.drop(['uid'], axis=1).iloc[bayesian_val_index].values,
                           label=train_label.iloc[bayesian_val_index].values)
    num_round = 10000
    clf = lgb.train(param, xg_train, num_round, valid_sets=[xg_valid],
                    verbose_eval=500, early_stopping_rounds=50)
    predictions = clf.predict(train_data.drop(['uid'], axis=1).iloc[bayesian_val_index].values,
                              num_iteration=clf.best_iteration)
    # bayes_opt maximizes, so map MAE (lower is better) into (0, 1], higher is better
    score = 1 / (1 + metrics.mean_absolute_error(train_label.iloc[bayesian_val_index].values, predictions))
    return score
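The objective above references train_data, train_label, bayesian_tr_index and bayesian_val_index without defining them. Here is a minimal sketch of one way to build that split, assuming train_data holds the features (plus the 'uid' column) and train_label the target; the 20% hold-out fraction is an assumption, not from the original post:

import numpy as np
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for scoring each Bayesian optimization trial
bayesian_tr_index, bayesian_val_index = train_test_split(
    np.arange(len(train_data)), test_size=0.2, random_state=1337)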
With the objective function defined, we can set up the optimizer and run the optimization.
# Install the bayes_opt package first
from bayes_opt import BayesianOptimization

# Search bounds for each parameter
bounds_LGB_L1 = {
    'num_leaves': (20, 50),
    'learning_rate': (0.005, 0.1),
    'feature_fraction': (0.1, 1),
    'lambda_l1': (0, 10.0),
    'lambda_l2': (0, 10.0),
    'max_depth': (3, 15),
    'bagging_fraction': (0.2, 1),
    'bagging_freq': (1, 10),
}

# Build the optimizer
LGB_BO = BayesianOptimization(LGB_L1_bayesian, bounds_LGB_L1, random_state=13)

init_points = 100  # initial random probes
n_iter = 100       # Bayesian optimization steps

# Run the optimization (acq/xi/alpha follow the classic bayes_opt 1.x maximize() API)
LGB_BO.maximize(init_points=init_points, n_iter=n_iter, acq='ucb', xi=0.0, alpha=1e-6)
# Probing a known-reasonable parameter set may further improve the result
LGB_BO.probe(
    params={'feature_fraction': 0.6,
            'bagging_fraction': 0.8,
            'bagging_freq': 2,
            'lambda_l2': 5,
            'lambda_l1': 0,
            'learning_rate': 0.01,
            'max_depth': 5,
            'num_leaves': 31},
    lazy=True,  # queue the probe; it runs on the next maximize() call
)

# Note: set both init_points and n_iter to 0 here so only the queued probe runs
LGB_BO.maximize(init_points=0, n_iter=0)
# Best score found
LGB_BO.max['target']

# Parameters that achieved it
LGB_BO.max['params']
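Beyond the single best point, bayes_opt also records every evaluation in LGB_BO.res, which is handy for checking whether the search ever beat the probed point. A quick sketch for ranking everything it tried:

# Each entry in LGB_BO.res is {'target': score, 'params': {...}}
top5 = sorted(LGB_BO.res, key=lambda r: r['target'], reverse=True)[:5]
for r in top5:
    print(round(r['target'], 5), r['params'])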
Then plug the resulting parameters into LightGBM.
Note: LightGBM parameters such as bagging_freq, max_depth, and num_leaves must be integers, so round the optimized values before using them, as in the sketch below.
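A minimal sketch of that last step, assuming the variables from the sections above; num_boost_round here is a placeholder and would normally be chosen via early stopping on a validation set:

# Turn the optimized values into a usable LightGBM config
best_params = LGB_BO.max['params'].copy()
for key in ('num_leaves', 'max_depth', 'bagging_freq'):
    best_params[key] = int(round(best_params[key]))  # must be integers

# Re-attach the fixed (non-searched) settings from the objective
best_params.update({'objective': 'regression_l1', 'metric': 'mae',
                    'boosting_type': 'gbdt', 'seed': 1337})

full_train = lgb.Dataset(train_data.drop(['uid'], axis=1).values,
                         label=train_label.values)
final_model = lgb.train(best_params, full_train, num_boost_round=1000)  # placeholder round count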
Postscript: I'm not sure whether my bounds were poorly chosen, but after all that searching, the best result was still the probed point...