Implementing LightGBM in Python, with an explanation of its key parameters

LightGBM parameters explained

max_depth : maximum depth of each tree
num_leaves : maximum number of leaves per tree; the main capacity control, since trees grow leaf-wise (see the check below this list)
objective : objective (loss) function, e.g. 'binary' for binary classification
min_data_in_leaf : minimum number of samples a leaf must contain
learning_rate : learning rate (shrinkage applied to each new tree)
feature_fraction : fraction of features randomly sampled for each tree
bagging_fraction : fraction of the data randomly selected, without resampling, on each bagging iteration
bagging_freq : bagging frequency; a value of k means bagging is performed every k iterations (0 disables it)
metric : evaluation metric(s) reported on the validation sets
is_unbalance : set to True for unbalanced binary training data, so the minority class is re-weighted
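Because LightGBM grows trees leaf-wise (best-first) rather than level-wise, num_leaves is the primary complexity knob and max_depth only caps it: a tree of depth d can hold at most 2**d leaves. A minimal sanity check, using the same hypothetical values as the training code below:

max_depth, num_leaves = 8, 128
# 2 ** 8 = 256 >= 128, so num_leaves is the binding constraint here;
# a num_leaves above 256 would silently make max_depth the real limit.
assert num_leaves <= 2 ** max_depth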

import lightgbm as lg
import matplotlib.pyplot as plt  # needed to display the importance plot

def print_feature_importance_lgb(gbm):
    # Print every feature's total gain, sorted from most to least important.
    print(80 * '*')
    print(31 * '*' + 'Feature Importance' + 31 * '*')
    print(80 * '.')
    print("\n".join(("%50s => %9.5f" % x) for x in sorted(
        zip(gbm.feature_name(), gbm.feature_importance("gain")),
        key=lambda x: x[1],
        reverse=True)))
    print(80 * '.')
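Booster.feature_importance supports two importance types: the default 'split' counts how many times a feature appears in a split, while 'gain' (used above) sums the loss reduction those splits achieved, which is usually the more informative ranking. A quick sketch, assuming booster is any trained LightGBM Booster:

imp_split = booster.feature_importance(importance_type='split')  # split counts
imp_gain = booster.feature_importance(importance_type='gain')    # total gain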

def fit_lgb(X_tr, y_tr, X_va, y_va, cates_cols):
    params = {
        'max_depth': 8,
        'num_leaves': 128,
        'objective': 'binary',
        'min_data_in_leaf': 20,
        'learning_rate': 0.01,
        'feature_fraction': 0.9,
        'bagging_fraction': 0.8,
        'bagging_freq': 1,  # bag on every iteration
        'random_state': 2019,
        'metric': ['binary_logloss'],
        'num_threads': 16,
        'is_unbalance': True
    }
    MAX_ROUNDS = 10000
    # The validation Dataset references the training Dataset so that both
    # share the same feature bin boundaries.
    dtr = lg.Dataset(X_tr, label=y_tr, categorical_feature=cates_cols)
    dva = lg.Dataset(X_va, label=y_va, categorical_feature=cates_cols, reference=dtr)
    cls = lg.train(
        params,
        dtr,
        num_boost_round=MAX_ROUNDS,
        valid_sets=(dva, dtr),
        valid_names=['valid', 'train'],
        # LightGBM >= 4 replaced the early_stopping_rounds and verbose_eval
        # keyword arguments with callbacks.
        callbacks=[lg.early_stopping(stopping_rounds=125),
                   lg.log_evaluation(period=50)])
    print_feature_importance_lgb(cls)
    lg.plot_importance(cls, importance_type='gain', figsize=(11, 12), max_num_features=50, grid=False)
    plt.show()
    return cls
gbm = fit_lgb(X_tr, y_tr, X_va, y_va, nominal_cate_cols)
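Because training stops early on the validation loss, the returned Booster records the best round in best_iteration, and predictions should be truncated there rather than using all MAX_ROUNDS trees. A minimal sketch, where X_te is a hypothetical test matrix prepared the same way as X_tr:

# Probability of the positive class for each row, using only the trees
# built up to the best validation iteration.
y_pred = gbm.predict(X_te, num_iteration=gbm.best_iteration)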
