python实现catboost分类器以及部分参数解释

catboost参数解析

iterations : 最大树数
od_type : 过拟合检查类型
od_wait : 评估指标达到最优值之后继续训练的迭代次数,超过该次数仍无提升则提前停止训练
max_depth : 最大深度
learning_rate : 学习率
l2_leaf_reg : L2正则参数
fold_len_multiplier : folds长度系数。取值必须大于1,通常取较小的值时验证效果最好。默认值为2。
loss_function : 损失函数

import operator
# Positional indices (within X_tr's columns) of the nominal categorical
# features. The column-name list is built once and its bound ``index`` method
# is mapped over the names, instead of rebuilding the list for every column.
cates_idx = list(map(X_tr.columns.values.tolist().index, nominal_cate_cols))
def verbose_feature_importance_cat(cls, X_tr):
    """Print and plot the feature importances of a fitted model.

    Scores from ``cls.feature_importances_`` are paired with the column
    names of ``X_tr`` by position, printed from most to least important,
    and the 50 highest-scoring features are drawn as a horizontal bar chart.

    Args:
        cls: Fitted model exposing ``feature_importances_``.
        X_tr: Training data whose ``columns`` name the features
            (presumably a pandas DataFrame -- confirm against caller).

    Returns:
        None. Output goes to stdout and to a matplotlib figure.
    """
    # Build the column-name list once rather than once per feature.
    column_names = X_tr.columns.values.tolist()
    cat_feature_importance = {
        column_names[idx]: score
        for idx, score in enumerate(cls.feature_importances_)
    }
    # Ascending order: ``tail(50)`` below then selects the top 50, and the
    # horizontal bar chart ends up with the largest score at the top.
    cat_feature_importance = sorted(cat_feature_importance.items(),
                                    key=operator.itemgetter(1),
                                    reverse=False)

    print(80 * '*')
    print(31 * '*' + 'Feature Importance' + 31 * '*')
    print(80 * '.')
    # Text report runs from the most important feature to the least.
    for feature, score in reversed(cat_feature_importance):
        print(".%50s => %9.5f" % (feature, score))
    print(80 * '.')

    feature_score = pd.DataFrame(cat_feature_importance,
                                 columns=['Feature', 'Score'])
    # NOTE: this changes the process-wide default figure size.
    plt.rcParams["figure.figsize"] = (11, 12)
    ax = feature_score.tail(50).plot('Feature', 'Score', kind='barh', color='b')
    ax.set_title("Catboost Feature Importance Ranking", fontsize=8)
    ax.set_xlabel('')
    rects = ax.patches

    # Annotate each bar with its score rounded to two decimals.
    labels = feature_score.tail(50)['Score'].round(2)
    for rect, label in zip(rects, labels):
        width = rect.get_width()
        ax.text(width + 0.2, rect.get_y() + 0.02, label, ha='center', va='bottom')
    plt.show()
     
def fit_cat(X_tr, y_tr, X_va, y_va, cates_idx):
    """Train a CatBoostClassifier with fixed settings and report importances.

    Args:
        X_tr: Training features.
        y_tr: Training labels.
        X_va: Validation features, passed to ``fit`` as part of ``eval_set``.
        y_va: Validation labels, passed to ``fit`` as part of ``eval_set``.
        cates_idx: Categorical feature indices, passed as ``cat_features``.

    Returns:
        The object returned by ``CatBoostClassifier.fit``.
    """
    print('Fitting CatBoostClassifier ...')

    # All hyperparameters are fixed here; edit this mapping to tune the model.
    hyper_params = {
        'iterations': 2000,
        'od_type': 'Iter',
        'od_wait': 120,
        'max_depth': 8,
        'learning_rate': 0.02,
        'l2_leaf_reg': 9,
        'random_seed': 2018,
        'metric_period': 50,
        'fold_len_multiplier': 1.1,
        'loss_function': 'Logloss',
        'logging_level': 'Verbose',
    }
    classifier = cb.CatBoostClassifier(**hyper_params)

    validation_pair = (X_va, y_va)
    fine_model = classifier.fit(
        X_tr,
        y_tr,
        eval_set=validation_pair,
        cat_features=cates_idx,
    )

    # Print and plot the importances of the freshly trained model.
    verbose_feature_importance_cat(fine_model, X_tr)
    return fine_model
# Train on the training split, using the validation split as the eval set.
cat = fit_cat(
    X_tr=X_tr,
    y_tr=y_tr,
    X_va=X_va,
    y_va=y_va,
    cates_idx=cates_idx,
)

你可能感兴趣的:(机器学习)