xgboost的plot_importance绘图时出现的f0、f1、f2、f3、f4、f5等改为对应特征的字段名

将xgboost的plot_importance绘图时出现的f0、f1、f2、f3、f4、f5等改为对应特征的字段名

xgboost输出特征重要度

操作总结

打开 xgboost 安装目录下的 plotting.py,找到 plot_importance 函数的定义,把其中的 booster.get_score(importance_type=importance_type) 改成 booster.get_score(importance_type=importance_type, fmap=fmap),并确保函数签名中带有 fmap='' 参数(见下方代码)。亲测有效。

代码部分

# 调用文件
import xgboost as xgb
import numpy as np
import json
import matplotlib.pyplot as plt
from matplotlib import pyplot
from xgboost import plot_tree

# Paths of the trained model and of its feature-map file; the feature map
# lets xgboost report real feature names instead of f0, f1, f2, ...
model_file = "data/xgboost.model"
fmap = model_file + ".fmap"
bst = xgb.Booster(model_file=model_file)
# Use the call form of print so the snippet runs on Python 2 and Python 3.
print(bst.get_fscore(fmap=fmap))
xgb.to_graphviz(bst, num_trees=10, fmap=fmap)
xgb.plot_importance(bst, fmap=fmap)

只需修改 plotting.py 中 plot_importance 函数里调用 get_score 的那一行(在作者所用的版本中约为第67行,不同版本行号可能不同)

# plot_importance修改
# Python/2.7/lib/python/site-packages/xgboost/plotting.py
def plot_importance(booster, ax=None, height=0.2,
                    xlim=None, ylim=None, title='Feature importance',
                    xlabel='F score', ylabel='Features',
                    importance_type='weight', max_num_features=None,
                    grid=True, show_values=True, fmap='', **kwargs):
    """Plot importance based on fitted trees.

    Parameters
    ----------
    booster : Booster, XGBModel or dict
        Booster or XGBModel instance, or dict taken by Booster.get_fscore()
    ax : matplotlib Axes, default None
        Target axes instance. If None, new figure and axes will be created.
    grid : bool, Turn the axes grids on or off.  Default is True (On).
    importance_type : str, default "weight"
        How the importance is calculated: either "weight", "gain", or "cover"

        * "weight" is the number of times a feature appears in a tree
        * "gain" is the average gain of splits which use the feature
        * "cover" is the average coverage of splits which use the feature
          where coverage is defined as the number of samples affected by the split
    max_num_features : int, default None
        Maximum number of top features displayed on plot. If None, all features will be displayed.
    height : float, default 0.2
        Bar height, passed to ax.barh()
    xlim : tuple, default None
        Tuple passed to axes.xlim()
    ylim : tuple, default None
        Tuple passed to axes.ylim()
    title : str, default "Feature importance"
        Axes title. To disable, pass None.
    xlabel : str, default "F score"
        X axis title label. To disable, pass None.
    ylabel : str, default "Features"
        Y axis title label. To disable, pass None.
    show_values : bool, default True
        Show values on plot. To disable, pass False.
    fmap : str, default ''
        Feature map file name, forwarded to get_score() so that the scores
        are keyed by the names in that file instead of f0, f1, f2, ...
        Ignored when ``booster`` is already a dict of scores.
    kwargs :
        Other keywords passed to ax.barh()

    Returns
    -------
    ax : matplotlib Axes
    """
    # TODO: move this to compat.py
    try:
        import matplotlib.pyplot as plt
    except ImportError:
        raise ImportError('You must install matplotlib to plot importance')

    if isinstance(booster, XGBModel):
        # Forward fmap here as well, otherwise models created through the
        # scikit-learn wrapper would still be labelled f0, f1, f2, ...
        importance = booster.get_booster().get_score(
            importance_type=importance_type, fmap=fmap)
    elif isinstance(booster, Booster):
        # fmap is the only addition compared with the stock implementation.
        importance = booster.get_score(
            importance_type=importance_type, fmap=fmap)
    elif isinstance(booster, dict):
        # A ready-made score dict is used as-is; fmap does not apply.
        importance = booster
    else:
        raise ValueError('tree must be Booster, XGBModel or dict instance')

    # NOTE(review): the excerpt stops here; the plotting code that follows in
    # plotting.py is not shown and is presumably left unchanged.

fmap生成

import sys

# Feature names in the same order as the columns used to train the model;
# the position in this list becomes the feature index in the fmap file.
features2 = ['core_id', 'what_id', 'where_id', 'extra_id', 'category_group_id', 'chain_id', 'address_id']


def write_fmap(path, features):
    """Write a feature-map file with one line per feature.

    Each line has the form ``<index>\\t<name>\\tq``. The third column is the
    feature type: ``i`` for indicator and ``q`` for quantity; every feature
    is written as ``q`` here.

    Parameters
    ----------
    path : str
        Destination file; it is created or overwritten.
    features : iterable of str
        Feature names, in feature-index order.
    """
    # The context manager guarantees the file is flushed and closed, even if
    # a write fails part-way through.
    with open(path, "w") as out:
        for i, feat in enumerate(features):
            out.write('{0}\t{1}\tq\n'.format(i, feat))


if __name__ == "__main__":
    # Usage: python <this script> <output fmap path>
    write_fmap(sys.argv[1], features2)

print("Done!")

你可能感兴趣的:(Python,python,机器学习,决策树)