打开 xgboost.plot_importance 的函数定义(位于 plotting.py),把其中的 booster.get_score(importance_type=importance_type) 改成 booster.get_score(importance_type=importance_type, fmap=fmap)。原函数默认不会把 fmap 传给 get_score,改完之后亲测有效。
# 调用文件
import xgboost as xgb
import numpy as np
import json
import matplotlib.pyplot as plt
from matplotlib import pyplot
from xgboost import plot_tree
# Load a saved booster; the feature-map path is the model path plus ".fmap".
model_file = "data/xgboost.model"
fmap = model_file + ".fmap"
bst = xgb.Booster(model_file=model_file)

# Call-style print works on both Python 2.7 and Python 3 for a single argument.
print(bst.get_fscore(fmap=fmap))

# Render tree number 10 and the importance chart using the same feature map.
# NOTE(review): passing fmap to plot_importance relies on the patched
# plotting.py described in this note -- confirm the installed xgboost
# version accepts that keyword.
xgb.to_graphviz(bst, num_trees=10, fmap=fmap)
xgb.plot_importance(bst, fmap=fmap)
只需修改 plotting.py 中 plot_importance 函数里 Booster 分支调用 get_score 的那一行(原文件第67行),给它加上 fmap=fmap 参数即可。
# plot_importance修改
# Python/2.7/lib/python/site-packages/xgboost/plotting.py
def plot_importance(booster, ax=None, height=0.2,
                    xlim=None, ylim=None, title='Feature importance',
                    xlabel='F score', ylabel='Features',
                    importance_type='weight', max_num_features=None,
                    grid=True, show_values=True, fmap='', **kwargs):
    """Plot importance based on fitted trees.

    Parameters
    ----------
    booster : Booster, XGBModel or dict
        Booster or XGBModel instance, or dict taken by Booster.get_fscore()
    ax : matplotlib Axes, default None
        Target axes instance. If None, new figure and axes will be created.
    grid : bool, Turn the axes grids on or off.  Default is True (On).
    importance_type : str, default "weight"
        How the importance is calculated: either "weight", "gain", or "cover"

        * "weight" is the number of times a feature appears in a tree
        * "gain" is the average gain of splits which use the feature
        * "cover" is the average coverage of splits which use the feature
          where coverage is defined as the number of samples affected by the split
    max_num_features : int, default None
        Maximum number of top features displayed on plot. If None, all features will be displayed.
    height : float, default 0.2
        Bar height, passed to ax.barh()
    xlim : tuple, default None
        Tuple passed to axes.xlim()
    ylim : tuple, default None
        Tuple passed to axes.ylim()
    title : str, default "Feature importance"
        Axes title. To disable, pass None.
    xlabel : str, default "F score"
        X axis title label. To disable, pass None.
    ylabel : str, default "Features"
        Y axis title label. To disable, pass None.
    show_values : bool, default True
        Show values on plot. To disable, pass False.
    fmap : str, default ''
        Feature map file name, forwarded to ``get_score()`` for Booster and
        XGBModel inputs. Ignored when ``booster`` is already a dict.
        (Presumably the empty default means "no feature map" -- confirm
        against Booster.get_score.)
    kwargs :
        Other keywords passed to ax.barh()

    Returns
    -------
    ax : matplotlib Axes
    """
    # TODO: move this to compat.py
    try:
        import matplotlib.pyplot as plt
    except ImportError:
        raise ImportError('You must install matplotlib to plot importance')

    # Resolve the importance mapping. Both model-backed branches forward
    # ``fmap``; previously only the Booster branch did, so the argument was
    # silently dropped for XGBModel inputs.
    if isinstance(booster, XGBModel):
        importance = booster.get_booster().get_score(
            importance_type=importance_type, fmap=fmap)
    elif isinstance(booster, Booster):
        # This is the line the note patches: fmap is added to the call.
        importance = booster.get_score(importance_type=importance_type, fmap=fmap)
    elif isinstance(booster, dict):
        importance = booster
    else:
        raise ValueError('tree must be Booster, XGBModel or dict instance')

    # NOTE(review): the excerpt ends here. According to the accompanying note
    # the remainder of the upstream function body is unchanged; it is not
    # reproduced in this file.
# Feature names in index order; the position in this list becomes the
# feature index written to the feature map file.
features2 = ['core_id', 'what_id', 'where_id', 'extra_id', 'category_group_id', 'chain_id', 'address_id']


def write_fmap(path, features):
    """Write a feature map file to ``path``.

    One line is written per feature, in the form
    ``<index><TAB><name><TAB>q``, where the index is the feature's position
    in ``features``.

    Parameters
    ----------
    path : str
        Destination file; it is created or truncated.
    features : iterable of str
        Feature names in index order.
    """
    # The context manager guarantees the file is flushed and closed; the
    # original never closed its handle.
    with open(path, "w") as out:
        # Feature type column: use i for indicator and q for quantity.
        out.writelines('{0}\t{1}\tq\n'.format(i, feat)
                       for i, feat in enumerate(features))


if __name__ == "__main__":
    # Imported here because the file's import block does not provide sys.
    import sys

    write_fmap(sys.argv[1], features2)
    print("Done!")