具体推导可以参考 SHAP 的原论文。
这里的决策树模型及数据采用论文中的关系式:target = 80·fever·cough + 10·cough。下面先用决策树和这份简单的数据进行计算。
import shap
import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeRegressor
# Toy dataset: seven samples with two binary features.
# Each row of X is [fever, cough].
X = np.array([
    [1, 0],
    [1, 1],
    [0, 1],
    [0, 0],
    [1, 0],
    [1, 1],
    [0, 1],
])
df = pd.DataFrame(X, columns=['fever', 'cough'])
# Target: 80 when both features are 1, plus 10 whenever cough is 1.
df['target'] = 80 * df['fever'] * df['cough'] + df['cough'] * 10
df1 = df.loc[:, ['fever', 'cough']]  # feature matrix used for fitting
y = df['target'].to_numpy()  # regression target as a NumPy array
# Fit a regression tree (default hyper-parameters) on the two features.
tree_reg1 = DecisionTreeRegressor()
tree_reg1.fit(df1, y)
# Build the explainer from the model fitted above. It must be `tree_reg1`;
# no object named `tree_reg` exists, so that name would raise NameError.
explainer_shap = shap.TreeExplainer(tree_reg1)
# Pass the feature matrix to compute the SHAP values for every sample.
shap_values_all = explainer_shap.shap_values(df1)
第一个样本的shap值为
# Display the SHAP values of the first sample (row 0 of df1); the entries
# follow df1's column order, i.e. fever first, then cough.
shap_values_all[0]
输出为:array([ 11.42857143, -40. ]),两个分量依次对应 fever 和 cough 的 SHAP 值。
# Hold fever at sample 0's value while cough takes every value observed in
# the data. The mean of these predictions is used further down as the model
# output expected when only fever is known.
fever_fixed = df1.assign(fever=df1['fever'].iloc[0])
# A single batched predict on a DataFrame that keeps the training column
# names: same numbers as predicting row by row, without scikit-learn's
# "X does not have valid feature names" warning.
all_list = list(tree_reg1.predict(fever_fixed))
# Hold cough at sample 0's value while fever takes every value observed in
# the data. The mean of these predictions is used further down as the model
# output expected when only cough is known.
cough_fixed = df1.assign(cough=df1['cough'].iloc[0])
# Echo each perturbed sample ([fever_i, cough_0]) as an array, one per line.
for perturbed_row in cough_fixed.to_numpy():
    print(perturbed_row)
# A single batched predict on a DataFrame that keeps the training column
# names: same numbers as predicting row by row, without scikit-learn's
# "X does not have valid feature names" warning.
all_list_cough = list(tree_reg1.predict(cough_fixed))
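以下按照 Shapley 值的定义,分别手动计算第一个样本中 fever 和 cough 的 SHAP 值。只有两个特征时,每个特征的贡献是两种加入顺序下边际贡献的平均:
$\phi_{fever} = \frac{1}{2}\left[f(x) - E[f \mid cough]\right] + \frac{1}{2}\left[E[f \mid fever] - E[f]\right]$,cough 的公式同理(交换 fever 与 cough)。
其中 all_list 的均值对应 $E[f \mid fever]$(固定 fever、遍历 cough),all_list_cough 的均值对应 $E[f \mid cough]$(固定 cough、遍历 fever),$E[f]$ 取 explainer_shap.expected_value。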
# Manual Shapley value of `fever` for sample 0: the average of its marginal
# contribution when added last (after cough) and when added first.
sample0 = np.array([df['fever'].values[0], df['cough'].values[0]]).reshape(1, -1)
pred_sample0 = tree_reg1.predict(sample0)[0]
mean_fever_fixed = np.array(all_list).mean()        # fever held at sample 0's value
mean_cough_fixed = np.array(all_list_cough).mean()  # cough held at sample 0's value
(pred_sample0 - mean_cough_fixed) / 2 + (mean_fever_fixed - explainer_shap.expected_value) / 2
输出:11.428571564810618(与 TreeExplainer 给出的 11.42857143 一致)
# Manual Shapley value of `cough` for sample 0: the average of its marginal
# contribution when added last (after fever) and when added first.
sample0 = np.array([df['fever'].values[0], df['cough'].values[0]]).reshape(1, -1)
pred_sample0 = tree_reg1.predict(sample0)[0]
mean_fever_fixed = np.array(all_list).mean()        # fever held at sample 0's value
mean_cough_fixed = np.array(all_list_cough).mean()  # cough held at sample 0's value
(pred_sample0 - mean_fever_fixed) / 2 + (mean_cough_fixed - explainer_shap.expected_value) / 2
输出:-39.99999986376081(与 TreeExplainer 给出的 -40 一致,微小差异来自浮点数值误差)
未完待续