决策树回归模型和带AdaBoost算法的决策树回归模型

以scikit-learn自带的波士顿房价数据集boston为数据集,建立估算房屋价格的模型:

import numpy as np
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import AdaBoostRegressor
from sklearn import datasets
from sklearn.metrics import mean_squared_error,explained_variance_score
from sklearn.utils import shuffle
import matplotlib.pyplot as plt

# Load the Boston housing data and split it into train/test partitions.
# NOTE(review): load_boston was removed from scikit-learn 1.2 — on modern
# versions this call raises; TODO migrate to another housing dataset.
housing_data = datasets.load_boston()
# Shuffle features and targets together; random_state pins the permutation
# so the split is reproducible.
X, y = shuffle(housing_data.data, housing_data.target, random_state=7)
# First 80% of the shuffled samples train the models, the rest test them
# (slicing is half-open: X[:n] takes indices 0..n-1, X[n:] the remainder).
num_training = int(0.8 * len(X))
X_train, y_train = X[:num_training], y[:num_training]
X_test, y_test = X[num_training:], y[num_training:]

# Fit a plain decision-tree regressor; max_depth=4 keeps the tree from
# growing to arbitrary depth and overfitting.
dt_regressor = DecisionTreeRegressor(max_depth=4)
dt_regressor.fit(X_train, y_train)

# Fit the same depth-4 base learner boosted with AdaBoost (400 rounds,
# fixed random_state for reproducibility).
ab_regressor = AdaBoostRegressor(
    DecisionTreeRegressor(max_depth=4),
    n_estimators=400,
    random_state=7,
)
ab_regressor.fit(X_train, y_train)

def _report_performance(title, regressor, X_eval, y_eval):
    """Print MSE and explained-variance score for *regressor* on the
    held-out data, preceded by *title*.

    Extracted because the decision-tree and AdaBoost evaluations were
    verbatim duplicates of each other.
    """
    y_pred = regressor.predict(X_eval)
    print(title)
    print("mean_squared_error:%.2f" % mean_squared_error(y_eval, y_pred))
    print("Explained variance score:%.2f" % explained_variance_score(y_eval, y_pred))

# Evaluate the plain decision-tree regressor on the test set.
_report_performance("#### 决策树学习效果: ####", dt_regressor, X_test, y_test)
# Evaluate the AdaBoost-boosted regressor on the same test set.
_report_performance("#### AdaBoost算法学习效果: ####", ab_regressor, X_test, y_test)

# Normalized feature importances learned by each model (each array sums to 1).
# A previous, non-normalized computation via the semi-private
# dt_regressor.tree_.compute_feature_importances(...) was dead code (its
# result was never used anywhere in this script) and has been removed.
dt_feature_importances2 = dt_regressor.feature_importances_
ab_feature_importances = ab_regressor.feature_importances_
print("dt feature importances :" + str(dt_feature_importances2))
print("ab feature importances :" + str(ab_feature_importances))

def plot_feature_importance(feature_importances, title, feature_names):
    """Show a bar chart of feature importances, sorted high to low.

    Importances are rescaled so the largest value equals 100; bars are
    labeled with the corresponding entries of *feature_names*.
    """
    # Rescale to percentages of the largest importance.
    scaled = 100.0 * (feature_importances / max(feature_importances))
    # Indices of the scores from highest to lowest
    # (np.argsort is ascending, np.flipud reverses it).
    order = np.flipud(np.argsort(scaled))
    # Bar positions, offset by 0.5 so tick labels sit under bar centers.
    positions = np.arange(len(order)) + 0.5

    plt.figure()
    plt.bar(positions, scaled[order], align='center')
    plt.xticks(positions, feature_names[order])
    plt.ylabel("Relative Importance")
    plt.title(title)
    plt.show()

plot_feature_importance(dt_feature_importances2,"Decision Tree Regressor",housing_data.feature_names)
plot_feature_importance(ab_feature_importances,"AdaBoost Regressor",housing_data.feature_names)
# In practice, building different regressors on this dataset shows LSTAT as
# the most important feature, which agrees with the AdaBoost-boosted model's
# estimate (the plain decision-tree regressor instead ranks RM highest).
# This illustrates how AdaBoost improves the decision-tree regressor's fit.

关于AdaBoost的笔记:

决策树回归模型和带AdaBoost算法的决策树回归模型_第1张图片

 

你可能感兴趣的:(Python机器学习)