【XGBoost】XGBoost实战俩小实验

目录

1.导包

2.鸢尾花分类

3.波士顿房价回归

4.完整代码


1.导包

先导包:

import time
import xgboost as xgb
from xgboost import plot_importance,plot_tree
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_boston
import matplotlib
matplotlib.use('TkAgg')  # 'TkAgg' can show GUI in imshow()
# matplotlib.use('Agg')  # 'Agg' will not show GUI
from matplotlib import pyplot as plt
import os
os.environ["PATH"] += os.pathsep + 'C:/Program Files (x86)/Graphviz2.37/bin' # alter the graphviz bin path here
plot_DT = False # graphviz 用来绘制决策树,时间比较久

2.鸢尾花分类

使用XGBoost的分类器,按照Sklearn的八股文式定义法,训练模型并测试。

最终在鸢尾花数据集上的分类准确率可以达到96.67%:

【XGBoost】XGBoost实战俩小实验_第1张图片

绘制的特征重要性如下图:

【XGBoost】XGBoost实战俩小实验_第2张图片

3.波士顿房价回归

同上使用XGBoost的回归器,按照Sklearn的八股文式定义法,训练模型。(注意:`load_boston` 在 scikit-learn 1.2 及之后的版本中已被移除,新版本环境需改用其他数据集或安装旧版 scikit-learn。)

绘制的特征重要性如下图:【XGBoost】XGBoost实战俩小实验_第3张图片

4.完整代码

完整代码如下,备注完整,需要自取:

import time
import xgboost as xgb
from xgboost import plot_importance,plot_tree
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_boston
import matplotlib
matplotlib.use('TkAgg')  # 'TkAgg' can show GUI in imshow()
# matplotlib.use('Agg')  # 'Agg' will not show GUI
from matplotlib import pyplot as plt
import os
os.environ["PATH"] += os.pathsep + 'C:/Program Files (x86)/Graphviz2.37/bin' # alter the graphviz bin path here
plot_DT = False


def XGBClassifier_Example():
    """Train an XGBoost classifier on the iris dataset, print test accuracy,
    plot feature importance, and optionally render the first tree.

    Side effects: prints accuracy, opens a matplotlib window, and (when the
    module-level flag plot_DT is True) writes 'tree.png' via graphviz.
    """
    # Load the iris dataset and split 80/20 into train/test sets.
    iris = load_iris()
    X, y = iris.data, iris.target
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=1234565)

    # Train the model. NOTE: the legacy `silent` parameter was removed from
    # xgboost's sklearn API (>= 1.0); `verbosity=0` is the supported way to
    # suppress log output.
    model = xgb.XGBClassifier(max_depth=5, learning_rate=0.1, n_estimators=160,
                              verbosity=0, objective='multi:softmax')
    model.fit(X_train, y_train)

    # Predict on the held-out test set.
    y_pred = model.predict(X_test)

    # Report accuracy (fixed typo: output previously read "accuarcy").
    accuracy = accuracy_score(y_test, y_pred)
    print("accuracy: %.2f%%" % (accuracy * 100.0))

    # Show feature importance scores.
    plot_importance(model)
    plt.show()

    # Optionally render the first learned tree with graphviz (slow).
    if plot_DT is True:
        plot_tree(model, num_trees=0, rankdir='LR')
        fig = plt.gcf()
        fig.savefig('tree.png')

def XGBRegressor_Example():
    """Train an XGBoost regressor on the Boston housing dataset and plot
    feature importance; optionally render two of the learned trees.

    Side effects: opens a matplotlib window; may draw trees via graphviz
    when the module-level flag plot_DT is True.
    """
    # NOTE(review): load_boston was removed in scikit-learn >= 1.2; with a
    # modern sklearn this call raises ImportError. Either pin an older
    # scikit-learn or switch to e.g. fetch_california_housing.
    boston = load_boston()
    X, y = boston.data, boston.target

    # 80/20 train/test split.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=0)

    # NOTE: `silent` was removed from xgboost's sklearn API (>= 1.0);
    # `verbosity=0` is the supported replacement. reg:gamma assumes strictly
    # positive targets, which house prices satisfy.
    model = xgb.XGBRegressor(max_depth=5, learning_rate=0.1, n_estimators=160,
                             verbosity=0, objective='reg:gamma')
    model.fit(X_train, y_train)

    # Predict on the test set (predictions are not otherwise evaluated here).
    ans = model.predict(X_test)

    # Optionally render individual learned trees with graphviz (slow).
    if plot_DT is True:
        plot_tree(model, num_trees=0)  # the first tree (index 0)
        # Fixed comment: num_trees=2 is the third tree, left-to-right layout.
        plot_tree(model, num_trees=2, rankdir='LR')

    # Show feature importance scores.
    plot_importance(model)
    plt.show()


# Run both demos only when executed as a script, so importing this module
# does not trigger training and GUI pop-ups as a side effect.
if __name__ == "__main__":
    XGBClassifier_Example()
    XGBRegressor_Example()

你可能感兴趣的:(python,sklearn,机器学习)