机器学习练习:Bagging 与 RandomForest

import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.ensemble import BaggingClassifier,RandomForestClassifier
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
from sklearn.datasets import load_wine
import numpy as np
# Load the wine dataset shipped with scikit-learn and wrap it in pandas
# objects so the feature columns keep their names.
wine = load_wine()
print(wine.feature_names)
x = pd.DataFrame(data=wine.data, columns=wine.feature_names)
y = pd.Series(wine.target)
print(x)

# Shuffled 80/20 train/test split; random_state is pinned so every run
# yields the same partition.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    train_size=0.8,
    test_size=0.2,
    shuffle=True,
    random_state=1,
)
print(x_train.shape)

# Baseline learner: a single depth-1 decision tree (Gini criterion), trained
# on the training split and scored on the held-out split. `fit` returns the
# estimator itself, so construction and training are chained.
DT = DecisionTreeClassifier(
    criterion='gini',
    max_depth=1,
    random_state=1,
).fit(x_train, y_train)
DT_pred = DT.predict(x_test)
print("决策树算法的精度为:{}".format(accuracy_score(y_true=y_test, y_pred=DT_pred)))

# Bagging ensemble of 50 estimators that uses the baseline tree DT as the
# individual learner.
# The estimator is passed positionally on purpose: the keyword was renamed
# from `base_estimator` to `estimator` in scikit-learn 1.2 and the old
# spelling was removed in 1.4, so `base_estimator=DT` raises TypeError on
# current releases. The first positional slot works on old and new versions.
bag = BaggingClassifier(DT, n_estimators=50, random_state=1)
bag.fit(x_train, y_train)
bag_pred = bag.predict(x_test)
print("bagging_50算法的精度为:{}".format(accuracy_score(y_test, bag_pred)))

# Sweep the ensemble size over the even numbers 2..100 and plot the test
# accuracy of the bagging classifier for each size.
# NOTE: this rebinds `x` from the feature DataFrame to the list of ensemble
# sizes. The train/test split has already been taken from the DataFrame, and
# the random-forest sweep further down iterates over this same list.
x = list(range(2, 102, 2))
print(x)
plt.figure(0)
plt.style.use('ggplot')
plt.title("The effect of num_estimators", pad=20)
plt.xlabel("Num of estimators")
plt.ylabel("Test accuracy of BaggingClassifier")
bag_acc = []
for i in x:
    # Estimator passed positionally: the `base_estimator` keyword was renamed
    # to `estimator` in scikit-learn 1.2 and removed in 1.4, so the keyword
    # form fails with TypeError on current releases.
    bag = BaggingClassifier(DT, n_estimators=i, random_state=1)
    bag.fit(x_train, y_train)
    bag_pred = bag.predict(x_test)
    bag_acc.append(accuracy_score(y_test, bag_pred))
plt.plot(x, bag_acc)
# Write the image before show(); the figure may be gone once the window closes.
plt.savefig('bagging_est.jpg')
plt.show()

# Repeat the experiment above with a random forest of 50 trees, trained on
# the training split and scored on the held-out split.
RF = RandomForestClassifier(
    random_state=1,
    n_estimators=50,
).fit(x_train, y_train)
RF_Pred = RF.predict(x_test)
print("RF_50算法的精度为:{}".format(accuracy_score(y_true=y_test, y_pred=RF_Pred)))

# Step through the ensemble sizes held in `x` and record how the number of
# trees affects random-forest accuracy on the test split.
plt.figure(1)
plt.style.use("ggplot")
RF_acc = []
for i in x:
    # Fresh forest per size; random_state is fixed so runs are repeatable.
    RF = RandomForestClassifier(random_state=1, n_estimators=i).fit(x_train, y_train)
    out = RF.predict(x_test)
    RF_acc.append(accuracy_score(y_test, out))

# Draw the accuracy curve, then label, save and display the figure.
plt.plot(x, RF_acc)
plt.title("The effect of num_estimators", pad=20)
plt.xlabel("Num of estimators")
plt.ylabel("Test accuracy of RandomForestClassifier")
plt.savefig("RandomForest_est.jpg")
plt.show()

机器学习练习bagging&RandomForest_第1张图片

你可能感兴趣的:(机器学习,机器学习,决策树,人工智能)