集成学习入门 - 1 混合训练数据

  1. 决策树
# Decision tree: fit a single classifier on the iris data set and report its
# mean accuracy on a held-out 20% test split.
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split

X, y = load_iris(return_X_y=True)
# A fixed random_state keeps the train/test partition identical across runs.
train_X, test_X, train_Y, test_Y = train_test_split(
    X, y, test_size=0.2, random_state=123
)
# fit() returns the fitted estimator, so construction and training can be chained.
tree = DecisionTreeClassifier().fit(train_X, train_Y)
accuracy = tree.score(test_X, test_Y)
print(accuracy)
# Output recorded in the original post: 0.9666666666666667
  2. 随机森林
# Random forest: train an ensemble of 8 trees on the iris data set, report its
# mean accuracy on a held-out 10% test split, then show the predicted labels.
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

X, y = load_iris(return_X_y=True)
# A fixed random_state keeps the train/test partition identical across runs.
train_X, test_X, train_Y, test_Y = train_test_split(
    X, y, test_size=0.1, random_state=123
)
# NOTE: the forest itself is built without a random_state, so the fitted trees
# (and potentially the numbers below) can differ from run to run.
forest = RandomForestClassifier(n_estimators=8)
forest.fit(train_X, train_Y)
# Print the test accuracy; this is the statement the "1.0" output belongs to.
print(forest.score(test_X, test_Y))
# Output recorded in the original post: 1.0
rf_output = forest.predict(test_X)
print(rf_output)
# Output recorded in the original post: [1 2 2 1 0 2 1 0 0 1 2 0 1 2 2]
  3. 不放回采样
# Sampling without replacement: a value that has been drawn is not put back,
# so it cannot appear twice inside the same bucket.
from sklearn.utils import resample
import numpy as np

# Seed NumPy's global RNG so the draws below are repeatable.
np.random.seed(123)
data = list(range(1, 10))
num_divisions = 2    # number of buckets to draw
# Each bucket is an independent draw of 5 distinct values from `data`;
# separate buckets may still share values with each other.
list_of_data_divisions = [
    resample(data, replace=False, n_samples=5)
    for _ in range(num_divisions)
]
print('Sample', list_of_data_divisions)
# Output recorded in the original post:
# Sample [[8, 1, 6, 7, 4], [4, 6, 5, 3, 8]]
  4. 放回采样
# Sampling with replacement: each drawn value is put back before the next
# draw, so the same value may appear more than once inside a bucket.
from sklearn.utils import resample
import numpy as np

# Seed NumPy's global RNG so the draws below are repeatable.
np.random.seed(123)
data = [1, 2, 3, 4, 5, 6, 7, 8, 9]
num_divisions = 3    # number of buckets to draw
list_of_data_divisions = []
for _ in range(num_divisions):
    # replace=True is what makes this sampling *with* replacement;
    # replace=False would yield 4 distinct values per bucket instead.
    sample = resample(data, replace=True, n_samples=4)  # 4 values per bucket
    list_of_data_divisions.append(sample)
print('Sample', list_of_data_divisions)
# Prints 'Sample' followed by a list of 3 buckets holding 4 values each.
# NOTE: the output previously recorded for this snippet,
#   Sample [[8, 1, 6, 7], [4, 6, 5, 3], [3, 2, 9, 8]]
# came from a run with replace=False (no duplicates within a bucket) and does
# not reflect sampling with replacement; re-run to obtain the actual values.

你可能感兴趣的:(集成学习入门 - 1 混合训练数据)