Bagging (bootstrap aggregating) — a minimal from-scratch demo in Python

# _*_coding:utf-8_*_
"""
  一个原始数据的bagging分类,编辑代码思想的步骤:
           1. 根据要实现的需求,导入数据处理和功能调用的包/模块
           2. 创建数据
           3. 创建变量n_tree:集成分类器棵数
           4. 创建存储分类器的存储器
           5. 循环1-n_tree的训练和预测:
                                   训练 01:训练循环体中选用抽取方式并调用
                                   训练 02:将x,y从数据表格中取出
                                   训练 03:实例化分类器
                                   训练 04:训练
                                   训练 05:每循环一次分类器存储到存储器
                                   预测 01:重新创建X,Y变量取出数据
                                   预测 02:初始化分类器计算的总值total
                                   预测 03:预测循环体中存储器每一次的predict()
                                   预测 04:total叠加
                                   预测 05:预测y
           6. 打分
"""
import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import f1_score, accuracy_score

# Toy data set: column 0 is the single feature, column 1 the ±1 class label.
df = pd.DataFrame([
    [0, 1], [1, 1], [2, 1],
    [3, -1], [4, -1], [5, -1],
    [6, 1], [7, 1], [8, 1],
    [9, -1],
])

N_TREE = 100   # number of weak learners in the ensemble
models = []    # container for the fitted weak learners

# Build an ensemble of N_TREE weak learners: depth-1 decision trees
# ("stumps"), each fitted on its own bootstrap sample.
for _ in range(N_TREE):
    # DataFrame.sample(frac=1.0, replace=True) draws len(df) rows WITH
    # replacement -- the classic bootstrap resample.
    # (n / frac: sample size; replace: with/without replacement;
    #  weights / random_state / axis: optional extras, left at defaults.)
    boot = df.sample(frac=1.0, replace=True)
    features = boot.iloc[:, :1]   # first column only, as a DataFrame
    labels = boot.iloc[:, -1]     # last column holds the ±1 class label
    stump = DecisionTreeClassifier(max_depth=1)
    stump.fit(features, labels)   # train this weak learner
    models.append(stump)          # keep it for the voting stage

# --- Ensemble prediction: majority vote of the N_TREE stumps ---
print('#' * 100)
x = df.iloc[:, :-1]   # features (all columns but the last)
y = df.iloc[:, -1]    # true ±1 labels

# One vote accumulator slot per row of df (df.shape[0] == number of samples).
total = np.zeros(df.shape[0])
print("尚未循环训练的total", total)

# Sum the ±1 votes of every fitted tree.
for model in models:
    total += np.array(model.predict(x))

print(total)

# Majority vote. BUG FIX: np.sign() maps an exact tie (total == 0, possible
# because N_TREE is even) to 0, which is not a valid class label and would
# corrupt the metrics below — break ties toward +1 instead.
y_hat = np.where(total >= 0.0, 1.0, -1.0)
print(y_hat)
print(accuracy_score(y, y_hat))
print(f1_score(y, y_hat))

print('-' * 100)
print('整理一次有放回抽样的训练和预测')
# Baseline for comparison: a single depth-1 tree fitted once on the full
# data, scored on the same x/y as the bagged ensemble above.
model01 = DecisionTreeClassifier(max_depth=1).fit(x, y)
y_hat01 = model01.predict(x)
for value in (y_hat01, accuracy_score(y, y_hat01), f1_score(y, y_hat01)):
    print(value)

####################################################################################################
尚未循环训练的total [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
[ 58.  58.  52.  -8. -38. -36.  -4.   4.   4. -40.]
[ 1.  1.  1. -1. -1. -1. -1.  1.  1. -1.]
0.9
0.9090909090909091
----------------------------------------------------------------------------------------------------
整理一次有放回抽样的训练和预测
[ 1  1  1 -1 -1 -1 -1 -1 -1 -1]
0.7
0.6666666666666666

Process finished with exit code 0

你可能感兴趣的:(决策树,python)