import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import f1_score, accuracy_score


class Adaboost:
    """Binary AdaBoost classifier built from depth-1 decision trees.

    Labels are expected to be -1 / +1: ``predict`` adds up the weak
    learners' predictions scaled by their weights and takes the sign of
    the total, which is only meaningful for that encoding.

    Attributes are documented inline in ``__init__``.

    Methods:
        weighted_error(y_true, y_pred, weights): total weight of the
            misclassified samples.
        update_weights(weights, alpha, y_true, y_pred): re-weight the
            samples after one boosting round and renormalise.
        fit(X, y): train up to ``num_classifiers`` weak learners.
        predict(X): sign of the alpha-weighted vote of the learners.
    """

    # Keeps the weighted error strictly inside (0, 1) so that
    # log((1 - e) / e) stays finite even for a perfect weak learner.
    _EPS = 1e-10

    def __init__(self, num_classifiers, *, verbose=True):
        """Create an untrained ensemble.

        Args:
            num_classifiers: maximum number of boosting rounds.
            verbose: when True (the default), ``fit`` prints each round's
                weighted error and ``predict`` prints its result.
        """
        # Upper bound on the number of weak learners trained by fit().
        self.num_classifiers = num_classifiers
        # Controls the progress / result printing.
        self.verbose = verbose
        # Fitted weak learners, in training order.
        self.classifiers = []
        # Vote weight (alpha) of each learner, parallel to self.classifiers.
        self.classifier_weights = []

    @staticmethod
    def weighted_error(y_true, y_pred, weights):
        """Return the sum of ``weights`` over samples where the labels differ.

        Args:
            y_true: true labels (array-like).
            y_pred: predicted labels (array-like, same length).
            weights: per-sample weights (array-like, same length).
        """
        missed = np.asarray(y_true) != np.asarray(y_pred)
        return np.sum(np.asarray(weights)[missed])

    @staticmethod
    def update_weights(weights, alpha, y_true, y_pred):
        """Return the renormalised sample weights for the next round.

        Misclassified samples are scaled by ``exp(alpha)`` and correctly
        classified ones by ``exp(-alpha)``. This symmetric rule is the one
        that pairs with ``alpha = 0.5 * ln((1 - e) / e)`` as computed in
        ``fit``; scaling only the misclassified samples would move the
        weights half as far (in log-ratio) as that alpha intends.

        Args:
            weights: current per-sample weights.
            alpha: vote weight of the learner that produced ``y_pred``.
            y_true: true labels.
            y_pred: the learner's predictions on the same samples.

        Returns:
            A NumPy array of weights that sums to 1.
        """
        missed = np.asarray(y_true) != np.asarray(y_pred)
        scaled = np.asarray(weights, dtype=float) * np.exp(
            np.where(missed, alpha, -alpha)
        )
        return scaled / np.sum(scaled)

    def fit(self, X, y):
        """Train the ensemble on ``X`` / ``y``.

        Any learners left from a previous call are discarded. Training
        stops early once a weak learner has zero weighted error, because
        further rounds would see unchanged sample weights.

        Args:
            X: training features; passed to ``DecisionTreeClassifier.fit``.
            y: training labels, expected to be -1 / +1.

        Raises:
            ValueError: if ``X`` is empty.
        """
        n_samples = len(X)
        if n_samples == 0:
            raise ValueError("Cannot fit Adaboost on an empty training set.")

        # Compare labels as plain arrays so pandas index alignment cannot
        # interfere with the element-wise comparisons below.
        labels = np.asarray(y)

        # Start from scratch so refitting does not stack onto an earlier run.
        self.classifiers = []
        self.classifier_weights = []

        # Every sample starts with the same weight.
        sample_weights = np.full(n_samples, 1.0 / n_samples)

        for _ in range(self.num_classifiers):
            # Train a depth-1 tree on the currently weighted samples.
            classifier = DecisionTreeClassifier(max_depth=1)
            classifier.fit(X, y, sample_weight=sample_weights)
            y_pred = classifier.predict(X)

            error = self.weighted_error(labels, y_pred, sample_weights)
            if self.verbose:
                print(error)
                print('-' * 50)

            # Clip before taking the log: error == 0 would otherwise give
            # alpha = inf and turn every later weight into NaN.
            clipped = np.clip(error, self._EPS, 1 - self._EPS)
            alpha = 0.5 * np.log((1 - clipped) / clipped)

            self.classifiers.append(classifier)
            self.classifier_weights.append(alpha)

            if error <= self._EPS:
                # Perfect learner: the re-weighting would be a no-op.
                break

            sample_weights = self.update_weights(
                sample_weights, alpha, labels, y_pred
            )

    def predict(self, X):
        """Predict labels for ``X``.

        Returns:
            A float array holding the sign of the alpha-weighted sum of the
            learners' predictions. An exact tie yields 0, as does calling
            this before ``fit``.
        """
        votes = np.zeros(len(X))
        for classifier, alpha in zip(self.classifiers, self.classifier_weights):
            votes += alpha * classifier.predict(X)

        y_pred = np.sign(votes)
        if self.verbose:
            print("预测y_pred:", y_pred)
        return y_pred
# Build the toy data set: each row is [feature value, label in {-1, +1}].
df = pd.DataFrame(
    [
        [0, 1],
        [1, 1],
        [2, 1],
        [3, -1],
        [4, -1],
        [5, -1],
        [6, 1],
        [7, 1],
        [8, 1],
        [9, -1],
    ]
)

# Every column except the last is a feature; the last column is the label.
X = df.iloc[:, :-1]
Y = df.iloc[:, -1]
print(X)
print(Y)

# Train a 20-round ensemble and predict on the training samples.
A = Adaboost(20)
A.fit(X, Y)
A.predict(X)
E:/XXXXX/机器学习课程/集成学习/Cls_adaboost.py
0
0 0
1 1
2 2
3 3
4 4
5 5
6 6
7 7
8 8
9 9
0 1
1 1
2 1
3 -1
4 -1
5 -1
6 1
7 1
8 1
9 -1
Name: 1, dtype: int64
0.30000000000000004
--------------------------------------------------
0.34534632929202286
--------------------------------------------------
0.35008562397019116
--------------------------------------------------
0.28001073918680375
--------------------------------------------------
0.25381778971648017
--------------------------------------------------
0.3064962299371764
--------------------------------------------------
0.2816048333195814
--------------------------------------------------
0.2731345864165943
--------------------------------------------------
0.29156133793693806
--------------------------------------------------
0.2823934402541752
--------------------------------------------------
0.33466386078918664
--------------------------------------------------
0.24608157437902595
--------------------------------------------------
0.3502647590291356
--------------------------------------------------
0.25394656115341074
--------------------------------------------------
0.2731513767570225
--------------------------------------------------
0.3056825853100197
--------------------------------------------------
0.2720919041411729
--------------------------------------------------
0.34005542241591524
--------------------------------------------------
0.24744912020573245
--------------------------------------------------
0.3529066226801387
--------------------------------------------------
预测y_pred: [ 1. 1. 1. -1. -1. -1. 1. 1. 1. -1.]
Process finished with exit code 0