```python
import math

import numpy as np


class Adaboost_tree:
    def __init__(self, X, Y):
        self.X = np.array(X)
        self.Y = np.array(Y)
        self.N = len(X)
        self.feature_num = len(X[0])
        self.w = np.array([1 / self.N] * self.N)  # uniform initial sample weights
        self.g_x = []  # weak classifiers: (feature index, threshold, orientation, alpha)
        self.get_feature_dict()

    def compute_error(self, y):
        # Weighted error rate: the total weight of the misclassified samples.
        y = np.array(y)
        return np.sum(self.w[y != self.Y])

    def compute_am(self, em):
        return 1 / 2 * math.log((1 - em) / em)

    def get_feature_dict(self):
        # Collect the possible values of each feature, e.g. feature 0 only
        # takes 0 and 1, while feature 1 takes 1, 2 and 3.
        self.f_dict = {}
        for i in range(self.feature_num):
            self.f_dict[i] = list(set(x[i] for x in self.X))

    def fit(self, max_iter=20):
        for _ in range(max_iter):
            index_list = []
            error_list1 = []
            error_list2 = []
            pred_y_list1 = []
            pred_y_list2 = []
            for i in range(self.feature_num):
                for j in self.f_dict[i]:
                    # For each (feature, threshold) stump, try both
                    # orientations (predict 1 below the threshold, or -1
                    # below it) and record the predictions and weighted errors.
                    y1 = [1 if m[i] <= j else -1 for m in self.X]
                    y2 = [-1 if m[i] <= j else 1 for m in self.X]
                    error1 = self.compute_error(y1)
                    error2 = self.compute_error(y2)
                    index_list.append((i, j))
                    error_list1.append(error1)
                    error_list2.append(error2)
                    pred_y_list1.append(y1)
                    pred_y_list2.append(y2)
            # Keep the stump with the smallest weighted error.
            if min(error_list1) <= min(error_list2):
                min_index = error_list1.index(min(error_list1))
                split_f_index, split_value = index_list[min_index]
                pred_y = pred_y_list1[min_index]
                positive = 1
            else:
                min_index = error_list2.index(min(error_list2))
                split_f_index, split_value = index_list[min_index]
                pred_y = pred_y_list2[min_index]
                positive = -1
            em = self.compute_error(pred_y)
            if em == 0:
                print('em is zero')
                break
            am = self.compute_am(em)
            self.g_x.append((split_f_index, split_value, positive, am))
            # Re-weight the samples and normalize.
            w_list = self.w * np.exp(-am * self.Y * np.array(pred_y))
            self.w = w_list / np.sum(w_list)

    def predict_single(self, x):
        # Alpha-weighted vote of all stored stumps.
        result = 0
        for split_f_index, split_value, positive, am in self.g_x:
            if x[split_f_index] <= split_value:
                result += positive * am
            else:
                result -= positive * am
        return np.sign(result)

    def predict(self, X):
        result = [self.predict_single(x) for x in X]
        print("predict:", result)
        return result
```
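The quantities computed in `compute_am` and the re-weighting step in `fit` are the standard AdaBoost updates: with $e_m$ the weighted error of the $m$-th stump $G_m$,

$$\alpha_m = \frac{1}{2}\ln\frac{1-e_m}{e_m},\qquad w_{m+1,i} = \frac{w_{m,i}\,e^{-\alpha_m y_i G_m(x_i)}}{Z_m},\qquad Z_m = \sum_{i=1}^{N} w_{m,i}\,e^{-\alpha_m y_i G_m(x_i)},$$

and the final classifier is $G(x) = \operatorname{sign}\!\big(\sum_m \alpha_m G_m(x)\big)$, which is exactly what `predict_single` evaluates.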
```python
def main():
    X = np.array([[0, 1, 3], [0, 3, 1], [1, 2, 2], [1, 1, 3], [1, 2, 3],
                  [0, 1, 2], [1, 1, 2], [1, 1, 1], [1, 3, 1], [0, 2, 1]])
    Y = np.array([-1, -1, -1, -1, -1, -1, 1, 1, -1, -1])
    Adaboost_tree_ = Adaboost_tree(X, Y)
    Adaboost_tree_.fit(20)
    print(Adaboost_tree_.predict(X))


if __name__ == '__main__':
    main()
```
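For a quick cross-check, the same toy data can be fed to scikit-learn's `AdaBoostClassifier` (a sketch assuming scikit-learn is installed; this is not part of the original exercise):

```python
# Sanity check against scikit-learn's AdaBoost (assumes scikit-learn is
# available; not part of the original exercise).
import numpy as np
from sklearn.ensemble import AdaBoostClassifier

X = np.array([[0, 1, 3], [0, 3, 1], [1, 2, 2], [1, 1, 3], [1, 2, 3],
              [0, 1, 2], [1, 1, 2], [1, 1, 1], [1, 3, 1], [0, 2, 1]])
Y = np.array([-1, -1, -1, -1, -1, -1, 1, 1, -1, -1])

clf = AdaBoostClassifier(n_estimators=20)  # default base learner is a depth-1 tree
clf.fit(X, Y)
print(clf.predict(X))
```

The default base estimator is a depth-1 decision tree, i.e. the same kind of stump the hand-written class enumerates.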
Reference: Statistical Learning Methods, Chapter 8 exercises: the AdaBoost algorithm for classification and the boosting-tree algorithm for regression (code and implementation).
Problem: Compare the learning strategies and algorithms of the support vector machine, AdaBoost, and the logistic regression model.

Broadly speaking, learning strategies fall into just two categories, structural risk minimization and empirical risk minimization; the corresponding algorithms can be looked up in the book.
Model | Learning strategy | Algorithm
---|---|---
Support vector machine | Structural risk minimization | Sequential minimal optimization (SMO)
AdaBoost | Empirical risk minimization | Forward stagewise algorithm (as a binary-classification learning algorithm)
Logistic regression | Empirical risk minimization | Improved iterative scaling, gradient descent, Newton's method, quasi-Newton methods
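In terms of objective functions, the three strategies can be written as follows (a sketch in standard notation, not taken from the book's answer):

$$\min_{w,b}\ \frac{1}{2}\|w\|^2 + C\sum_{i=1}^{N}\xi_i \quad\text{(SVM: regularized hinge loss, structural risk)},$$

$$\min_{f}\ \sum_{i=1}^{N}\exp\big(-y_i f(x_i)\big) \quad\text{(AdaBoost: empirical exponential risk, minimized stagewise)},$$

$$\max_{w}\ \sum_{i=1}^{N}\log P(y_i \mid x_i; w) \quad\text{(logistic regression: maximum likelihood, i.e. empirical logistic risk)}.$$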