# 看了几遍西瓜书, 感觉还是似懂非懂, 自己手编一下感觉清晰多了, 有兴趣的朋友可以看一眼, 然后自己独立编一下
# (Blog intro: "After reading the watermelon book a few times it still felt fuzzy;
#  hand-coding it made things much clearer." Commented out so the file is valid Python.)
import matplotlib
from matplotlib import cm
import matplotlib.pyplot as plt
import numpy as np
from sklearn.datasets import make_moons
from sklearn.tree import DecisionTreeClassifier
class Adaboost:
    """Binary AdaBoost built from sklearn-style weak learners.

    Labels must be in {-1, +1}. Weak learners are fit sequentially on a
    reweighted sample distribution; the ensemble output is the sign of the
    alpha-weighted sum of the weak learners' raw predictions.
    """

    def __init__(self, X, y, lr=1.0):
        """Store the training set and initialize a uniform sample distribution.

        Args:
            X: training inputs, shape (n_samples, n_features).
            y: training labels in {-1, +1}, shape (n_samples,).
            lr: learning rate; shrinks every learner's alpha by this factor.
        """
        self.X = X
        self.y = y
        self.lr = lr                      # learning rate
        self.classifiers = []             # collected weak learners
        self.alphas = []                  # weight (alpha) of each weak learner
        self.num_samples = len(self.X)    # number of training samples
        # Sample weights start uniform; they are re-normalized every round.
        self.weights = np.full(self.num_samples, 1 / self.num_samples)

    def addClassifier(self, classifier=None, plotFlag=False):
        """Fit one weak learner on the current sample weights and add it.

        Args:
            classifier: an UNFITTED sklearn-style estimator supporting
                fit(X, y, sample_weight=...) and predict(X). If None, a fresh
                depth-1 decision stump is created per call. (A `None` sentinel
                replaces the old mutable default argument, which made every
                call share — and re-fit — one single estimator object.)
            plotFlag: if True, scatter-plot this learner's predictions.
        """
        if classifier is None:
            classifier = DecisionTreeClassifier(max_depth=1)
        classifier.fit(self.X, self.y, sample_weight=self.weights)  # train weak learner
        y_predict = classifier.predict(self.X)                      # its predictions, shape (n,)
        # Weighted error rate under the current sample distribution.
        error_rate = np.sum((y_predict != self.y) * self.weights) / np.sum(self.weights)
        # Epsilon guards against log(0) / division by zero for a perfect
        # (error_rate == 0) or totally wrong (error_rate == 1) learner.
        eps = 1e-10
        alpha = 0.5 * self.lr * np.log((1 - error_rate + eps) / (error_rate + eps))
        # Up-weight misclassified samples (y_predict * y == -1), down-weight
        # correct ones, then re-normalize to a probability distribution.
        self.weights *= np.exp(-alpha * y_predict * self.y)
        self.weights /= np.sum(self.weights)
        self.classifiers.append(classifier)  # collect weak learner
        self.alphas.append(alpha)            # collect its alpha
        if plotFlag:  # visualize where this learner cuts the data
            plt.scatter(self.X[y_predict == -1][:, 0], self.X[y_predict == -1][:, 1], marker='o')
            plt.scatter(self.X[y_predict == 1][:, 0], self.X[y_predict == 1][:, 1], marker='s')
            plt.title('Cut Mark')
            plt.show()

    def predict(self, X, original=False):
        """Predict with the ensemble.

        Args:
            X: inputs, shape (m, n_features).
            original: if True return the raw alpha-weighted score (float per
                sample); otherwise return its sign in {-1, 0, +1}.
        """
        y_predict = np.zeros([len(X)]).astype("float")
        for classifier, alpha in zip(self.classifiers, self.alphas):
            y_predict += alpha * classifier.predict(X)
        if original:  # raw ensemble score
            return y_predict
        else:
            return np.sign(y_predict)

    def plot(self, style='2d'):
        """Plot the decision surface over the 2-D training data.

        style='2d' draws the class regions with pcolormesh; style='3d' draws
        the raw ensemble score as a surface with the 0-level contour.
        Silently returns for non-2-D feature matrices.
        """
        if len(self.X.shape) != 2:
            return
        y_predict = self.predict(self.X)
        error_rate = np.sum(y_predict != self.y) / self.num_samples  # training error
        fig = plt.figure(figsize=(5, 4), dpi=140)
        # Axis limits from the data range.
        xmin, xmax = np.min(self.X[:, 0], axis=0), np.max(self.X[:, 0], axis=0)
        ymin, ymax = np.min(self.X[:, 1], axis=0), np.max(self.X[:, 1], axis=0)
        if style == "2d":
            # Dense grid of test points covering the data range.
            test_X, test_Y = np.mgrid[xmin:xmax:200j, ymin:ymax:200j]
            grid_test = np.stack((test_X.flat, test_Y.flat), axis=1)
            grid_hat = self.predict(grid_test)           # hard class labels
            grid_hat = grid_hat.reshape(test_X.shape)    # back to grid shape
            ax = fig.add_subplot(1, 1, 1)
            ax.set(title='Adaboost(iter_num:{},error_rate:{})'.format(len(self.alphas), error_rate))
            ax.set_xlim(xmin, xmax)
            ax.set_ylim(ymin, ymax)
            cm_light = matplotlib.colors.ListedColormap(['#A0FFA0', '#FFA0A0', '#A0A0FF'])
            ax.pcolormesh(test_X, test_Y, grid_hat, cmap=cm_light)  # predicted regions
            ax.scatter(self.X[self.y == -1][:, 0], self.X[self.y == -1][:, 1], marker='o')
            ax.scatter(self.X[self.y == 1][:, 0], self.X[self.y == 1][:, 1], marker='s')
        elif style == "3d":
            # Coarser grid for the 3-D surface.
            test_X, test_Y = np.mgrid[xmin:xmax:50j, ymin:ymax:50j]
            grid_test = np.stack((test_X.flat, test_Y.flat), axis=1)
            grid_hat = self.predict(grid_test, original=True)       # raw scores
            zmax, zmin = np.max(grid_hat) + 5, np.min(grid_hat) - 5  # z-axis range
            grid_hat = grid_hat.reshape(test_X.shape)
            ax = fig.add_subplot(1, 1, 1, projection='3d')
            ax.set(title='Adaboost(iter_num:{},error_rate:{})'.format(len(self.alphas), error_rate))
            ax.set_xlim(xmin, xmax)
            ax.set_ylim(ymin, ymax)
            ax.set_zlim(zmin, zmax)
            ax.view_init(elev=17, azim=129)  # fixed viewing angle
            cmap = cm.jet
            surf = ax.plot_surface(test_X, test_Y, grid_hat, rstride=1, cstride=1,
                                   cmap=cmap, linewidth=1, antialiased=False)
            fig.colorbar(surf, shrink=0.5, aspect=10)
            # Decision boundary: the 0-level contour projected to the floor.
            # (Fixed: contour's keyword is `linewidths`, not `linewidth`.)
            ax.contour(test_X, test_Y, grid_hat, [0], zdir='z', offset=zmin,
                       colors='k', linewidths=2)
            ax.scatter(self.X[self.y == -1][:, 0], self.X[self.y == -1][:, 1],
                       np.full(len(self.X[self.y == -1][:, 1]), zmin), marker='o')
            # Fixed: z-coordinates for the +1 class previously used the -1
            # class mask, which breaks whenever the classes are unbalanced.
            ax.scatter(self.X[self.y == 1][:, 0], self.X[self.y == 1][:, 1],
                       np.full(len(self.X[self.y == 1][:, 1]), zmin), marker='s')
        plt.show()
if __name__ == '__main__':
    # Build a noisy two-moons dataset and remap labels {0, 1} -> {-1, +1}.
    data_X, data_y = make_moons(n_samples=500, noise=0.3, random_state=3)
    data_y[np.where(data_y == 0)] = -1
    booster = Adaboost(data_X, data_y, lr=0.6)
    # Boost 50 rounds of depth-1 stumps, passing a fresh estimator each time.
    for _ in range(50):
        booster.addClassifier(classifier=DecisionTreeClassifier(max_depth=1),
                              plotFlag=False)
    # Render the raw ensemble score as a 3-D surface with its 0-level contour.
    booster.plot(style='3d')
# 集成学习-从零推导和实现adaboost与3D可视化
# (Blog title: "Ensemble learning — deriving and implementing AdaBoost from
#  scratch with 3D visualization." Commented out so the file is valid Python.)