#-*- coding:UTF-8 -*-
import numpy as np
class ABSboost:
    """AdaBoost over one-feature equality stumps (statistical-learning exercise 8.1).

    Each weak learner G(x) looks at a single feature column and predicts
    r if x == v else -r, with r in the observed label set {-1, +1}.
    Boosting weights each round's learner by am = 0.5 * log((1 - em) / em).

    Attributes:
        Cdt:   list of feature (condition) names, one per column of X.
        X:     2-D numpy array of samples, shape (N, len(Cdt)).
        Y:     1-D array of labels in {-1, +1}.
        N:     number of samples.
        D:     sample-weight distribution D_m of each boosting round (filled by TreeS).
        Gx:    chosen learner per round: [em, am, [fn, v, r], condition_name].
        AllGX: condition name -> list of every candidate stump (built up front).
    """

    # Keeps log((1-em)/em) finite when a stump classifies perfectly (em == 0);
    # the original code raised ZeroDivisionError in that case.
    _EPS = 1e-12

    def __init__(self, Cdt, X, Y):
        self.Cdt = Cdt
        self.X = X
        self.Y = Y
        self.N = len(Y)
        self.D = []   # one weight distribution per boosting round
        self.Gx = []  # one selected weak learner per boosting round
        self.createGxFun()  # enumerate every possible classifier up front

    def createGxFun(self):
        """Build AllGX: for each condition, every stump [fn, v, r].

        fn(x) returns r when x == v and -r otherwise.  v and r are bound as
        lambda defaults to avoid the late-binding closure pitfall.
        """
        self.AllGX = {}
        labels = set(self.Y)
        for col, cond in enumerate(self.Cdt):
            values = set(self.X[:, col])  # distinct observed values of this feature
            self.AllGX[cond] = [
                [lambda x, _v=v, _r=r: _r if x == _v else -_r, v, r]
                for v in values
                for r in labels
            ]

    def TreeS(self, max_iter):
        """Run AdaBoost for at most max_iter + 1 rounds, or until em == 0.

        Stumps are ranked by plain weighted 0/1 classification error; Gini
        impurity would slot into _compute_em with a small change.
        Appends to self.D and self.Gx; prints a message when done.
        """

        def _compute_em(xin, fn, Dm):
            # Weighted misclassification rate of stump fn on feature column xin.
            return sum(Dm[i] for i in range(self.N) if fn(xin[i]) != self.Y[i])

        def _compute_am(em):
            # Learner weight; em clamped away from 0 so the log stays finite.
            em = max(em, self._EPS)
            return np.log((1 - em) / em) / 2

        def _find_min_em(Dm):
            # Scan every stump of every condition; keep the lowest-error one.
            # Iterating self.Cdt matches the original dict insertion order,
            # so tie-breaking (strict '<') is unchanged.
            best = [float("inf"), None, None, None]  # [em, am, stump, cond]
            for col, cond in enumerate(self.Cdt):
                xin = self.X[:, col]
                for stump in self.AllGX[cond]:
                    em = _compute_em(xin, stump[0], Dm)
                    if em < best[0]:
                        best = [em, None, stump, cond]
            best[1] = _compute_am(best[0])
            return best

        def _weight_num(wmi, am, fn, yi, xi):
            # Unnormalised updated weight: w * exp(-am * y * G(x)).
            return wmi * np.exp(-am * yi * fn(xi))

        def _next_distribution(Dm, am, fn, cond):
            # Normalised weight update D_{m+1}, evaluated on the chosen column.
            col = self.Cdt.index(cond)
            xin = self.X[:, col]
            nums = [_weight_num(Dm[i], am, fn, self.Y[i], xin[i])
                    for i in range(self.N)]
            zm = sum(nums)  # normaliser Z_m
            return [w / zm for w in nums]

        Dm = [1.0 / self.N] * self.N  # D_1: uniform initial distribution
        em = 1.0
        m = 0
        # Iterative form of the original recursion: rounds m = 0..max_iter,
        # stopping early once a round achieves zero weighted error.
        while m <= max_iter and em != 0:
            best = _find_min_em(Dm)  # [em, am, stump, cond]
            self.D.append(Dm)
            self.Gx.append(best)
            Dm = _next_distribution(Dm, best[1], best[2][0], best[3])
            em = best[0]
            m += 1
        print("训练结束")

    def predect(self, X):
        """Predict labels for samples X as sign(sum_m am * Gm(x))."""
        def fx(x):
            total = 0.0
            for _em, am, stump, cond in self.Gx:
                col = self.Cdt.index(cond)
                total += am * stump[0](x[col])
            return np.sign(total)
        return [fx(xi) for xi in X]

    # Backward-compatible English alias for the (typo'd) public name.
    predict = predect
def main():
    """Demo: exercise 8.1 — AdaBoost on a tiny categorical data set.

    Trains on 10 samples with three categorical features and prints the
    per-round weight distributions, the chosen stumps, and the predictions.
    """
    Condition = ['身体', '业务能力', '发展潜力']
    X = np.array([[0, 1, 3], [0, 3, 1], [1, 2, 2], [1, 1, 3], [1, 2, 3],
                  [0, 1, 2], [1, 1, 2], [1, 1, 1], [1, 3, 1], [0, 2, 1]])
    Y = np.array([-1, -1, -1, -1, -1, -1, 1, 1, -1, -1])
    absb = ABSboost(Condition, X, Y)
    # Author's finding: 6 extra rounds (7 total) classify the training set exactly.
    absb.TreeS(6)
    print(absb.D)
    print(absb.Gx)
    print(absb.predect(X))
# Script entry point: run the AdaBoost demo only when executed directly.
if __name__ == '__main__':
    main()
# ---------------------------------------------------------------------------
# Study notes (kept as comments — as bare text they break the module parse):
#
# 支持向量机 (Support Vector Machine):
#   策略: 极小化正则化合页损失, 软间隔最大化   算法: 序列最小最优化 (SMO)
# 逻辑斯蒂回归模型 (Logistic regression):
#   策略: 极大对数似然函数, 正则化的极大似然估计   算法: 改进的迭代尺度算法, 梯度下降, 拟牛顿
# AdaBoost:
#   策略: 极小化加法模型的指数损失   算法: 前向分步加法算法