今天写了个SVM算法(支持向量机)的代码,总算写好。
代码参照了《机器学习实战》,但个人觉得它的逻辑不是太好,所以结合李航的书改了下。
数据就用《机器学习实战》中支持向量机那一章的数据,代码上传上来和大家共享一下:
# coding=utf8
import sys
import numpy as np
from numpy import *
import os
import matplotlib.pylab as plt
reload(sys)
sys.setdefaultencoding('utf-8')
os.chdir('D:\Study\ML\MLAction\Ch06')
"""存储SMO优化过程中的常用数据"""
class SmoOpt:
def __init__(self,X,y,C,tol):
#输入参数
self.X=X
self.y=y
self.C=C
self.tol=tol
#模型变量
self.alpha=None
self.b=None
#缓存数据
self.K=None
self.E=None
def cal_predict(x_index, opt):
    """Raw model output f(x) for training sample *x_index* (sign not taken).

    :param x_index: index of the sample in the training set
    :param opt: SmoOpt instance holding alpha, y, K and b
    :return: sum_i alpha_i * y_i * K(x_i, x) + b
    """
    contributions = opt.alpha * opt.y * opt.K[:, x_index]
    return np.sum(contributions) + opt.b
def calcEk(opt, k):
    """Prediction error E_k = f(x_k) - y_k for training sample k."""
    f_xk = opt.b + np.sum(opt.alpha * opt.y * opt.K[:, k])
    return f_xk - opt.y[k]
def updateEk(opt, k):
    """Refresh the cached error for sample k (after alpha/b changed)."""
    opt.E[k] = calcEk(opt, k)
def updateE(opt):
    """Recompute every cached error E_i in place for all m samples."""
    m = opt.X.shape[0]
    for idx in range(m):
        opt.E[idx] = calcEk(opt, idx)
def check_KTT(alpha_index, opt):
    """Check whether alpha[alpha_index] satisfies the KKT conditions.

    (The function name keeps the original "KTT" spelling so callers are
    unaffected; the conditions checked are the usual KKT ones.)

    Soft-margin SVM KKT conditions, within tolerance tol:
        alpha == 0      =>  y * f(x) - 1 >= tol
        alpha == C      =>  y * f(x) - 1 <= -tol
        0 < alpha < C   =>  |y * f(x) - 1| <= tol

    :param alpha_index: index of the multiplier to check
    :param opt: SmoOpt instance with alpha, y, K, b, C, tol
    :return: bool, True when the KKT conditions hold
    """
    alpha = opt.alpha[alpha_index]
    C = opt.C
    tol = opt.tol
    y_true = opt.y[alpha_index]
    # raw model output f(x); same quantity cal_predict() computes
    y_pred = np.sum(opt.alpha * opt.y * opt.K[:, alpha_index]) + opt.b
    margin = y_true * y_pred - 1
    if alpha == 0 and margin >= tol:
        return True
    if alpha == C and margin <= -tol:
        return True
    # BUG FIX: the original read "elif 0and -tol<=...", which parses as
    # "0 and ..." and is always False, so interior multipliers
    # (0 < alpha < C) were never recognised as KKT-satisfied.
    if 0 < alpha < C and -tol <= margin <= tol:
        return True
    return False
#region 核函数
def rbf(X, Xi, kernel_opt):
    """Gaussian (RBF) kernel between each row of X and the single point Xi.

    :param X: m*n array of samples
    :param Xi: one sample, length n
    :param kernel_opt: bandwidth sigma of the Gaussian
    :return: length-m array, exp(-||x - xi||^2 / (2 * sigma^2))
    """
    sq_dist = np.sum((X - Xi) ** 2, axis=1)
    return np.exp(-0.5 * sq_dist / kernel_opt ** 2)
def ploy(X, Xi, kernel_opt):
    """Polynomial kernel (x . xi + 1) ** d; the degree d is kernel_opt.

    (Name kept as in the original file — presumably a typo for "poly".)
    """
    dots = np.dot(X, Xi)
    return (dots + 1) ** kernel_opt
def linear(X, Xi, kernel_opt):
    """Linear kernel x . xi; kernel_opt is ignored (kept for a uniform API)."""
    products = np.dot(X, Xi)
    return products
#endregion
def smo(X, y, C=1, tol=1e-6, kernel=rbf, kernel_opt=1):
    """SMO algorithm: find the Lagrange multipliers alpha and intercept b
    of a (kernelised) soft-margin SVM.

    :param X: m*n array of training samples
    :param y: length-m array/list of labels in {-1, +1}
    :param C: soft-margin penalty
    :param tol: KKT tolerance, also the minimum useful step for alpha_j
    :param kernel: kernel function (rbf, ploy or linear)
    :param kernel_opt: parameter forwarded to the kernel
    :return: (alpha, b); (None, None) when X or y is missing
    """
    # -------- input check --------
    if X is None or y is None:
        return None, None

    # -------- initialisation --------
    m = X.shape[0]
    opt = SmoOpt(X, y, C, tol)
    opt.alpha = np.zeros(m)
    opt.b = 0.0

    # kernel matrix cache: K[i, j] = kernel(x_i, x_j)
    K = np.zeros((m, m))
    for i in range(m):
        K[i] = kernel(opt.X, X[i], kernel_opt)
    opt.K = K

    # error cache: E_i = f(x_i) - y_i
    E = np.zeros(m)
    for i in range(m):
        E[i] = calcEk(opt, i)
    opt.E = E

    # -------- main loop: repeatedly update pairs of multipliers --------
    while True:
        is_changed = False  # did this sweep move any multiplier?

        # First variable alpha_i: scan KKT violators, visiting points on
        # the margin (0 < alpha < C) before all the others.
        on_margin = (opt.alpha > 0) & (opt.alpha < opt.C)
        alpha_index = np.argsort(~on_margin)
        for besti in alpha_index:
            if check_KTT(besti, opt):
                continue

            # Second variable alpha_j: the sample whose error differs most
            # from E_i (maximising |E_i - E_j| gives the biggest step).
            bestj = -1
            maxE_diff = 0
            for j in alpha_index:
                if besti == j:
                    continue
                # BUG FIX: the original compared opt.E[i] -- "i" was a
                # stale loop variable from the initialisation loops above,
                # so the heuristic always measured against sample m-1.
                E_diff = np.abs(opt.E[besti] - opt.E[j])
                if E_diff > maxE_diff:
                    bestj = j
                    maxE_diff = E_diff
            if bestj == -1:
                # Every |E_i - E_j| was zero: no usable partner; the
                # original fell through and indexed opt.K[besti, -1].
                continue

            # ----- compute the new alpha_j, clipped to [L, H] -----
            eta = opt.K[besti, besti] + opt.K[bestj, bestj] - 2 * opt.K[besti, bestj]
            if eta <= 0:
                # Degenerate pair (eta > 0 for a strictly PD kernel);
                # the original divided by zero here.
                continue
            alphaj = opt.alpha[bestj] + opt.y[bestj] * (opt.E[besti] - opt.E[bestj]) / eta
            if opt.y[besti] != opt.y[bestj]:
                L = max(0, opt.alpha[bestj] - opt.alpha[besti])
                H = min(opt.C, opt.C + opt.alpha[bestj] - opt.alpha[besti])
            else:
                L = max(0, opt.alpha[bestj] + opt.alpha[besti] - opt.C)
                H = min(opt.C, opt.alpha[bestj] + opt.alpha[besti])
            alphaj = min(max(alphaj, L), H)
            if np.abs(alphaj - opt.alpha[bestj]) < tol:
                # alpha_j did not move enough; try another pair
                continue
            # alpha_i moves to keep sum_i alpha_i * y_i constant
            alphai = opt.alpha[besti] + opt.y[besti] * opt.y[bestj] * (opt.alpha[bestj] - alphaj)

            # ----- update the intercept b -----
            b1_new = -opt.E[besti] - opt.y[besti] * opt.K[besti, besti] * (alphai - opt.alpha[besti]) \
                     - opt.y[bestj] * opt.K[bestj, besti] * (alphaj - opt.alpha[bestj]) + opt.b
            b2_new = -opt.E[bestj] - opt.y[besti] * opt.K[besti, bestj] * (alphai - opt.alpha[besti]) \
                     - opt.y[bestj] * opt.K[bestj, bestj] * (alphaj - opt.alpha[bestj]) + opt.b
            if 0 < alphai < opt.C:
                opt.b = b1_new
            elif 0 < alphaj < opt.C:
                opt.b = b2_new
            else:
                opt.b = (b1_new + b2_new) / 2.0
            opt.alpha[besti] = alphai
            opt.alpha[bestj] = alphaj

            # refresh the whole error cache with the new alpha and b
            updateE(opt)
            is_changed = True

        # a full sweep moved nothing: converged, stop
        if not is_changed:
            break
    return opt.alpha, opt.b
def loadDataSet(fileName):
    """Load a tab-separated data file: two feature columns, one label column.

    :param fileName: path to a file whose lines look like "x1\tx2\tlabel"
    :return: (m*2 float feature array, length-m float label array)
    """
    dataMat = []
    labelMat = []
    # "with" guarantees the handle is closed; the original leaked it
    with open(fileName) as fr:
        for line in fr:
            lineArr = line.strip().split('\t')
            dataMat.append([float(lineArr[0]), float(lineArr[1])])
            labelMat.append(float(lineArr[2]))
    return np.array(dataMat), np.array(labelMat)
def calcWs(alphas, dataArr, classLabels):
    """Recover the primal weight vector w = sum_i alpha_i * y_i * x_i.

    Only meaningful for the linear kernel.

    :param alphas: length-m multipliers from smo()
    :param dataArr: m*n sample array
    :param classLabels: length-m labels in {-1, +1}
    :return: n*1 float array
    """
    samples = np.asarray(dataArr, dtype=float)
    labels = np.asarray(classLabels, dtype=float)
    coeff = np.asarray(alphas, dtype=float) * labels  # alpha_i * y_i per sample
    w = samples.T.dot(coeff).reshape(-1, 1)           # weighted sum over samples
    return w
def svm_test():
    """Train an SVM on testSet.txt via SMO, report the training error rate
    and plot the data with the separating line (linear kernel only)."""
    k1 = 0.5          # kernel parameter (sigma/degree); ignored by linear
    kernel = linear
    dataArr, labelArr = loadDataSet('testSet.txt')
    alphas, b = smo(dataArr, labelArr, 200, 0.0001, kernel, k1)  # C=200 important
    svInd = nonzero(alphas > 0)
    sVs = dataArr[svInd]          # support vectors only
    labelSV = labelArr[svInd]
    # print() calls (single argument) work under both Python 2 and 3;
    # the original used Python-2-only print statements
    print('there are %d Support Vectors' % shape(sVs)[0])
    m, n = shape(dataArr)
    errorCount = 0
    for k in range(m):
        kernelEval = kernel(sVs, dataArr[k], k1)
        predict = np.sum(kernelEval * labelSV * alphas[svInd]) + b
        if sign(predict) != sign(labelArr[k]):
            errorCount += 1
    print('the training error rate is: %f' % (float(errorCount) / m))
    w = calcWs(alphas, dataArr, labelArr).flatten()
    print(w)
    # plot the points and the separating line w . x + b = 0
    plt.scatter(dataArr[:, 0], dataArr[:, 1], c=labelArr)
    x = arange(2, 8, 0.5)
    y = (-w[0] * x - b) / w[1]
    plt.plot(x, y)


if __name__ == '__main__':
    # guard keeps the demo from running when this module is imported
    svm_test()