# 主体是书上的程序,另外添加了一个画图函数,用于查看分类结果。实际上,按照书上的程序,分类结果并不一定好;书上图6-4只是理想结果。程序后附上运行结果。
# -*- coding: utf-8 -*-
"""
Created on Thu Mar 15 10:26:20 2018
@author: ###
"""
import numpy as np
import matplotlib.pyplot as plt
def loadDataSet(filename):
    """Read a tab-separated data file into feature rows and labels.

    Each non-blank line must hold at least three tab-separated numeric
    fields: the first two are the features, the third is the label.

    Args:
        filename: path of the text file to read.

    Returns:
        (dataMat, labelMat): a list of ``[x0, x1]`` float pairs and a list of
        float labels, in file order.

    Raises:
        ValueError / IndexError: if a non-blank line is malformed.
    """
    dataMat = []
    labelMat = []
    # The context manager guarantees the handle is closed even on a parse error.
    with open(filename) as fr:
        for line in fr:
            stripped = line.strip()
            if not stripped:
                # Tolerate blank lines (e.g. a trailing newline at end of file).
                continue
            lineArr = stripped.split('\t')
            dataMat.append([float(lineArr[0]), float(lineArr[1])])
            labelMat.append(float(lineArr[2]))
    return dataMat, labelMat
def selectJrand(i, m):
    """Pick a random index ``j`` drawn from ``[0, m)`` that differs from ``i``.

    Args:
        i: index that must not be returned.
        m: exclusive upper bound of the sampling range.

    Returns:
        An int ``j != i`` obtained by truncating ``np.random.uniform(0, m)``.

    Raises:
        ValueError: if ``i == 0`` and ``m <= 1``; every draw would then
            truncate to 0 and the rejection loop could never terminate.
    """
    if i == 0 and m <= 1:
        raise ValueError("selectJrand needs m >= 2 to pick an index other than i")
    j = i
    # Rejection sampling: redraw until the candidate differs from i.
    while j == i:
        j = int(np.random.uniform(0, m))
    return j
def clipAlpha(aj, H, L):
    """Clamp ``aj``: cap it at ``H`` first, then raise it to at least ``L``.

    Args:
        aj: value to clamp.
        H: upper bound.
        L: lower bound (wins over ``H`` if the two bounds cross).

    Returns:
        The clamped value.
    """
    capped = H if aj > H else aj
    return L if L > capped else capped
def smoSimple(dataMatIn, classLabels, C, toler, maxIter):
    """Train a linear SVM with the simplified SMO algorithm.

    Args:
        dataMatIn: sequence of feature rows.
        classLabels: sequence of labels, one per row (presumably +1/-1, as
            the SMO update rules assume; not checked here).
        C: penalty constant; the upper bound of every alpha.
        toler: tolerance used when testing the KKT conditions.
        maxIter: number of consecutive full passes over the data without any
            alpha change that must occur before training stops.

    Returns:
        (b, alphas): ``alphas`` is an (m, 1) matrix of Lagrange multipliers.
        ``b`` is the plain int 0 if no alpha pair was ever updated, otherwise
        a 1x1 matrix holding the bias.
    """
    # Convert the lists to matrices; the labels are transposed into a column
    # so that row i of the data lines up with row i of the labels.
    # np.asmatrix is used because the np.mat alias was removed in NumPy 2.0.
    dataMatrix = np.asmatrix(dataMatIn)
    labelMat = np.asmatrix(classLabels).transpose()
    b = 0
    m = np.shape(dataMatrix)[0]
    alphas = np.asmatrix(np.zeros((m, 1)))
    quietPasses = 0  # consecutive passes over the data with no alpha changed
    while quietPasses < maxIter:
        alphaPairsChanged = 0
        for i in range(m):
            # f(x_i) = sum_k alpha_k * y_k * <x_k, x_i> + b, i.e. <w, x_i> + b
            # with w expressed through the Lagrange multipliers.
            fXi = (np.multiply(alphas, labelMat).T
                   * (dataMatrix * dataMatrix[i, :].T)).item() + b
            Ei = fXi - labelMat[i, 0]
            violatesKKT = (
                (labelMat[i] * Ei < -toler and alphas[i] < C)
                or (labelMat[i] * Ei > toler and alphas[i] > 0)
            )
            if not violatesKKT:
                continue
            j = selectJrand(i, m)  # second alpha is chosen at random
            fXj = (np.multiply(alphas, labelMat).T
                   * (dataMatrix * dataMatrix[j, :].T)).item() + b
            Ej = fXj - labelMat[j, 0]
            # Copies, because alphas[i] / alphas[j] are modified in place below.
            alphaIold = alphas[i].copy()
            alphaJold = alphas[j].copy()
            # Feasible range [L, H] of the second alpha, derived from the
            # constraint a1*y1 + a2*y2 = k:
            #   y1 != y2: a1 - a2 = k'  ->  a2 = a1 - k'
            #   y1 == y2: a1 + a2 = k'  ->  a2 = -a1 + k'
            if labelMat[i] != labelMat[j]:
                L = max(0, alphas[j] - alphas[i])
                H = min(C, C + alphas[j] - alphas[i])
            else:
                L = max(0, alphas[j] + alphas[i] - C)
                H = min(C, alphas[j] + alphas[i])
            if L == H:
                print("L==H")
                continue
            # eta = 2<x_i, x_j> - <x_i, x_i> - <x_j, x_j>; the update is only
            # performed when eta is strictly negative.
            eta = (2.0 * dataMatrix[i, :] * dataMatrix[j, :].T
                   - dataMatrix[i, :] * dataMatrix[i, :].T
                   - dataMatrix[j, :] * dataMatrix[j, :].T)
            if eta >= 0:
                print("eta>=0")
                continue
            alphas[j] -= labelMat[j] * (Ei - Ej) / eta
            alphas[j] = clipAlpha(alphas[j], H, L)
            if abs(alphas[j] - alphaJold) < 0.00001:
                print("j not moving enough")
                continue
            # Move the first alpha by the same amount in the opposite direction.
            alphas[i] += labelMat[j] * labelMat[i] * (alphaJold - alphas[j])
            b1 = (b - Ei
                  - labelMat[i] * (alphas[i] - alphaIold)
                  * dataMatrix[i, :] * dataMatrix[i, :].T
                  - labelMat[j] * (alphas[j] - alphaJold)
                  * dataMatrix[i, :] * dataMatrix[j, :].T)
            b2 = (b - Ej
                  - labelMat[i] * (alphas[i] - alphaIold)
                  * dataMatrix[i, :] * dataMatrix[j, :].T
                  - labelMat[j] * (alphas[j] - alphaJold)
                  * dataMatrix[j, :] * dataMatrix[j, :].T)
            # Prefer the bias computed from an alpha strictly inside (0, C).
            if 0 < alphas[i] and C > alphas[i]:
                b = b1
            elif 0 < alphas[j] and C > alphas[j]:
                b = b2
            else:
                b = (b1 + b2) / 2.0
            alphaPairsChanged += 1
            print("iter:%d i:%d,pairs changed %d" % (quietPasses, i, alphaPairsChanged))
        # Any change resets the counter: only consecutive quiet passes count.
        if alphaPairsChanged == 0:
            quietPasses += 1
        else:
            quietPasses = 0
        print("iteration number :%d " % quietPasses)
    return b, alphas
def plotF(dataIn, classLabel, b, alphas):
    """Plot the data set, circle the support vectors and draw the decision line.

    Args:
        dataIn: sequence of two-feature rows.
        classLabel: sequence of labels; label 1 is drawn as red squares,
            every other label as green stars.
        b: bias term, either a plain scalar or a 1x1 matrix.
        alphas: (m, 1) matrix of multipliers; rows with ``alpha > 0`` are
            circled in blue.
    """
    dataArr = np.array(dataIn)
    m = np.shape(dataArr)[0]
    xcord1, ycord1 = [], []   # points labelled 1
    xcord2, ycord2 = [], []   # all other points
    markx, marky = [], []     # points with a positive alpha
    for i in range(m):
        if classLabel[i] == 1:
            xcord1.append(dataArr[i][0])
            ycord1.append(dataArr[i][1])
        else:
            xcord2.append(dataArr[i][0])
            ycord2.append(dataArr[i][1])
        if alphas[i] > 0:
            markx.append(dataArr[i][0])
            marky.append(dataArr[i][1])
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.scatter(xcord1, ycord1, s=30, c='red', marker='s', label=1)
    ax.scatter(xcord2, ycord2, s=30, c='green', marker='*', label=-1)
    # Hollow markers: facecolors='none' is the supported spelling; the old
    # c='' trick is rejected by current matplotlib releases.
    ax.scatter(markx, marky, s=80, facecolors='none', edgecolors='blue')
    # Decision boundary: beta[0,0]*x + beta[0,1]*y + b = 0, solved for y.
    x = np.arange(1, 6, 0.1)
    # np.asmatrix is used because the np.mat alias was removed in NumPy 2.0.
    datamatrix = np.asmatrix(dataIn)
    labelMat = np.asmatrix(classLabel)
    beta = np.multiply(alphas, labelMat.T).T * datamatrix
    # Accept both a plain scalar and a 1x1 matrix for the bias.
    bias = float(np.asarray(b).ravel()[0])
    y = (-bias - beta[0, 0] * x) / beta[0, 1]
    ax.legend(loc='upper left')
    ax.plot(x, y)
    plt.show()
if __name__ == '__main__':
    # Train on the sample file, list the support vectors, then plot them.
    dataArr, labelArr = loadDataSet("testSet.txt")
    b, alphas = smoSimple(dataArr, labelArr, 0.6, 0.001, 40)
    print(alphas[alphas > 0])
    # Iterate over the actual samples instead of a hard-coded count of 100,
    # which raised IndexError on smaller sets and skipped rows on larger ones.
    for i, (point, label) in enumerate(zip(dataArr, labelArr)):
        if alphas[i] > 0:
            print(alphas[i], point, label)
    plotF(dataArr, labelArr, b, alphas)
# 运行结果: