import matplotlib.pyplot as plt
import numpy as np
import random
def loadDataSet(fileName):
    """
    Function description: read a tab-separated data set from disk.

    Every non-blank line must hold three tab-separated numeric fields:
    two feature values followed by the label.

    Parameters:
        fileName - path of the data file
    Returns:
        dataMat - list of [x1, x2] float pairs, one per sample
        labelMat - list of float labels, in the same order as dataMat
    Raises:
        OSError - the file cannot be opened
        ValueError / IndexError - a non-blank line is malformed
    """
    dataMat = []
    labelMat = []
    # The file is read as bytes and decoded line by line with 'utf-8-sig'
    # so that a UTF-8 byte-order mark in front of the data is dropped.
    with open(fileName, 'rb') as fr:
        for raw in fr:
            text = raw.decode('utf-8-sig').strip()
            if not text:
                # Blank lines (typically a trailing newline) carry no sample.
                continue
            lineArr = text.split('\t')
            dataMat.append([float(lineArr[0]), float(lineArr[1])])
            labelMat.append(float(lineArr[2]))
    return dataMat, labelMat
def selectJrand(i, m):
    """
    Function description: randomly pick an alpha index different from i.

    Parameters:
        i - index of the alpha that must not be returned
        m - total number of alphas; candidates are drawn from 0 .. m-1
    Returns:
        j - a random integer index with j != i
    Raises:
        ValueError - i is 0 and m <= 1, so 0 is the only candidate and
                     no different index exists (the draw would never end)
    """
    if i == 0 and m <= 1:
        raise ValueError('selectJrand needs m >= 2 to find an index other than i')
    while True:
        # int() truncates the uniform draw from [0, m) to an integer index.
        j = int(random.uniform(0, m))
        if j != i:
            return j
def clipAlaph(aj, H, L):
    """
    Function description: clip aj into its allowed value range.

    Parameters:
        aj - value to clip
        H - upper bound
        L - lower bound
    Returns:
        aj - H if aj was above H, L if the result is below L, else unchanged
    """
    if aj > H:
        aj = H
    # Checked second and independently, so the lower bound wins if L > H.
    if L > aj:
        aj = L
    return aj
def _prediction_error(X, y, alphas, b, k):
    """Return f(x_k) - y_k for the current linear-kernel model."""
    fx = float(np.dot(alphas * y, np.dot(X, X[k]))) + b
    return fx - y[k]


def smoSimple(dataMatIn, classLabels, C, toler, maxIter):
    """
    Function description: simplified SMO for a linear-kernel SVM.

    For every sample i that violates the KKT conditions a second sample j
    is picked at random and the pair is optimised:
        1. compute the prediction errors Ei, Ej
        2. compute the lower / upper bounds L, H for alpha_j
        3. compute eta (the step size term)
        4. update alpha_j
        5. clip alpha_j to [L, H]
        6. update alpha_i by the opposite amount
        7. compute b1, b2 and choose the new b

    Parameters:
        dataMatIn - samples, one row per sample
        classLabels - one label per sample (+1 / -1)
        C - slack penalty; every alpha is kept in [0, C]
        toler - tolerance used in the KKT violation check
        maxIter - number of consecutive full passes without any alpha
                  change after which the loop stops
    Returns:
        b - bias as a Python float
        alphas - (m, 1) numpy matrix of Lagrange multipliers
    """
    X = np.asarray(dataMatIn, dtype=float)
    y = np.asarray(classLabels, dtype=float).reshape(-1)
    m = X.shape[0]
    alphas = np.zeros(m)
    b = 0.0
    if m < 2:
        # A pair of distinct samples is required; nothing can be optimised.
        return b, np.asmatrix(alphas.reshape(-1, 1))

    iter_num = 0
    while iter_num < maxIter:
        alphaPairsChanged = 0
        for i in range(m):
            Ei = _prediction_error(X, y, alphas, b, i)
            margin_err = y[i] * Ei
            # KKT violation: alpha_i can still grow (< C) or still shrink (> 0).
            violates_kkt = ((margin_err < -toler and alphas[i] < C)
                            or (margin_err > toler and alphas[i] > 0))
            if not violates_kkt:
                continue

            j = selectJrand(i, m)
            Ej = _prediction_error(X, y, alphas, b, j)
            alphaIold = alphas[i]
            alphaJold = alphas[j]

            # Bounds that keep both alphas inside [0, C] on the constraint line.
            if y[i] != y[j]:
                L = max(0.0, alphas[j] - alphas[i])
                H = min(C, C + alphas[j] - alphas[i])
            else:
                L = max(0.0, alphas[j] + alphas[i] - C)
                H = min(C, alphas[j] + alphas[i])
            if L == H:
                print('L==H')
                continue

            eta = (2.0 * np.dot(X[i], X[j])
                   - np.dot(X[i], X[i])
                   - np.dot(X[j], X[j]))
            if eta >= 0:
                print('eta>=0')
                continue

            alphas[j] = clipAlaph(alphaJold - y[j] * (Ei - Ej) / eta, H, L)
            if abs(alphas[j] - alphaJold) < 0.00001:
                print('alpha[j]变化太小')
                continue

            # Move alpha_i by the same amount in the opposite direction.
            alphas[i] += y[j] * y[i] * (alphaJold - alphas[j])

            delta_i = y[i] * (alphas[i] - alphaIold)
            delta_j = y[j] * (alphas[j] - alphaJold)
            b1 = b - Ei - delta_i * np.dot(X[i], X[i]) - delta_j * np.dot(X[i], X[j])
            b2 = b - Ej - delta_i * np.dot(X[i], X[j]) - delta_j * np.dot(X[j], X[j])
            # Prefer the threshold of an alpha strictly inside (0, C).
            if 0 < alphas[i] < C:
                b = b1
            elif 0 < alphas[j] < C:
                b = b2
            else:
                b = (b1 + b2) / 2.0

            alphaPairsChanged += 1
            print('第%d次迭代样本:%d,alpha优化次数:%d' % (iter_num, i, alphaPairsChanged))

        # Only passes without any change count towards maxIter.
        if alphaPairsChanged == 0:
            iter_num += 1
        else:
            iter_num = 0
        print('迭代次数:%d' % iter_num)
    return float(b), np.asmatrix(alphas.reshape(-1, 1))
def get_w(dataMat, labelMat, alphas):
    """
    Function description: compute the weight vector w = sum_i alpha_i * y_i * x_i.

    Parameters:
        dataMat - samples, one row per sample, any number of feature columns
        labelMat - one label per sample
        alphas - one multiplier per sample, e.g. an (m, 1) column
    Returns:
        w - the weight vector as a (nested) Python list; for an (m, 1)
            alphas column this is [[w_1], [w_2], ...]
    """
    alphas = np.array(alphas)
    dataMat = np.array(dataMat)
    labelMat = np.array(labelMat)
    # The label column broadcasts over every feature column, so the feature
    # count is not fixed to two.
    weighted = labelMat.reshape(-1, 1) * dataMat
    w = np.dot(weighted.T, alphas)
    return w.tolist()
"""
Function description: data visualisation
Parameters:
dataMat - data matrix
labelMat - data labels
"""
def showDataSet(dataMat, labelMat, alphas, w, b):
    """
    Function description: plot both classes, the separating line and the
    samples whose alpha is positive.

    Parameters:
        dataMat - samples as [x1, x2] pairs
        labelMat - one label per sample; labels > 0 form the first
                   scatter series, all others the second
        alphas - one multiplier per sample; samples with alpha > 0 are
                 circled in red
        w - the two weights, flat or nested as [[w1], [w2]]
        b - bias, a scalar or a single-element array / matrix
    """
    points = np.asarray(dataMat, dtype=float)
    labels = np.asarray(labelMat, dtype=float).reshape(-1)
    is_plus = labels > 0
    data_plus_np = points[is_plus]
    data_minus_np = points[~is_plus]
    # Column slicing also works for a class without any samples.
    plt.scatter(data_plus_np[:, 0], data_plus_np[:, 1])
    plt.scatter(data_minus_np[:, 0], data_minus_np[:, 1])

    x1 = points[:, 0].max()
    x2 = points[:, 0].min()
    a1, a2 = np.asarray(w, dtype=float).reshape(-1)
    a1 = float(a1)
    a2 = float(a2)
    b = float(np.asarray(b, dtype=float).item())
    if a2 != 0:
        # Boundary a1*x + a2*y + b = 0 solved for y at both x extremes.
        y1, y2 = (-b - a1 * x1) / a2, (-b - a1 * x2) / a2
        plt.plot([x1, x2], [y1, y2])
    elif a1 != 0:
        # a2 == 0: the boundary is the vertical line x = -b / a1.
        x0 = -b / a1
        plt.plot([x0, x0], [points[:, 1].max(), points[:, 1].min()])
    # With a1 == a2 == 0 there is no boundary line to draw.

    alpha_values = np.asarray(alphas, dtype=float).reshape(-1)
    for x, y in points[alpha_values > 0]:
        plt.scatter([x], [y], s=150, c='none', alpha=0.7, linewidth=1.5, edgecolor='red')
    plt.show()
if __name__ == '__main__':
    # Raw string: the backslash of the Windows path is a literal character,
    # not the start of an escape sequence.
    dataMat, labelMat = loadDataSet(r'G:\dataSet.txt')
    # Arguments after the data: C=0.6, toler=0.001, maxIter=80.
    b, alphas = smoSimple(dataMat, labelMat, 0.6, 0.001, 80)
    w = get_w(dataMat, labelMat, alphas)
    showDataSet(dataMat, labelMat, alphas, w, b)
# Run results: