import numpy as np
import random
# Load the training set: one sample per row, label taken from the last column.
data = np.loadtxt("heart-c.txt")
# Cache the inner product of every pair of samples (linear-kernel Gram matrix).
# NOTE(review): the feature slice stops before column shape[1] - 2, so the
# column immediately preceding the label is not used as a feature -- confirm
# this is intended and not an off-by-one.
dp = np.dot(data[:, :data.shape[1] - 2], data[:, :data.shape[1] - 2].T)
Lab = data[:, -1]
# One multiplier (alpha) per sample, all starting at zero.
alphas = [0 for _ in range(data.shape[0])]
# Bias term b and penalty parameter C.
b = 0
C = 0.8
# Cached error (prediction - label) per sample; with every alpha and b equal
# to zero the prediction is 0, hence the error is simply -label.
E = np.negative(Lab)
# Tolerance used for the KKT checks and for "did alpha move enough" tests.
eps = 0.05
# -- Evaluate the dual objective -- #
def objValue(x, id1, id2):
    """Return the dual objective with ``alphas[id2]`` moved to ``x``.

    The value is

        W(a) = sum_i a_i - 0.5 * sum_i sum_j y_i * y_j * K_ij * a_i * a_j

    evaluated at the point where ``a[id2] == x``, ``a[id1]`` is shifted so
    that ``sum_i y_i * a_i`` keeps its current value, and every other
    multiplier is left as it is.  ``y`` is the global ``Lab``, ``K`` the
    global ``dp`` and ``a`` starts from the global ``alphas``; none of the
    globals are modified.

    Args:
        x: Candidate value for ``alphas[id2]``.
        id1: Index of the first multiplier of the pair.
        id2: Index of the second multiplier of the pair.

    Returns:
        The objective value as a float (larger is better, since the dual
        is maximised).
    """
    trial = np.array(alphas, dtype=float)
    # Moving alpha2 forces alpha1 to move along the line
    # y1 * alpha1 + y2 * alpha2 = const; evaluating with the old alpha1
    # would compare two points that are not both feasible.
    trial[id1] = alphas[id1] + Lab[id1] * Lab[id2] * (alphas[id2] - x)
    trial[id2] = x
    weighted = Lab * trial
    quadratic = np.dot(weighted, np.dot(dp, weighted))
    return float(np.sum(trial) - 0.5 * quadratic)
# -- Solve the two-variable sub-problem -- #
def varPairOpt(id1, id2):
    """Jointly optimise ``alphas[id1]`` and ``alphas[id2]`` (one SMO step).

    Uses the decision function f(x) = sum_j alphas[j] * Lab[j] * K(x_j, x) + b
    and the cached errors ``E[k] = f(x_k) - Lab[k]``.  On success the global
    ``alphas`` list is updated in place and the globals ``b`` and ``E`` are
    rebound to their new values.

    Args:
        id1: Index of the first multiplier.
        id2: Index of the second multiplier.

    Returns:
        1 if the pair was updated, 0 if nothing changed (identical or
        negative indices, empty feasible segment, or a step smaller than
        ``eps``).
    """
    global b, E
    # Negative indices would silently address samples from the end of the
    # arrays (e.g. a -1 "not found" sentinel), so treat them as "no pair".
    if id1 == id2 or id1 < 0 or id2 < 0:
        return 0

    alpha1 = alphas[id1]
    alpha2 = alphas[id2]
    y1 = Lab[id1]
    y2 = Lab[id2]
    s = y1 * y2
    k11 = dp[id1][id1]
    k12 = dp[id1][id2]
    k22 = dp[id2][id2]

    # Ends of the feasible segment for the new alpha2 inside the box [0, C].
    if y1 != y2:
        low = max(0, alpha2 - alpha1)
        high = min(C, C + alpha2 - alpha1)
    else:
        low = max(0, alpha2 + alpha1 - C)
        high = min(C, alpha2 + alpha1)
    if low == high:
        return 0

    # Second derivative of the objective along the constraint line
    # (non-positive for a valid kernel).
    eta = 2 * k12 - k11 - k22
    if eta < 0:
        # Unconstrained optimum, then clip to [low, high].
        alpha2_new = alpha2 - y2 * (E[id1] - E[id2]) / eta
        alpha2_new = min(max(alpha2_new, low), high)
    else:
        # Degenerate direction: the optimum lies at one end of the segment,
        # so compare the objective there and move only on a clear win.
        low_obj = objValue(low, id1, id2)
        high_obj = objValue(high, id1, id2)
        if low_obj > high_obj + eps:
            alpha2_new = low
        elif low_obj < high_obj - eps:
            alpha2_new = high
        else:
            alpha2_new = alpha2

    # Give up if alpha2 would not move by a meaningful amount.
    if abs(alpha2 - alpha2_new) < eps:
        return 0
    alpha1_new = alpha1 + s * (alpha2 - alpha2_new)

    # Signed changes y * (alpha_new - alpha_old) of the two multipliers.
    delta1 = y1 * (alpha1_new - alpha1)
    delta2 = y2 * (alpha2_new - alpha2)

    # Bias that makes the new error of sample id1 (resp. id2) exactly zero
    # under f(x) = sum(...) + b.
    b1_new = b - E[id1] - delta1 * k11 - delta2 * k12
    b2_new = b - E[id2] - delta1 * k12 - delta2 * k22
    b_old = b
    if 0 < alpha1_new < C:
        b = b1_new
    elif 0 < alpha2_new < C:
        b = b2_new
    else:
        b = (b1_new + b2_new) / 2

    # Refresh the cached errors of all samples.
    E = (E + (b - b_old)) + (delta1 * dp[id1, :] + delta2 * dp[id2, :])

    # Commit the new multipliers.
    alphas[id1] = alpha1_new
    alphas[id2] = alpha2_new
    return 1
# -- Choose the second variable -- #
def _random_other(i, n):
    """Return a random index in ``range(n)`` different from ``i``.

    When fewer than two samples exist there is no other index; ``i`` itself
    is returned in that case instead of looping forever.
    """
    if n < 2:
        return i
    index = random.randint(0, n - 1)
    while index == i:
        index = random.randint(0, n - 1)
    return index


def select(i, Ei, entireSet):
    """Pick the index of the second multiplier to pair with sample ``i``.

    Args:
        i: Index of the first (already chosen) sample.
        Ei: Current error of sample ``i``.
        entireSet: 1 to draw the partner at random from the whole training
            set; any other value to use the heuristic below.

    Returns:
        In heuristic mode, the index ``j != i`` with ``0 < alphas[j] < C``
        that maximises ``|Ei - Ej|``.  If no such index yields a positive
        gap, a random other index is returned rather than a sentinel, so
        the result is always usable as an array index.
    """
    n_samples = data.shape[0]
    if entireSet == 1:
        return _random_other(i, n_samples)

    # alpha_k * y_k for every sample; identical for all candidates, so it is
    # computed once instead of once per candidate.
    weighted = np.multiply(Lab, alphas)
    best_j = -1
    best_gap = 0
    for j, alpha in enumerate(alphas):
        if j == i or not (0 < alpha < C):
            continue
        Ej = np.dot(weighted, dp[j, :]) + b - Lab[j]
        gap = abs(Ei - Ej)
        if gap > best_gap:
            best_j = j
            best_gap = gap
    if best_j < 0:
        return _random_other(i, n_samples)
    return best_j
# -- Outer SMO loop -- #
# Alternates between a pass over the whole training set and passes over the
# non-bound samples (0 < alpha < C) until a full pass changes nothing or
# max_iter passes have been made.
# NOTE(review): the indentation of this section was reconstructed; it assumes
# iter_ counts passes (not individual pair updates) and that the final alphas
# are printed once, after the loop -- confirm against the intended behaviour.
iter_ = 0
max_iter = 50
entireSet = 1  # flag: 1 -> sweep every sample, 0 -> sweep non-bound samples only
alpha_change = 0
while (iter_ < max_iter) and ((alpha_change > 0) or entireSet):
    alpha_change = 0
    if entireSet:
        candidates = range(data.shape[0])
    else:
        # Snapshot of the non-bound samples; every sample, including the
        # last one, is eligible.
        candidates = [k for k in range(data.shape[0]) if 0 < alphas[k] < C]
    for i in candidates:
        # Recompute the error of sample i from the current alphas and b.
        Ei = np.dot(np.multiply(Lab, alphas), dp[i, :]) + b - Lab[i]
        violates_kkt = (Lab[i] * Ei < -eps and alphas[i] < C) or \
            (Lab[i] * Ei > eps and alphas[i] > 0)
        if not violates_kkt:
            continue
        # Choose the second variable.
        j = select(i, Ei, entireSet)
        if j < 0:
            # No usable partner was found; a negative value must not be used
            # as an index (it would address the last sample).
            continue
        # Optimise the pair.
        if varPairOpt(i, j):
            alpha_change += 1
    iter_ += 1
    if entireSet:  # this pass covered everything; the next one is partial
        entireSet = 0
    elif alpha_change == 0:  # partial pass changed nothing; go back to a full pass
        entireSet = 1
    print("第 " + str(iter_) + " 次迭代")
print(np.around(alphas, decimals=2))