1. SVM Theory
Linearly separable SVM (hard margin): the dual problem is

$$\min_{\alpha}\ \frac{1}{2}\sum_{i=1}^{N}\sum_{j=1}^{N}\alpha_i\alpha_j y_i y_j (x_i\cdot x_j)-\sum_{i=1}^{N}\alpha_i \qquad \text{s.t.}\quad \sum_{i=1}^{N}\alpha_i y_i=0,\quad \alpha_i\ge 0$$

Linear SVM (soft margin): the same dual objective, with the box constraint $0\le\alpha_i\le C$ in place of $\alpha_i\ge 0$.

Non-linear SVM: the inner product $x_i\cdot x_j$ is replaced by a kernel value $K(x_i,x_j)$, and the decision function becomes $f(x)=\operatorname{sign}\bigl(\sum_{i=1}^{N}\alpha_i y_i K(x_i,x)+b\bigr)$.

Notes

In these formulas, $\alpha_i$ are the Lagrange multipliers, $C$ is the penalty parameter, and $K(x_i,x_j)$ is the kernel function. The detailed derivations can be found in Li Hang's "Statistical Learning Methods".
2. SVM Code Implementation
import numpy as np
import random
from cvxopt import solvers, matrix
import matplotlib.pyplot as plt
class linear_kernel(object):
    '''Linear kernel: K(x, z) = x . z'''
    def __init__(self):
        pass

    def calculate(self, x, z):
        return np.dot(x, z)

    def __call__(self, x, z):
        return self.calculate(x, z)

class polynomial_kernel(object):
    '''Polynomial kernel: K(x, z) = (1 + x . z)^p'''
    def __init__(self, p=2):
        self.__p = p

    def calculate(self, x, z):
        return (1 + np.dot(x, z)) ** self.__p

    def __call__(self, x, z):
        return self.calculate(x, z)

class gaussian_kernel(object):
    '''Gaussian (RBF) kernel: K(x, z) = exp(-||x - z||^2 / (2 * sigma^2))'''
    def __init__(self, sigma=5.0):
        self.__sigma = sigma

    def calculate(self, x, z):
        return np.exp(-1 * np.linalg.norm(x - z) ** 2 / (2 * self.__sigma ** 2))

    def __call__(self, x, z):
        return self.calculate(x, z)
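# A quick sanity check of the kernels above, with made-up inputs
# (values worked out by hand; not part of the original demo):
#   linear_kernel()([1, 2], [3, 4])           -> 11   (1*3 + 2*4)
#   polynomial_kernel(p=2)([1, 2], [3, 4])    -> 144  ((1 + 11) ** 2)
#   gaussian_kernel(sigma=5.0)(np.array([1, 2]),
#                              np.array([3, 4]))  -> exp(-8/50), about 0.852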
class SVM(object):
    def __init__(self, kernel=linear_kernel(), C=None, epsilon=1e-6):
        '''
        :param kernel: kernel function; defaults to the linear kernel
        :param C: penalty parameter; None means a hard-margin SVM
        :param epsilon: multipliers smaller than epsilon are treated as 0
        '''
        self.w = None                # weight
        self.b = None                # bias
        self.sv = []                 # support vectors
        self.__kernel = kernel
        self.__C = C
        self.__epsilon = epsilon
        self.__alpha = None          # Lagrange multipliers
        self.__training_x = None     # training x
        self.__training_y = None     # training y
    def fit(self, x, y):
        self.__training_x = np.array(x)
        self.__training_y = np.array(y)
        self.sv = []  # reset in case fit is called more than once
        length = len(self.__training_x)
        # Gram matrices: y_gram[i, j] = y_i * y_j, x_gram[i, j] = K(x_i, x_j)
        y_gram = np.outer(self.__training_y, self.__training_y)
        x_gram = np.zeros((length, length))
        for i in range(length):
            for j in range(length):
                x_gram[i][j] += self.__kernel(self.__training_x[i], self.__training_x[j])
        '''
        cvxopt.solvers.qp(P, q, G, h, A, b):
            minimize    (1/2)*x'*P*x + q'*x
            subject to  G*x <= h
                        A*x = b
        P is an n x n dense or sparse 'd' matrix with the lower triangular
        part of P stored in the lower triangle. Must be positive
        semidefinite.
        q is an n x 1 dense 'd' matrix.
        G is an m x n dense or sparse 'd' matrix.
        h is an m x 1 dense 'd' matrix.
        A is a p x n dense or sparse 'd' matrix.
        b is a p x 1 dense 'd' matrix or None.
        '''
        # Map the dual problem onto cvxopt's standard form:
        # P[i, j] = y_i * y_j * K(x_i, x_j), q = -1 (we minimize the negated dual)
        P = matrix(y_gram * x_gram)
        q = matrix(np.ones(length) * -1)  # 'q' must be a 'd' matrix with one column
        A = matrix(self.__training_y, (1, length), 'd')  # equality constraint: sum_i alpha_i * y_i = 0
        b = matrix(0.0)  # b is a scalar
        if self.__C is not None:
            # Soft margin: 0 <= alpha_i <= C, expressed as -alpha_i <= 0 and alpha_i <= C
            G1 = np.diag(np.full(length, -1.0))
            G2 = np.identity(length)
            G = matrix(np.vstack((G1, G2)))
            h1 = np.zeros(length)
            h2 = np.full(length, float(self.__C))
            h = matrix(np.hstack((h1, h2)))
        else:
            # Hard margin: alpha_i >= 0, expressed as -alpha_i <= 0
            G = matrix(np.identity(length) * -1)
            h = matrix(np.zeros(length))
        sol = solvers.qp(P, q, G, h, A, b)
        self.__alpha = np.ravel(sol['x'])
        # Points whose multipliers are numerically non-zero are the support vectors
        index = [i for i in range(len(self.__alpha)) if self.__alpha[i] > self.__epsilon]
        for ind in index:
            self.sv.append(self.__training_x[ind])
        self.sv = np.array(self.sv)
        if type(self.__kernel) is linear_kernel:
            # With a linear kernel the primal weight vector can be recovered:
            # w = sum_i alpha_i * y_i * x_i
            self.w = np.zeros(len(self.__training_x[0]))
            for i, j, k in zip(self.__alpha, self.__training_y, self.__training_x):
                self.w += i * j * k
        # Bias: average of y_j - sum_i alpha_i * y_i * K(x_i, x_j) over the support vectors
        self.b = 0
        for j in index:
            sigma = 0
            for i in range(length):
                sigma += self.__alpha[i] * self.__training_y[i] * x_gram[i, j]
            self.b += self.__training_y[j] - sigma
        self.b /= len(index)
    def project(self, x):
        x = np.asanyarray(x)
        if len(x.shape) != 2 or x.shape[1] != self.__training_x.shape[1]:
            raise ValueError('input x error!')
        if self.w is not None:
            # Linear kernel: use the explicit weight vector
            return np.dot(x, self.w) + self.b
        else:
            # General kernel: f(x) = sum_i alpha_i * y_i * K(x_i, x) + b
            sigma = np.zeros(len(x))
            for pos in range(len(x)):
                for i in range(len(self.__alpha)):
                    sigma[pos] += self.__alpha[i] * self.__training_y[i] * self.__kernel(x[pos], self.__training_x[i])
            return sigma + self.b

    def predict(self, x):
        return np.sign(self.project(x))
if __name__ == '__main__':
    # Ground-truth decision line
    f = lambda x: 10 / 6 * x - 10 / 3

    # Randomly generate n points in the square [0, 10] x [0, 10];
    # a point is labeled 1 if it lies above the line f, and -1 otherwise
    def generate_data(n):
        X, Y = [], []
        for i in range(n):
            x, y = random.uniform(0, 10), random.uniform(0, 10)
            X.append([x, y])
            Y.append(np.sign(y - f(x)))
        return np.array(X), np.array(Y)

    # Separate positive and negative samples for easier visualization
    def split_data(x, y):
        positive = []
        negative = []
        for i, j in zip(x, y):
            if j == 1:
                positive.append(i)
            else:
                negative.append(i)
        return np.array(positive), np.array(negative)

    # Randomly generate 300 training points
    train_x, train_y = generate_data(300)
    svm = SVM()
    svm.fit(train_x, train_y)

    # Randomly generate 100 test points and predict their labels
    test_x, test_y = generate_data(100)
    predict = svm.predict(test_x)

    # Check whether the predictions match the true labels
    print(predict == test_y)

    p, n = split_data(train_x, train_y)
    # Plot positive and negative samples separately
    plt.plot(p[:, 0], p[:, 1], 'ro')
    plt.plot(n[:, 0], n[:, 1], 'bo')
    if svm.w is not None:
        func = lambda x: (- svm.w[0] / svm.w[1]) * x - svm.b / svm.w[1]
        # Plot the separating hyperplane
        plt.plot([0, 8], [func(0), func(8)])
    # Highlight the support vectors
    plt.scatter(svm.sv[:, 0], svm.sv[:, 1], s=100, c='g')
    plt.show()
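The demo above exercises only the default linear kernel. As a minimal sketch of the kernelized, soft-margin path (the XOR-style dataset, the helper generate_xor_data, and the sigma/C values below are illustrative assumptions, not part of the original experiment), the Gaussian kernel can be applied to data that no straight line separates:

import random
import numpy as np

# Assumes the SVM and gaussian_kernel classes defined above are in scope.
def generate_xor_data(n):
    # Hypothetical helper: points in [-5, 5] x [-5, 5], labeled 1 when the
    # coordinates share a sign and -1 otherwise (not linearly separable)
    X, Y = [], []
    for _ in range(n):
        x, y = random.uniform(-5, 5), random.uniform(-5, 5)
        X.append([x, y])
        Y.append(1.0 if x * y > 0 else -1.0)
    return np.array(X), np.array(Y)

train_x, train_y = generate_xor_data(200)
svm = SVM(kernel=gaussian_kernel(sigma=2.0), C=1.0)
svm.fit(train_x, train_y)

test_x, test_y = generate_xor_data(50)
print((svm.predict(test_x) == test_y).mean())  # fraction of correct predictions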
3. Results
pcost dcost gap pres dres
0: -8.4853e+01 -1.8172e+02 1e+03 3e+01 2e+00
1: -2.3861e+02 -2.4917e+02 5e+02 2e+01 1e+00
2: -3.3822e+02 -3.5070e+02 6e+02 2e+01 1e+00
3: -1.2151e+03 -1.2303e+03 6e+02 1e+01 1e+00
4: -2.6331e+03 -2.6573e+03 7e+02 1e+01 1e+00
5: -1.6174e+04 -1.6074e+04 1e+03 1e+01 1e+00
6: -7.9358e+02 -7.8752e+02 3e+03 1e+01 1e+00
7: -2.7878e+03 -2.3236e+03 3e+03 1e+01 8e-01
8: -7.9404e+03 -6.2730e+03 4e+03 1e+01 8e-01
9: -3.0391e+03 -1.9997e+03 6e+03 8e+00 5e-01
10: -2.1539e+03 -9.8886e+02 2e+03 3e+00 2e-01
11: -4.7020e+02 -5.9774e+02 1e+02 9e-13 3e-11
12: -5.3522e+02 -5.3685e+02 2e+00 3e-13 8e-12
13: -5.3624e+02 -5.3625e+02 2e-02 1e-13 8e-12
14: -5.3625e+02 -5.3625e+02 2e-04 3e-13 8e-12
Optimal solution found.
[ True True True True True True True True True True True True
True True True True True True True True True True True True
True True True True True True True True True True True True
True True True True True True True True True True True True
True True True True True True True True True True True True
True True True True True True True True True True True True
True True True True True True True True True True True True
True True True True True True True True True True True True
True True True True]
The visualization is shown below:
4. Conclusion
Before deep neural networks rose to dominance, the SVM was widely used as a powerful classifier, and the figure above illustrates its excellent classification performance.