"Statistical Learning Methods" Study Notes, Part 3: SVM

I. SVM Principles

  1. Linearly separable support vector machine

$$
\begin{aligned}
\min_{\alpha} \quad & \frac{1}{2}\sum_{i=1}^{N}\sum_{j=1}^{N}\alpha_i\alpha_j y_i y_j (x_i \cdot x_j) - \sum_{i=1}^{N}\alpha_i \\
\text{s.t.} \quad & \sum_{i=1}^{N}\alpha_i y_i = 0 \\
& 0 \le \alpha_i, \quad i = 1, 2, \cdots, N
\end{aligned}
$$
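
Once the optimal multipliers $\alpha^*$ are found, the primal solution can be recovered from them; this is exactly what the `fit` method in Section II computes:

$$
w^* = \sum_{i=1}^{N}\alpha_i^* y_i x_i, \qquad
b^* = y_j - \sum_{i=1}^{N}\alpha_i^* y_i (x_i \cdot x_j),
$$

where $x_j$ is any support vector, i.e. any sample with $\alpha_j^* > 0$.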

  2. Linear support vector machine

$$
\begin{aligned}
\min_{\alpha} \quad & \frac{1}{2}\sum_{i=1}^{N}\sum_{j=1}^{N}\alpha_i\alpha_j y_i y_j (x_i \cdot x_j) - \sum_{i=1}^{N}\alpha_i \\
\text{s.t.} \quad & \sum_{i=1}^{N}\alpha_i y_i = 0 \\
& 0 \le \alpha_i \le C, \quad i = 1, 2, \cdots, N
\end{aligned}
$$
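
The only change from the separable case is the upper bound $C$ on each multiplier. By the KKT conditions, the value of $\alpha_i^*$ tells us where a sample sits relative to the margin:

$$
\alpha_i^* = 0 \;\Rightarrow\; x_i \text{ outside the margin}; \qquad
0 < \alpha_i^* < C \;\Rightarrow\; x_i \text{ on the margin boundary}; \qquad
\alpha_i^* = C \;\Rightarrow\; x_i \text{ inside the margin or misclassified.}
$$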

  3. Non-linear support vector machine

$$
\begin{aligned}
\min_{\alpha} \quad & \frac{1}{2}\sum_{i=1}^{N}\sum_{j=1}^{N}\alpha_i\alpha_j y_i y_j K(x_i, x_j) - \sum_{i=1}^{N}\alpha_i \\
\text{s.t.} \quad & \sum_{i=1}^{N}\alpha_i y_i = 0 \\
& 0 \le \alpha_i \le C, \quad i = 1, 2, \cdots, N
\end{aligned}
$$

  4. Notes
      In these formulas, $\alpha_i$ is a Lagrange multiplier, $C$ is the penalty parameter, and $K(x_i, x_j)$ is the kernel function. For the detailed derivations, see Li Hang's "Statistical Learning Methods".
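
At prediction time the classifier only needs kernel evaluations between a new point $x$ and the training samples, which is what the `project` and `predict` methods below implement:

$$
f(x) = \operatorname{sign}\left(\sum_{i=1}^{N}\alpha_i^* y_i K(x_i, x) + b^*\right)
$$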

II. SVM Code Implementation

import numpy as np
import random
from cvxopt import solvers, matrix
import matplotlib.pyplot as plt


class linear_kernel(object):
    def __init__(self):
        pass

    def calculate(self, x, z):
        return np.dot(x, z)

    def __call__(self, x, z):
        return self.calculate(x, z)


class polynomial_kernel(object):
    def __init__(self, p=2):
        self.__p = p

    def calculate(self, x, z):
        return (1 + np.dot(x, z)) ** self.__p

    def __call__(self, x, z):
        return self.calculate(x, z)


class gaussian_kernel(object):
    def __init__(self, sigma=5.0):
        self.__sigma = sigma

    def calculate(self, x, z):
        return np.exp(-1 * np.linalg.norm(x - z) ** 2 / (2 * self.__sigma ** 2))

    def __call__(self, x, z):
        return self.calculate(x, z)
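
# A quick sanity check of the three kernels above (values worked out by hand;
# inputs are assumed to be 1-D numpy arrays):
#   linear_kernel()(np.array([1., 2.]), np.array([3., 4.]))         -> 11.0
#   polynomial_kernel(p=2)(np.array([1., 2.]), np.array([3., 4.]))  -> (1 + 11)**2 = 144.0
#   gaussian_kernel(sigma=5.0)(np.array([1., 2.]), np.array([3., 4.])) -> exp(-8/50), about 0.852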


class SVM(object):
    def __init__(self, kernel=linear_kernel(), C=None, epsilon=1e-6):
        '''
        :param kernel: kernel function; defaults to the linear kernel
        :param C: penalty parameter; None means a hard margin
        :param epsilon: multipliers smaller than epsilon are treated as 0
        '''
        self.w = None # weight
        self.b = None # bias

        self.sv = [] # support vectors

        self.__kernel = kernel
        self.__C = C
        self.__epsilon = epsilon

        self.__alpha = None # Lagrange multipliers
        self.__training_x = None # training x
        self.__training_y = None # training y

    def fit(self, x, y):
        self.__training_x = np.array(x)
        self.__training_y = np.array(y)
        length = len(self.__training_x)

        y_gram = np.outer(self.__training_y, self.__training_y)

        x_gram = np.zeros((length, length))
        for i in range(length):
            for j in range(length):
                x_gram[i, j] = self.__kernel(self.__training_x[i], self.__training_x[j])

        '''
        cvxopt.solvers.qp(P, q, G, h, A, b):
        minimize    (1/2)*x'*P*x + q'*x
        subject to  G*x <= h
                    A*x = b.

        P is an n x n dense or sparse 'd' matrix with the lower triangular
        part of P stored in the lower triangle.  Must be positive
        semidefinite.

        q is an n x 1 dense 'd' matrix.

        G is an m x n dense or sparse 'd' matrix.

        h is an m x 1 dense 'd' matrix.

        A is a p x n dense or sparse 'd' matrix.

        b is a p x 1 dense 'd' matrix or None.

        '''
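        # Mapping the SVM dual onto the QP form above:
        #   the optimization variable is alpha (the N Lagrange multipliers)
        #   P[i, j] = y_i * y_j * K(x_i, x_j)
        #   q       = a vector of -1 (the -sum(alpha_i) term)
        #   A, b    encode the equality constraint sum(alpha_i * y_i) = 0
        #   G, h    encode 0 <= alpha_i (and alpha_i <= C when C is given)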
        P = matrix(y_gram * x_gram)
        q = matrix(np.ones(length) * -1)  # 'q' must be a 'd' matrix with one column

        A = matrix(self.__training_y.astype(np.double), (1, length))
        b = matrix(0.0)  # b here is a scalar

        if self.__C is not None:
            # soft margin: stack -alpha_i <= 0 on top of alpha_i <= C
            G1 = np.diag(np.full(length, -1.0))
            G2 = np.identity(length)
            G = matrix(np.vstack((G1, G2)))
            h1 = np.zeros(length)
            h2 = np.full(length, float(self.__C))
            h = matrix(np.hstack((h1, h2)))
        else:
            # hard margin: only -alpha_i <= 0
            G = matrix(np.identity(length) * -1)
            h = matrix(np.zeros(length))

        sol = solvers.qp(P, q, G, h, A, b)

        self.__alpha = np.ravel(sol['x'])

        # indices of the support vectors (multipliers larger than epsilon)
        index = [i for i in range(len(self.__alpha)) if self.__alpha[i] > self.__epsilon]

        for ind in index:
            self.sv.append(self.__training_x[ind])

        self.sv = np.array(self.sv)

        if isinstance(self.__kernel, linear_kernel):
            # an explicit weight vector w only exists for the linear kernel
            self.w = np.zeros(len(self.__training_x[0]))

            for a, yi, xi in zip(self.__alpha, self.__training_y, self.__training_x):
                self.w += a * yi * xi

        # average the bias over all support vectors for numerical stability
        self.b = 0
        for j in index:
            sigma = 0
            for i in range(length):
                sigma += self.__alpha[i] * self.__training_y[i] * x_gram[i, j]
            self.b += self.__training_y[j] - sigma

        self.b /= len(index)


    def project(self, x):
        x = np.asanyarray(x)
        if len(x.shape) != 2 or x.shape[1] != self.__training_x.shape[1]:
            raise ValueError('input x error!')

        if self.w is not None:
            return np.dot(x, self.w) + self.b
        else:
            # non-linear case: evaluate the kernel expansion against every training point
            sigma = np.zeros(len(x))
            for pos in range(len(x)):
                for i in range(len(self.__alpha)):
                    sigma[pos] += self.__alpha[i] * self.__training_y[i] * self.__kernel(x[pos], self.__training_x[i])
            return sigma + self.b

    def predict(self, x):
        return np.sign(self.project(x))


if __name__ == '__main__':

    # decision boundary used to label the synthetic data
    f = lambda x: 10 / 6 * x - 10 / 3

    # randomly generate n points in the square [0, 10] x [0, 10]; a point above
    # the line f is labeled 1, otherwise -1
    def generate_data(n):
        X, Y = [], []
        for i in range(n):
            x, y = random.uniform(0, 10), random.uniform(0, 10)
            X.append([x, y])
            Y.append(np.sign(y - f(x)))

        return np.array(X), np.array(Y)

    # separate the positive and negative samples to make plotting easier
    def split_data(x, y):
        positive = []
        negative = []
        for i, j in zip(x, y):
            if j == 1:
                positive.append(i)
            else:
                negative.append(i)
        return np.array(positive), np.array(negative)

    # randomly generate 300 training points
    train_x, train_y = generate_data(300)

    svm = SVM()

    svm.fit(train_x, train_y)

    # randomly generate 100 test points and predict their labels
    test_x, test_y = generate_data(100)

    predict = svm.predict(test_x)

    # compare the predictions with the true labels
    print(predict == test_y)

    p, n = split_data(train_x, train_y)

    # plot the positive and negative samples separately
    plt.plot(p[:, 0], p[:, 1], 'ro')
    plt.plot(n[:, 0], n[:, 1], 'bo')

    if svm.w is not None:
        func = lambda x: (- svm.w[0] / svm.w[1]) * x - svm.b / svm.w[1]
        # plot the separating hyperplane
        plt.plot([0, 8], [func(0), func(8)])

        # plot the support vectors
        plt.scatter(svm.sv[:, 0], svm.sv[:, 1], s=100, c='g')

        plt.show()
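
The demo above exercises only the linear kernel. As a minimal sketch of the non-linear path (the ring-shaped data generator and the parameter values sigma=2.0 and C=10.0 are illustrative assumptions, not values from the book), a Gaussian-kernel SVM separates data that no straight line can:

# assumes the SVM and gaussian_kernel classes defined above are in scope
import numpy as np

def generate_ring_data(n):
    # points inside radius 3 are labeled 1, points outside radius 5 are labeled -1
    X, Y = [], []
    while len(X) < n:
        x, y = np.random.uniform(-8, 8), np.random.uniform(-8, 8)
        r = np.hypot(x, y)
        if r < 3:
            X.append([x, y]); Y.append(1.0)
        elif r > 5:
            X.append([x, y]); Y.append(-1.0)
    return np.array(X), np.array(Y)

ring_x, ring_y = generate_ring_data(200)
ksvm = SVM(kernel=gaussian_kernel(sigma=2.0), C=10.0)
ksvm.fit(ring_x, ring_y)

ring_test_x, ring_test_y = generate_ring_data(50)
print(np.mean(ksvm.predict(ring_test_x) == ring_test_y))  # accuracy, expected close to 1.0

Because gaussian_kernel is not the linear kernel, fit leaves w as None and predict falls back to the kernel expansion in project.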

III. Results

     pcost       dcost       gap    pres   dres
 0: -8.4853e+01 -1.8172e+02  1e+03  3e+01  2e+00
 1: -2.3861e+02 -2.4917e+02  5e+02  2e+01  1e+00
 2: -3.3822e+02 -3.5070e+02  6e+02  2e+01  1e+00
 3: -1.2151e+03 -1.2303e+03  6e+02  1e+01  1e+00
 4: -2.6331e+03 -2.6573e+03  7e+02  1e+01  1e+00
 5: -1.6174e+04 -1.6074e+04  1e+03  1e+01  1e+00
 6: -7.9358e+02 -7.8752e+02  3e+03  1e+01  1e+00
 7: -2.7878e+03 -2.3236e+03  3e+03  1e+01  8e-01
 8: -7.9404e+03 -6.2730e+03  4e+03  1e+01  8e-01
 9: -3.0391e+03 -1.9997e+03  6e+03  8e+00  5e-01
10: -2.1539e+03 -9.8886e+02  2e+03  3e+00  2e-01
11: -4.7020e+02 -5.9774e+02  1e+02  9e-13  3e-11
12: -5.3522e+02 -5.3685e+02  2e+00  3e-13  8e-12
13: -5.3624e+02 -5.3625e+02  2e-02  1e-13  8e-12
14: -5.3625e+02 -5.3625e+02  2e-04  3e-13  8e-12
Optimal solution found.
[ True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True]
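
The table above is cvxopt's iteration log: pcost and dcost are the primal and dual objective values, gap is the duality gap, and pres and dres are the primal and dual residuals. The boolean array that follows shows that all 100 test points were classified correctly.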

The visualization result is shown below:

[Figure: training samples (red = positive, blue = negative), the fitted separating hyperplane, and the support vectors highlighted in green]

IV. Conclusion
  Before the rise of neural networks, the SVM was already widely used as a powerful classifier, and the figure above shows its excellent classification performance on this problem.
