Support Vector Machines: Nonlinear Classification SVM

Model prototype
sklearn.svm.SVC(C=1.0, kernel='rbf', degree=3, gamma='auto', coef0=0.0, shrinking=True, probability=False, tol=0.001, cache_size=200, class_weight=None, verbose=False, max_iter=-1, decision_function_shape=None, random_state=None)
Parameters

  • C: penalty of the error term; larger values penalize margin violations more heavily
  • kernel: kernel type ('linear', 'poly', 'rbf', 'sigmoid', 'precomputed', or a callable)
  • degree: degree of the polynomial kernel (ignored by the other kernels)
  • gamma: kernel coefficient for 'rbf', 'poly', and 'sigmoid'
  • coef0: independent term of the kernel function; only used by 'poly' and 'sigmoid'
  • shrinking: whether to use the shrinking heuristic
  • probability: whether to enable probability estimates (must be set before training, and slows training down)
  • tol: tolerance for the stopping criterion
  • cache_size: size of the kernel cache, in MB
  • class_weight: per-class weights applied to C; 'balanced' weights classes inversely to their frequencies
  • verbose: whether to enable verbose output
  • max_iter: hard limit on solver iterations; -1 means no limit
  • decision_function_shape: shape of the decision function
    • 'ovr': one-vs-rest; the decision function has shape (n_samples, n_classes)
    • 'ovo': one-vs-one; the decision function has shape (n_samples, n_classes*(n_classes-1)/2)
    • None: the default
  • random_state: seed for the pseudo-random number generator used when shuffling data for probability estimates
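
A minimal construction sketch (the values are illustrative, not recommendations): kernel, degree, gamma, and coef0 together define the kernel function, while C controls the penalty on margin violations.

from sklearn import svm

clf=svm.SVC(C=10.0,kernel='poly',degree=2,gamma=0.1,coef0=1.0,
            decision_function_shape='ovr',random_state=0)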

Attributes

  • support_: array of shape [n_SV], indices of the support vectors
  • support_vectors_: array of shape [n_SV, n_features], the support vectors themselves
  • n_support_: array-like of shape [n_class], the number of support vectors for each class
  • dual_coef_: array of shape [n_class-1, n_SV], coefficients of the support vectors in the decision function (dual problem)
  • coef_: array of shape [n_class*(n_class-1)/2, n_features], per-feature weights (primal problem); only available with the linear kernel
  • intercept_: array of shape [n_class*(n_class-1)/2], the constants in the decision function
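
These attributes only exist after fit has been called. A small sketch on the iris data (the shapes in the comments assume its 3 classes and 4 features):

from sklearn import datasets,svm

iris=datasets.load_iris()
clf=svm.SVC(kernel='linear').fit(iris.data,iris.target)
print(clf.support_.shape)         # (n_SV,): indices of the support vectors
print(clf.support_vectors_.shape) # (n_SV, 4): the support vectors themselves
print(clf.n_support_)             # support vector count per class
print(clf.dual_coef_.shape)       # (2, n_SV): n_class-1 rows
print(clf.coef_.shape)            # (3, 4): one row per one-vs-one pair
print(clf.intercept_.shape)       # (3,): n_class*(n_class-1)/2 constants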

Methods

  • fit(X,y[,sample_weight]): train the model
  • predict(X): predict class labels for X
  • score(X,y[,sample_weight]): return the mean accuracy on the given test data
  • predict_log_proba(X): log of the class probabilities (requires probability=True)
  • predict_proba(X): class probabilities (requires probability=True)
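
predict_proba and predict_log_proba are available only if the estimator was constructed with probability=True; a brief sketch:

from sklearn import datasets,svm

iris=datasets.load_iris()
clf=svm.SVC(kernel='rbf',probability=True,random_state=0) # must be set before fit
clf.fit(iris.data,iris.target)
print(clf.predict(iris.data[:3]))       # hard class labels
print(clf.predict_proba(iris.data[:3])) # per-class probabilities, shape (3, n_classes)
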
import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets,svm
from sklearn.model_selection import train_test_split

Loading the data

def load_data_classfication():
    # Load the iris dataset and split off a stratified 25% test set
    iris=datasets.load_iris()
    X_train=iris.data
    y_train=iris.target
    return train_test_split(X_train,y_train,test_size=0.25,random_state=0,stratify=y_train)

The effect of different kernels
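
For vectors x and z, the four built-in kernels compared below are: linear, ⟨x,z⟩; polynomial, (γ⟨x,z⟩+r)^p with p given by degree and r by coef0; RBF, exp(-γ‖x-z‖²); and sigmoid, tanh(γ⟨x,z⟩+r). A quick numpy sanity check of the polynomial and RBF formulas against sklearn.metrics.pairwise (not part of the experiments below):

import numpy as np
from sklearn.metrics.pairwise import rbf_kernel,polynomial_kernel

X=np.random.RandomState(0).rand(5,4)
gamma,degree,r=0.5,3,1.0
# RBF by hand: exp(-gamma*||x-z||^2) over all pairs
sq_dists=((X[:,None,:]-X[None,:,:])**2).sum(axis=-1)
assert np.allclose(np.exp(-gamma*sq_dists),rbf_kernel(X,gamma=gamma))
# polynomial by hand: (gamma*<x,z>+r)^degree
assert np.allclose((gamma*X@X.T+r)**degree,
                   polynomial_kernel(X,degree=degree,gamma=gamma,coef0=r))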

# Linear kernel
def test_SVC_linear(*data):
    X_train,X_test,y_train,y_test=data
    cls=svm.SVC(kernel='linear')
    cls.fit(X_train,y_train)
    print('Coefficients:%s,\nintercept %s'%(cls.coef_,cls.intercept_))
    print('Score:%.2f'%cls.score(X_test,y_test))

X_train,X_test,y_train,y_test=load_data_classfication()
test_SVC_linear(X_train,X_test,y_train,y_test)
# Polynomial kernel
def test_SVC_poly(*data):
    X_train,X_test,y_train,y_test=data
    fig=plt.figure()

    # Test degree
    degrees=range(1,20)
    train_scores=[]
    test_scores=[]
    for degree in degrees:
        cls=svm.SVC(kernel='poly',degree=degree)
        cls.fit(X_train,y_train)
        train_scores.append(cls.score(X_train,y_train))
        test_scores.append(cls.score(X_test,y_test))
    ax=fig.add_subplot(1,3,1)
    ax.plot(degrees,train_scores,label="Training score",marker='x')
    ax.plot(degrees,test_scores,label='Testing score',marker='o')
    ax.set_title('SVC_poly_degree')
    ax.set_xlabel('p')
    ax.set_ylabel('score')
    ax.set_ylim(0,1.05)
    ax.legend(loc='best',framealpha=0.5)

    # Test gamma (degree fixed at 3)
    gammas=range(1,20)
    train_scores=[]
    test_scores=[]
    for gamma in gammas:
        cls=svm.SVC(kernel='poly',gamma=gamma,degree=3)
        cls.fit(X_train,y_train)
        train_scores.append(cls.score(X_train,y_train))
        test_scores.append(cls.score(X_test,y_test))
    ax=fig.add_subplot(1,3,2)
    ax.plot(gammas,train_scores,label='Training score',marker='+')
    ax.plot(gammas,test_scores,label='Testing score',marker='o')
    ax.set_title('SVC_poly_gamma')
    ax.set_xlabel(r'$\gamma$')
    ax.set_ylabel('score')
    ax.set_ylim(0,1.05)
    ax.legend(loc='best',framealpha=0.5)

    # Test r (coef0; gamma fixed at 10, degree at 3)
    rs=range(20)
    train_scores=[]
    test_scores=[]
    for r in rs:
        cls=svm.SVC(kernel='poly',gamma=10,degree=3,coef0=r)
        cls.fit(X_train,y_train)
        train_scores.append(cls.score(X_train,y_train))
        test_scores.append(cls.score(X_test,y_test))
    ax=fig.add_subplot(1,3,3)
    ax.plot(rs,train_scores,label="Training score",marker='+')
    ax.plot(rs,test_scores,label='Testing score',marker='o')
    ax.set_title('SVC_poly_r')
    ax.set_xlabel(r'r')
    ax.set_ylabel('score')
    ax.set_ylim(0,1.05)
    ax.legend(loc='best',framealpha=0.5)
    plt.show()

test_SVC_poly(X_train,X_test,y_train,y_test)
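
The loops above sweep one hyperparameter at a time. An equivalent joint search can be written with GridSearchCV (a sketch reusing the split from load_data_classfication; the grid is illustrative, not exhaustive):

from sklearn.model_selection import GridSearchCV

param_grid={'degree':[1,2,3,5],'gamma':[0.1,1,10],'coef0':[0,1,10]}
search=GridSearchCV(svm.SVC(kernel='poly'),param_grid,cv=5)
search.fit(X_train,y_train)
print(search.best_params_,search.best_score_)
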
# Gaussian (RBF) kernel
def test_SVC_rbf(*data):
    X_train,X_test,y_train,y_test=data
    gammas=range(1,20)
    train_scores=[]
    test_scores=[]
    for gamma in gammas:
        cls=svm.SVC(kernel='rbf',gamma=gamma)
        cls.fit(X_train,y_train)
        train_scores.append(cls.score(X_train,y_train))
        test_scores.append(cls.score(X_test,y_test))
    fig=plt.figure()
    ax=fig.add_subplot(1,1,1)
    ax.plot(gammas,train_scores,label="Training score",marker='+')
    ax.plot(gammas,test_scores,label='Testing score',marker='o')
    ax.set_title('SVC_rbf')
    ax.set_xlabel(r'$\gamma$')
    ax.set_ylabel('score')
    ax.set_ylim(0,1.05)
    ax.legend(loc='best',framealpha=0.5)
    plt.show()

test_SVC_rbf(X_train,X_test,y_train,y_test)
# Sigmoid kernel
def test_SVC_sigmoid(*data):
    X_train,X_test,y_train,y_test=data
    fig=plt.figure()

    # Test gamma (coef0 fixed at 0)
    gammas=np.logspace(-2,1)
    train_scores=[]
    test_scores=[]
    for gamma in gammas:
        cls=svm.SVC(kernel='sigmoid',gamma=gamma,coef0=0)
        cls.fit(X_train,y_train)
        train_scores.append(cls.score(X_train,y_train))
        test_scores.append(cls.score(X_test,y_test))
    ax=fig.add_subplot(1,2,1)
    ax.plot(gammas,train_scores,label='Training score',marker='+')
    ax.plot(gammas,test_scores,label='Testing score',marker='o')
    ax.set_title('SVC_sigmoid_gammas')
    ax.set_xscale('log')
    ax.set_xlabel(r'$\gamma$')
    ax.set_ylabel('score')
    ax.set_ylim(0,1.05)
    ax.legend(loc='best',framealpha=0.5)

    # Test r (coef0; gamma fixed at 0.01)
    rs=np.linspace(0,5)
    train_scores=[]
    test_scores=[]
    for r in rs:
        cls=svm.SVC(kernel='sigmoid',coef0=r,gamma=0.01)
        cls.fit(X_train,y_train)
        train_scores.append(cls.score(X_train,y_train))
        test_scores.append(cls.score(X_test,y_test))
    ax=fig.add_subplot(1,2,2)
    ax.plot(rs,train_scores,label="Training score",marker='+')
    ax.plot(rs,test_scores,label='Testing score',marker='o')
    ax.set_title('SVC_sigmoid_r')
    ax.set_xlabel(r'r')
    ax.set_ylabel('score')
    ax.set_ylim(0,1.05)
    ax.legend(loc='best',framealpha=0.5)
    plt.show()

test_SVC_sigmoid(X_train,X_test,y_train,y_test)
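
To see the kernels geometrically, one can train on just the first two iris features and plot the predicted regions (a visualization sketch; the 2-feature models are for plotting only and score lower than the full 4-feature ones):

import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets,svm

iris=datasets.load_iris()
X2,y=iris.data[:,:2],iris.target
xx,yy=np.meshgrid(np.linspace(X2[:,0].min()-0.5,X2[:,0].max()+0.5,200),
                  np.linspace(X2[:,1].min()-0.5,X2[:,1].max()+0.5,200))
fig,axes=plt.subplots(1,4,figsize=(16,4))
for ax,kernel in zip(axes,['linear','poly','rbf','sigmoid']):
    cls=svm.SVC(kernel=kernel).fit(X2,y)
    Z=cls.predict(np.c_[xx.ravel(),yy.ravel()]).reshape(xx.shape)
    ax.contourf(xx,yy,Z,alpha=0.3)        # predicted class regions
    ax.scatter(X2[:,0],X2[:,1],c=y,s=15)  # training points
    ax.set_title(kernel)
plt.show()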
