
6.2 试用LIBSVM,在西瓜数据集3.0 α 上分别用线性核和高斯核训练一个SVM,并比较其支持向量的差别。


# -*- coding: utf-8 -*- 
"""Excercise 6.2"""
import numpy as np
import pandas as pd
from sklearn import svm

# Load the watermelon 3.0-alpha dataset from CSV.
data = pd.read_csv('../dataset/watermelon3.0alpha.csv')

# Separate the data into samples and labels: every column except the last
# is treated as an attribute, and the last column is treated as the label.
columns = data.columns
attributes, label = columns[:-1], columns[-1]
samples_frame = data.loc[:, attributes]
label_series = data.loc[:, label]
# From here on the DataFrame is no longer needed; a plain array for the
# samples and a plain list for the labels are more convenient.
samples = samples_frame.values
labels = label_series.tolist()

# Linear kernel
clf = svm.SVC(kernel='linear')
# The model has to be fitted first: support_vectors_, support_ and
# n_support_ only exist after fit(), and reading them on an unfitted
# estimator raises AttributeError.
clf.fit(samples, labels)
# Get support vectors
print(clf.support_vectors_)
# Get indices of support vectors
print(clf.support_)
# Get number of support vectors for each class
print(clf.n_support_)

# Gaussian (RBF) kernel
clf1 = svm.SVC(kernel='rbf')
# The model has to be fitted first: support_vectors_, support_ and
# n_support_ only exist after fit(), and reading them on an unfitted
# estimator raises AttributeError.
clf1.fit(samples, labels)
# Get support vectors
print(clf1.support_vectors_)
# Get indices of support vectors
print(clf1.support_)
# Get number of support vectors for each class
print(clf1.n_support_)


[[ 0.666  0.091]
 [ 0.243  0.267]
 [ 0.343  0.099]
 [ 0.639  0.161]
 [ 0.657  0.198]
 [ 0.36   0.37 ]
 [ 0.593  0.042]
 [ 0.719  0.103]
 [ 0.697  0.46 ]
 [ 0.774  0.376]
 [ 0.634  0.264]
 [ 0.608  0.318]
 [ 0.556  0.215]
 [ 0.403  0.237]
 [ 0.481  0.149]
 [ 0.437  0.211]]
[ 8  9 11 12 13 14 15 16  0  1  2  3  4  5  6  7]
[8 8]
[[ 0.666  0.091]
 [ 0.243  0.267]
 [ 0.343  0.099]
 [ 0.639  0.161]
 [ 0.657  0.198]
 [ 0.36   0.37 ]
 [ 0.593  0.042]
 [ 0.719  0.103]
 [ 0.697  0.46 ]
 [ 0.774  0.376]
 [ 0.634  0.264]
 [ 0.608  0.318]
 [ 0.556  0.215]
 [ 0.403  0.237]
 [ 0.481  0.149]
 [ 0.437  0.211]]
[ 8  9 11 12 13 14 15 16  0  1  2  3  4  5  6  7]
[8 8]
