sklearn学习笔记——svm

三维图显示

from sklearn.datasets import make_blobs
from sklearn.datasets import make_circles
import matplotlib.pyplot as plt
from mpl_toolkits import mplot3d
import numpy as np
# Two concentric rings: 250 samples, inner ring at half the outer radius.
x, y = make_circles(n_samples=250, random_state=20, factor=0.5)
# Radial feature exp(-||x||^2): sum the squared coordinates of each row
# *before* exponentiating, so the value depends only on a sample's distance
# from the origin and the two rings land at two different heights.
r = np.exp(-(x ** 2).sum(1))
# The third argument of np.linspace is a sample count, not a step, so it must
# be an integer; the default count of 50 is used here.
rlim = np.linspace(min(r), max(r))

def plot_3D(elev=30,azim=30,x=x,y=y):
    """Show a 3D scatter of the samples with the module-level ``r`` as height.

    elev, azim: forwarded to ``view_init`` to position the camera.
    x: array whose column 0 / column 1 are drawn on the x / y axes.
    y: per-sample values used to colour the points.
    """
    axes3d = plt.subplot(projection='3d')
    first_feature, second_feature = x[:, 0], x[:, 1]
    # The height always comes from the global ``r``, not from the ``x`` argument.
    axes3d.scatter3D(first_feature, second_feature, r, c=y, s=10)
    axes3d.view_init(elev=elev, azim=azim)
    for set_label, text in ((axes3d.set_xlabel, 'x'),
                            (axes3d.set_ylabel, 'y'),
                            (axes3d.set_zlabel, 'r')):
        set_label(text)
    plt.show()


plot_3D(elev=70, azim=30, x=x, y=y)

sklearn学习笔记——svm_第1张图片

from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import numpy as np
from sklearn import svm
from sklearn.datasets import make_blobs
# Toy data set for the linear SVM demo: 50 samples around 2 blob centres.
X, y = make_blobs(
    n_samples=50,
    centers=2,
    cluster_std=0.6,
    random_state=2,
)
def plot_svm_decision_bounder(X,y):
    """Fit a linear SVC on (X, y) and draw its decision boundary and margins.

    Draws on the current axes. The evaluation grid is built from the axes
    limits in effect when the function is called, so plot the samples first.

    X: training samples; the grid built here is 2-D, so two feature columns
       are expected.
    y: class labels for the rows of X.
    """
    ax = plt.gca()
    xlim = ax.get_xlim()
    ylim = ax.get_ylim()

    # 40 x 40 grid spanning the current view, flattened to one (x, y) row per
    # grid point so it can be fed to the classifier.
    grid_x, grid_y = np.meshgrid(np.linspace(xlim[0], xlim[1], 40),
                                 np.linspace(ylim[0], ylim[1], 40))
    xy = np.vstack([grid_x.ravel(), grid_y.ravel()]).T

    clf = svm.SVC(kernel='linear').fit(X, y)
    # Decision value at every grid point, reshaped back onto the grid.
    p = clf.decision_function(xy).reshape(grid_x.shape)

    # Level 0 is the decision boundary; -1 and +1 are the margins.
    # contour() takes the plural keywords `colors` and `linestyles`.
    ax.contour(grid_x, grid_y, p, colors='k',
               levels=[-1, 0, 1], alpha=0.5, linestyles=['--', '-', '--'])
    # Put the view back to what it was before the contours were drawn.
    ax.set_xlim(xlim)
    ax.set_ylim(ylim)

# Draw the samples first so the axes limits are set, then overlay the
# SVM boundary on the same axes.
plt.scatter(x=X[:, 0], y=X[:, 1], c=y, s=10)
plot_svm_decision_bounder(X, y)

sklearn学习笔记——svm_第2张图片
调参

# load_breast_cancer is used below; import it here so the snippet runs.
from sklearn.datasets import load_breast_cancer

data = load_breast_cancer()
X = data.data
y = data.target
np.unique(y)  # distinct labels; the value is only shown in an interactive session
# Hold out 30% of the samples for testing. No random_state is given, so the
# split (and the scores computed from it) changes from run to run.
X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size=0.3)
#help(svm.SVC)
# time() is called in the loop; import it here so the snippet runs.
from time import time

# Compare test accuracy of the four built-in SVC kernels on the same split.
kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for kernel in kernels:
    time0 = time()  # start time for the optional timing report below
    clf = svm.SVC(
        kernel=kernel,
        degree=1,
        gamma='auto',
        cache_size=5000,
    ).fit(X_train, Y_train)
    print('the accuracy under kernel %s is %f' % (kernel, clf.score(X_test, Y_test)))
    # Optional elapsed-time report (also needs `import datetime`):
    #print(datetime.datetime.fromtimestamp(time()-time0).strftime('%M:%S:%f'))

the accuracy under kernel linear is 0.964912
the accuracy under kernel poly is 0.959064
the accuracy under kernel rbf is 0.619883
the accuracy under kernel sigmoid is 0.619883

查看数据分布

import pandas as pd
from sklearn.preprocessing import StandardScaler

# NOTE: this rebinds `data` from the loaded dataset object to a DataFrame of X.
data = pd.DataFrame(X)
# Per-column summary with extra percentiles, transposed so each column of X is
# one row. Bound to its own name so the plain summary below does not discard it.
a_percentiles = data.describe([0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.99]).T
a = data.describe()
# Standardize the features.
X = StandardScaler().fit_transform(X)

# time() is called in the loop; import it here so the snippet runs.
from time import time

# Re-split using the current X and repeat the kernel comparison.
# No random_state is given, so the split changes from run to run.
X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size=0.3)
#help(svm.SVC)
kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for kernel in kernels:
    time0 = time()  # start time for the optional timing report below
    clf = svm.SVC(
        kernel=kernel,
        degree=1,
        gamma='auto',
        cache_size=5000,
    ).fit(X_train, Y_train)
    print('the accuracy under kernel %s is %f' % (kernel, clf.score(X_test, Y_test)))
    # Optional elapsed-time report (also needs `import datetime`):
    #print(datetime.datetime.fromtimestamp(time()-time0).strftime('%M:%S:%f'))

the accuracy under kernel linear is 0.976608
the accuracy under kernel poly is 0.982456
the accuracy under kernel rbf is 0.976608
the accuracy under kernel sigmoid is 0.959064
# Sweep gamma for the RBF kernel over 50 log-spaced values from 1e-10 to 1e1,
# recording the test accuracy for each one.
gamma_ranges = np.logspace(-10, 1, 50)
score = []
for i in gamma_ranges:
    clf = svm.SVC(kernel='rbf', gamma=i, cache_size=5000)
    clf.fit(X_train, Y_train)
    score.append(clf.score(X_test, Y_test))

# Report the best accuracy and the first gamma that reaches it.
best_score = max(score)
print(best_score, gamma_ranges[score.index(best_score)])

plt.plot(gamma_ranges, score)
plt.show()

sklearn学习笔记——svm_第3张图片
0.9883040935672515 0.020235896477251554

from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import StratifiedShuffleSplit  # builds the CV splits

# Search gamma and coef0 jointly for the polynomial kernel.
gamma_ranges = np.logspace(-10, 1, 10)
coef0_ranges = np.logspace(0, 5, 10)
param_grid = dict(gamma=gamma_ranges, coef0=coef0_ranges)
# 5 stratified shuffles, each holding out 30% of the samples.
cv = StratifiedShuffleSplit(n_splits=5, test_size=0.3, random_state=50)
clf = svm.SVC(kernel='poly', cache_size=5000, degree=1)
# Pass the splitter built above as the cross-validation strategy.
Gs = GridSearchCV(clf, param_grid, cv=cv)
Gs.fit(X, y)
# Print the results explicitly: bare expressions show nothing when run as a script.
print(Gs.best_params_)
print(Gs.best_score_)
# Tune the penalty coefficient C for the RBF kernel, with gamma held at a
# fixed hard-coded value.
C_ranges = np.linspace(0.01, 30, 50)
score = []
for i in C_ranges:
    clf = svm.SVC(kernel='rbf', C=i, gamma=0.020235896477251554, cache_size=5000)
    clf.fit(X_train, Y_train)
    score.append(clf.score(X_test, Y_test))

# Report the best accuracy and the first C that reaches it.
best_score = max(score)
print(best_score, C_ranges[score.index(best_score)])

plt.plot(C_ranges, score)
plt.show()

sklearn学习笔记——svm_第4张图片
0.9883040935672515 1.2340816326530613

你可能感兴趣的:(机器学习)