# 3D plot display
from sklearn.datasets import make_blobs
from sklearn.datasets import make_circles
import matplotlib.pyplot as plt
from mpl_toolkits import mplot3d
import numpy as np
# Sample points and labels from scikit-learn's make_circles generator.
x, y = make_circles(n_samples=250, random_state=20, factor=0.5)
# Extra feature used as the z coordinate of the 3-D plot: exp(-v**2) is
# evaluated for every coordinate and the values are summed per sample.
# NOTE(review): a radial-basis feature would be np.exp(-(x**2).sum(1));
# confirm which form is intended.
r = np.exp(-(x**2)).sum(1)
# np.linspace's third argument is a sample count and must be an integer, so
# the original value 0.2 raises TypeError on current NumPy. 0.2 reads as a
# step size, hence np.arange.
rlim = np.arange(r.min(), r.max(), 0.2)
def plot_3D(elev=30, azim=30, x=x, y=y):
    """Show the samples as a 3-D scatter plot seen from a chosen angle.

    The first two columns of ``x`` give the horizontal coordinates and ``y``
    colours the points. The height is always read from the module-level
    array ``r``, regardless of the ``x`` argument.

    Args:
        elev: Elevation forwarded to ``view_init``.
        azim: Azimuth forwarded to ``view_init``.
        x: 2-D array of sample coordinates; defaults to the module-level ``x``.
        y: Per-sample colour values; defaults to the module-level ``y``.
    """
    axes = plt.subplot(projection='3d')
    horizontal, depth = x[:, 0], x[:, 1]
    axes.scatter3D(horizontal, depth, r, c=y, s=10)
    axes.view_init(elev=elev, azim=azim)
    # Label all three axes in one call.
    axes.set(xlabel='x', ylabel='y', zlabel='r')
    plt.show()
# Draw the circle data with elevation 70 and azimuth 30.
plot_3D(x=x, y=y, elev=70, azim=30)
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import numpy as np
from sklearn import svm
from sklearn.datasets import make_blobs
# 50 samples generated around two cluster centres.
X, y = make_blobs(
    n_samples=50,
    centers=2,
    cluster_std=0.6,
    random_state=2,
)
def plot_svm_decision_bounder(X, y):
    """Fit a linear SVM on ``X``/``y`` and draw its boundary on the current axes.

    A 40 x 40 grid spanning the current axes limits is evaluated with the
    fitted classifier's ``decision_function``. Contours are drawn at levels
    -1, 0 and 1: level 0 as a solid line, levels -1 and 1 as dashed lines.

    Call this after the samples have been plotted so that the axes limits
    already cover the data.

    Args:
        X: Training samples with two feature columns (the grid is 2-D).
        y: Class labels for ``X``.
    """
    ax = plt.gca()
    # Remember the current view so it can be restored after contouring.
    xlim = ax.get_xlim()
    ylim = ax.get_ylim()

    grid_x, grid_y = np.meshgrid(
        np.linspace(xlim[0], xlim[1], 40),
        np.linspace(ylim[0], ylim[1], 40),
    )
    grid_points = np.vstack([grid_x.ravel(), grid_y.ravel()]).T

    clf = svm.SVC(kernel='linear').fit(X, y)
    # Decision-function value of every grid point, reshaped back onto the grid.
    p = clf.decision_function(grid_points).reshape(grid_x.shape)

    # contour() expects ``colors`` and ``linestyles``; the original spellings
    # ``color`` and ``linstyles`` are not contour() parameters.
    ax.contour(grid_x, grid_y, p, colors='k',
               levels=[-1, 0, 1], alpha=0.5, linestyles=['--', '-', '--'])

    # Restore both limits through the setters; assigning ``ax.ylim`` only
    # created an unused attribute and left the y-limits untouched.
    ax.set_xlim(xlim)
    ax.set_ylim(ylim)
# Plot the samples first so the axes limits exist, then overlay the boundary.
first_feature, second_feature = X[:, 0], X[:, 1]
plt.scatter(first_feature, second_feature, c=y, s=10)
plot_svm_decision_bounder(X, y)
# load_breast_cancer is not imported anywhere else in this script.
from sklearn.datasets import load_breast_cancer

data = load_breast_cancer()
X = data.data
y = data.target
# Distinct label values; the result is only displayed in an interactive session.
np.unique(y)
# Hold out 30% of the samples for testing.
X_train, X_test, Y_train, Y_test = train_test_split(
    data.data, data.target, test_size=0.3
)
#help(svm.SVC)
# Compare the test accuracy of each SVC kernel on the current split.
kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for kernel in kernels:
    # The original started a timer with time(), which is never imported and
    # whose value was never read; it is dropped so the loop can run.
    clf = svm.SVC(
        kernel=kernel,
        degree=1,  # polynomial degree; only the 'poly' kernel uses it
        gamma='auto',
        cache_size=5000,
    ).fit(X_train, Y_train)
    print('the accuracy under kernel %s is %f' % (kernel, clf.score(X_test, Y_test)))
# Output:
# the accuracy under kernel linear is 0.964912
# the accuracy under kernel poly is 0.959064
# the accuracy under kernel rbf is 0.619883
# the accuracy under kernel sigmoid is 0.619883
# Inspect the data distribution
import pandas as pd
# Wrap the feature matrix in a DataFrame to summarise each column.
data = pd.DataFrame(X)
# Summary including extra percentiles, transposed.
a = data.describe(
    percentiles=[0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.99]
).transpose()
# NOTE: this default summary replaces the percentile table bound to ``a`` above.
a = data.describe()
from sklearn.preprocessing import StandardScaler
# Standardise the features, then redo the split with 30% held out.
# NOTE(review): the scaler is fitted on all samples before splitting, so the
# held-out rows contribute to the scaling statistics.
X = StandardScaler().fit_transform(X)
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    y,
    test_size=0.3,
)
#help(svm.SVC)
# Compare the test accuracy of each SVC kernel on the current split.
kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for kernel in kernels:
    # The original started a timer with time(), which is never imported and
    # whose value was never read; it is dropped so the loop can run.
    clf = svm.SVC(
        kernel=kernel,
        degree=1,  # polynomial degree; only the 'poly' kernel uses it
        gamma='auto',
        cache_size=5000,
    ).fit(X_train, Y_train)
    print('the accuracy under kernel %s is %f' % (kernel, clf.score(X_test, Y_test)))
# Output:
# the accuracy under kernel linear is 0.976608
# the accuracy under kernel poly is 0.982456
# the accuracy under kernel rbf is 0.976608
# the accuracy under kernel sigmoid is 0.959064
# Sweep gamma for the RBF kernel over 50 log-spaced values from 1e-10 to 1e1
# and record the test accuracy of each fit.
gamma_ranges = np.logspace(-10, 1, 50)
score = []
for gamma in gamma_ranges:
    clf = svm.SVC(kernel='rbf', gamma=gamma, cache_size=5000)
    clf = clf.fit(X_train, Y_train)
    score.append(clf.score(X_test, Y_test))

# Report the best accuracy and the first gamma that reached it.
best_position = score.index(max(score))
print(max(score), gamma_ranges[best_position])
plt.plot(gamma_ranges, score)
plt.show()
# Output:
# 0.9883040935672515 0.020235896477251554
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import StratifiedShuffleSplit#划分测试样本
# Grid-search gamma and coef0 for a degree-1 polynomial-kernel SVC.
gamma_ranges = np.logspace(-10, 1, 10)
coef0_ranges = np.logspace(0, 5, 10)
param_grid = dict(gamma=gamma_ranges, coef0=coef0_ranges)
# Five stratified random splits, each holding out 30% of the samples.
cv = StratifiedShuffleSplit(n_splits=5, test_size=0.3, random_state=50)
clf = svm.SVC(kernel='poly', cache_size=5000, degree=1)
# Use the splitter built above; the original hard-coded cv=10 and left ``cv``
# unused.
Gs = GridSearchCV(clf, param_grid, cv=cv)
Gs.fit(X, y)
# Print the results: bare attribute expressions show nothing in a script.
print(Gs.best_params_)
print(Gs.best_score_)
# Tune the penalty coefficient C for the RBF kernel, with gamma held at the
# fixed value below, over 50 evenly spaced values from 0.01 to 30.
C_ranges = np.linspace(0.01, 30, 50)
score = []
for penalty in C_ranges:
    clf = svm.SVC(
        kernel='rbf',
        C=penalty,
        gamma=0.020235896477251554,
        cache_size=5000,
    )
    clf = clf.fit(X_train, Y_train)
    score.append(clf.score(X_test, Y_test))

# Report the best accuracy and the first C that reached it.
best_position = score.index(max(score))
print(max(score), C_ranges[best_position])
plt.plot(C_ranges, score)
plt.show()