查看svm.SVC的不同kernel在moons、circles、blobs、classification数据上的表现
kernel= linear 为线性核;kernel= poly 为多项式核(degree=1 时等价于线性核,sklearn 默认 degree=3,此时决策边界是非线性的)
kernel= rbf/sigmoid 为非线性核
import numpy as np
import matplotlib.pyplot as plt
from sklearn import svm
from sklearn.datasets import make_circles, make_moons, make_blobs, make_classification
# Compare the four built-in SVC kernels on four synthetic 2-D binary
# classification datasets (moons, circles, blobs, classification).
n_sample = 100
datasets = [
    make_moons(n_sample),
    make_circles(n_sample, noise=0.2, factor=0.5),
    make_blobs(n_sample, centers=2),
    make_classification(
        n_samples=n_sample, n_features=2, n_informative=2, n_redundant=0
    ),
]
kernel = ['linear', 'poly', 'rbf', 'sigmoid']

# Fixed grid: one row per dataset; the first column shows the true labels and
# each following column shows the predictions of one kernel.
# plt.subplot takes (nrows, ncols, index) with a 1-based, row-major index, so
# the grid shape must stay constant and the row offset goes into the index.
n_rows = len(datasets)
n_cols = len(kernel) + 1
for idx, (X, y) in enumerate(datasets):
    row_start = idx * n_cols
    plt.subplot(n_rows, n_cols, row_start + 1)
    plt.scatter(X[:, 0], X[:, 1], c=y)
    for i, kernel_name in enumerate(kernel):
        clf = svm.SVC(kernel=kernel_name, gamma='auto').fit(X, y)
        # Scored on the same samples the model was fitted on (training accuracy).
        score = clf.score(X, y)
        print(kernel_name, score)
        y_pred = clf.predict(X)
        plt.subplot(n_rows, n_cols, row_start + i + 2)
        plt.scatter(X[:, 0], X[:, 1], c=y_pred)
# Show the complete grid once, after every panel has been drawn.
plt.show()
调整软间隔的惩罚系数C(C越大,对误分类样本的惩罚越重)
# Sweep the SVC penalty parameter C on a fresh moons sample and print the
# accuracy on the fitted samples for each value.
data = make_moons(n_sample)
X, y = data
c_range = [1, 5, 10, 20]
for penalty in c_range:
    clf_soft = svm.SVC(kernel='rbf', gamma='auto', C=penalty)
    clf_soft.fit(X, y)
    score = clf_soft.score(X, y)
    print(score)
样本数量不均衡时,设置类别权重class_weight
import numpy as np
import matplotlib.pyplot as plt
from sklearn import svm
from sklearn.datasets import make_blobs
# Imbalanced two-class problem: 200 samples drawn around (0, 0) and 100
# samples drawn around (2, 2).
pos_sample, neg_sample = 200, 100
X, y = make_blobs(
    n_samples=[pos_sample, neg_sample],
    centers=[[0., 0.], [2., 2.]],
)
plt.scatter(X[:, 0], X[:, 1], c=y)
# plt.show()  # left disabled as in the original; uncomment to display the scatter

# class_weight='balanced' asks SVC to reweight the classes to compensate for
# the 200/100 imbalance (see the scikit-learn SVC documentation).
clf = svm.SVC(kernel='rbf', C=1.0, class_weight='balanced').fit(X, y)
# Alternative with explicit per-class weights:
# svm.SVC(kernel='linear', class_weight={1: 2})

# Print the score so it is visible when run as a script; a bare expression
# statement would compute the value and silently discard it.
print(clf.score(X, y))