Support vector machines (SVMs) are a set of supervised learning methods used for classification, regression, and outlier detection.
The advantages of support vector machines are:
- Effective in high-dimensional spaces, and still effective when the number of dimensions exceeds the number of samples.
- Memory efficient: the decision function uses only a subset of the training points (the support vectors).
- Versatile: different kernel functions can be specified for the decision function.

The disadvantages of support vector machines include:
- If the number of features is much greater than the number of samples, avoiding over-fitting through the choice of kernel function and regularization term is crucial.
- SVMs do not directly provide probability estimates; these are calculated using an expensive five-fold cross-validation.
### 1. Binary classification
from sklearn import svm
X = [[0, 0], [1, 1]]
y = [0, 1]
print(dir(svm.SVC()))  # attributes and methods of the class
clf = svm.SVC()  # default kernel='rbf'
#clf = svm.SVC(kernel='linear')
#clf = svm.SVC(kernel='rbf')
print(clf.kernel)
clf.fit(X, y)
print(clf.predict([[2., 2.]]))
# get support vectors
print(clf.support_vectors_)
# get indices of support vectors
print(clf.support_)
# get number of support vectors for each class
print(clf.n_support_)
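Beyond the attributes above, the fitted decision function can be inspected directly; with a linear kernel the separating hyperplane itself is exposed through coef_ and intercept_. A minimal sketch on the same toy data:
from sklearn import svm

X = [[0, 0], [1, 1]]
y = [0, 1]
lin = svm.SVC(kernel='linear')
lin.fit(X, y)
print(lin.coef_, lin.intercept_)  # hyperplane parameters w and b in w.x + b = 0
print(lin.decision_function([[2., 2.]]))  # signed distance (up to scaling) from the hyperplane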
### 2. Multi-class classification
X = [[0], [1], [2], [3]]
Y = [0, 1, 2, 3]
clf = svm.SVC(decision_function_shape='ovo')  # "one-versus-one"
# clf = svm.SVC(decision_function_shape='ovr')  # "one-versus-rest"
print(clf)
# In total, n_classes * (n_classes - 1) / 2 classifiers are constructed
clf.fit(X, Y)
y_pred = clf.predict(X)
print(y_pred)
dec = clf.decision_function([[1]])
print(dec)
print(dec.shape[1]) # 4 classes: 4*3/2 = 6
clf.decision_function_shape = "ovr"
print(clf)
clf.fit(X, Y)
y_pred = clf.predict(X)
print(y_pred)
dec = clf.decision_function([[1]])
print(dec)
print(dec.shape[1])  # 4 classes: ovr gives one column per class
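To make the column counts concrete, the two decision-function shapes can be compared side by side on the same 4-class toy data (a minimal sketch):
from sklearn import svm

X = [[0], [1], [2], [3]]
Y = [0, 1, 2, 3]
for shape in ('ovo', 'ovr'):
    clf = svm.SVC(decision_function_shape=shape)
    clf.fit(X, Y)
    print(shape, clf.decision_function([[1]]).shape)  # (1, 6) for ovo, (1, 4) for ovr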
import numpy as np
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.model_selection import StratifiedShuffleSplit, StratifiedKFold, GridSearchCV
from sklearn.metrics import accuracy_score
## Load the data
iris = load_iris()
X = iris.data
y = iris.target
#print(X)
#print(y)
## Preprocess the data
scaler = StandardScaler()
X = scaler.fit_transform(X)
## Model parameter selection
C_range = np.logspace(-2, 10, 13)
gamma_range = np.logspace(-9, 3, 13)
param_grid = dict(gamma=gamma_range, C=C_range)
# C: penalty parameter on the slack variables; default 1.0. A large C penalizes
# misclassification heavily, fitting the training set more closely but generalizing worse.
# A small C tolerates some misclassified points (treating them as noise),
# which usually generalizes better.
# gamma: kernel coefficient for 'rbf', 'poly' and 'sigmoid'. The default is 'scale',
# i.e. 1 / (n_features * X.var()); 'auto' uses 1 / n_features.
cv = StratifiedShuffleSplit(n_splits=5, test_size=0.2, random_state=42)
grid = GridSearchCV(SVC(), param_grid=param_grid, cv=cv)
grid.fit(X, y)
print(
"The best parameters are %s with a score of %0.2f"
% (grid.best_params_, grid.best_score_)
)
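The effect of C described above also shows up in how many support vectors are kept: a small C tolerates more margin violations, so more training points typically become support vectors. A quick sketch on the scaled iris data (exact counts depend on the data):
for C in (0.01, 1.0, 100.0):
    n_sv = SVC(kernel='rbf', gamma=0.1, C=C).fit(X, y).n_support_.sum()
    print('C=%s: %d support vectors' % (C, n_sv))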
## Train and evaluate the model with the chosen parameters
# Split the dataset
skf = StratifiedKFold(n_splits=3)
skf.get_n_splits(X, y)
print(skf)
for train_index, test_index in skf.split(X, y):
    # print("Train Index:", train_index, ", Test Index:", test_index)
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    # Train the model on the training folds
    clf = SVC(gamma=0.1, C=1.0, decision_function_shape='ovo')
    clf.fit(X_train, y_train)
    # Evaluate the model on the held-out fold
    predict = clf.predict(X_test)
    accuracy = accuracy_score(y_test, predict)
    print(accuracy)
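The per-fold loop above can also be written as a single call to cross_val_score, which performs the splitting, fitting, and scoring internally (an equivalent sketch):
from sklearn.model_selection import cross_val_score

scores = cross_val_score(SVC(gamma=0.1, C=1.0), X, y, cv=StratifiedKFold(n_splits=3))
print(scores)  # one accuracy per fold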
# LinearSVC implements a "one-vs-the-rest" multi-class strategy,
# thus training n_classes models.
# Note: the 4-sample toy data is redefined here, since X and Y were
# overwritten by the iris example above.
X = [[0], [1], [2], [3]]
Y = [0, 1, 2, 3]
lin_clf = svm.LinearSVC()
lin_clf.fit(X, Y)
dec = lin_clf.decision_function([[1]])
print(dec)
print(dec.shape[1])  # 4 classes: one column per class
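Since one-vs-rest fits one linear model per class, the coefficient matrix of the fitted LinearSVC has one row per class (a quick check):
print(lin_clf.coef_.shape)  # (4, 1): n_classes rows, n_features columns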
### 3. Regression
import numpy as np
from sklearn import svm
import matplotlib.pyplot as plt
X = [[0, 0], [2, 2]]
y = [0.5, 2.5]
regr = svm.SVR()
regr.fit(X, y)
print(regr.predict([[1, 1]]))
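SVR fits an epsilon-wide tube around the data and ignores errors inside it; only points on or outside the tube become support vectors. A small sketch on the same two points (the counts are illustrative, not guaranteed):
wide = svm.SVR(kernel='linear', epsilon=5.0).fit(X, y)
tight = svm.SVR(kernel='linear', epsilon=0.01).fit(X, y)
print(len(wide.support_), len(tight.support_))  # a wider tube typically needs fewer support vectors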
## Generate sample data
X = np.sort(5 * np.random.rand(40, 1), axis=0)
# np.random.rand(d0, d1, ...): samples uniformly from [0, 1)
# np.random.randn(d0, d1, ...): samples from the standard normal distribution
y = np.sin(X).ravel()
# numpy.ravel() flattens an array to 1-D and returns a view, so writes affect the original array;
# numpy.flatten() returns a copy, so changes to the copy leave the original untouched
# (see the short demo after this block)
# print(X)
# print(y)
# add noise to targets
y[::5] += 3 * (0.5 - np.random.rand(8))
print(y[::5])
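To make the ravel/flatten distinction above concrete, a tiny demonstration:
a = np.arange(4).reshape(2, 2)
v = a.ravel()
v[0] = 99  # writes through to a (view)
f = a.flatten()
f[1] = -1  # leaves a untouched (copy)
print(a)  # [[99  1] [ 2  3]]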
## Models
svr_rbf = svm.SVR(kernel="rbf", C=100, gamma=0.1, epsilon=0.1)
svr_lin = svm.SVR(kernel="linear", C=100, gamma="auto")
svr_poly = svm.SVR(kernel="poly", C=100, gamma="auto", degree=3, epsilon=0.1, coef0=1)
lw = 2
svrs = [svr_rbf, svr_lin, svr_poly]
kernel_label = ["RBF", "Linear", "Polynomial"]
model_color = ["m", "c", "g"]
## Train and plot
# plt.subplots returns the figure and an array of axes objects.
fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(15, 10), sharey=True)
for ix, svr in enumerate(svrs):
    axes[ix].plot(
        X,
        svr.fit(X, y).predict(X),
        color=model_color[ix],
        lw=lw,
        label="{} model".format(kernel_label[ix]),
    )
    axes[ix].scatter(
        X[svr.support_],
        y[svr.support_],
        facecolor="none",
        edgecolor=model_color[ix],
        s=50,
        label="{} support vectors".format(kernel_label[ix]),
    )
    axes[ix].scatter(
        X[np.setdiff1d(np.arange(len(X)), svr.support_)],
        y[np.setdiff1d(np.arange(len(X)), svr.support_)],
        facecolor="none",
        edgecolor="k",
        s=50,
        label="other training data",
    )
    axes[ix].legend(
        loc="upper center",
        bbox_to_anchor=(0.5, 1.1),
        ncol=1,
        fancybox=True,
        shadow=True,
    )
fig.text(0.5, 0.04, "data", ha="center", va="center")
fig.text(0.06, 0.5, "target", ha="center", va="center", rotation="vertical")
fig.suptitle("Support Vector Regression", fontsize=14)
plt.show()
References:
https://scikit-learn.org/stable/modules/svm.html