Support Vector Machine (SVM) models in sklearn, the Python machine-learning package

Support vector machines (SVMs) are a set of supervised learning methods used for classification, regression, and outlier detection.

The advantages of support vector machines are:

  • Effective in high-dimensional spaces.
  • Still effective in cases where the number of dimensions is greater than the number of samples.
  • Uses a subset of training points in the decision function (called support vectors), so it is also memory-efficient.
  • Versatile: different kernel functions can be specified for the decision function. Common kernels are provided, but custom kernels can also be specified (a sketch follows the binary-classification example below).

The disadvantages of support vector machines include:

  • If the number of features is much greater than the number of samples, avoiding over-fitting when choosing kernel functions and the regularization term is crucial.
  • SVMs do not directly provide probability estimates; these are calculated using an expensive five-fold cross-validation (see "Scores and probabilities" in the sklearn docs; a minimal sketch follows this list).
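
To make the last point concrete: probability estimates can be enabled with the probability=True constructor flag, which fits an internal cross-validated (Platt-scaling) calibration during fit. A minimal sketch (using a slightly larger toy set than the examples below, so the internal cross-validation has enough samples):

from sklearn import svm

X = [[0, 0], [0, 1], [1, 0], [1, 1], [2, 2], [2, 3], [3, 2], [3, 3]]
y = [0, 0, 0, 0, 1, 1, 1, 1]
clf = svm.SVC(probability=True)  # enables the expensive internal cross-validation
clf.fit(X, y)
print(clf.predict_proba([[2.5, 2.5]]))  # per-class probability estimates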

1. Binary classification

### 1. Binary classification
from sklearn import svm
X = [[0, 0], [1, 1]]
y = [0, 1]
print(dir(svm.SVC()))  # attributes and methods of the class
clf = svm.SVC()  # default kernel='rbf'
# clf = svm.SVC(kernel='linear')
# clf = svm.SVC(kernel='rbf')
print(clf.kernel)
clf.fit(X, y)

print(clf.predict([[2., 2.]]))

# get support vectors
print(clf.support_vectors_)
# get indices of support vectors
print(clf.support_)
# get number of support vectors for each class
print(clf.n_support_)
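
The "custom kernels" point from the advantages list above can be made concrete: SVC accepts any callable that returns the Gram matrix between two sets of samples. A minimal, self-contained sketch in which the custom kernel simply reproduces the built-in linear kernel:

import numpy as np
from sklearn import svm

def my_kernel(X1, X2):
    # Gram matrix of the linear kernel: K(x, x') = <x, x'>
    return np.dot(X1, np.asarray(X2).T)

clf = svm.SVC(kernel=my_kernel)
clf.fit([[0, 0], [1, 1]], [0, 1])
print(clf.predict([[2., 2.]]))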

2. Multi-class classification

### 2. Multi-class classification
X = [[0], [1], [2], [3]]
Y = [0, 1, 2, 3]
clf = svm.SVC(decision_function_shape='ovo')  # "one-versus-one"
# clf = svm.SVC(decision_function_shape='ovr')  # "one-vs-rest" (the default)
print(clf)
# In total, n_classes * (n_classes - 1) / 2 classifiers are constructed
clf.fit(X, Y)
y_pred = clf.predict(X)
print(y_pred)

dec = clf.decision_function([[1]])
print(dec)
print(dec.shape[1])  # 4 classes: 4*3/2 = 6 pairwise classifiers, so 6 columns

clf.decision_function_shape = "ovr"
print(clf)
clf.fit(X, Y)
y_pred = clf.predict(X)
print(y_pred)
dec = clf.decision_function([[1]])
print(dec)
print(dec.shape[1])  # 4 classes, so 4 columns with "ovr"

import numpy as np
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score
from sklearn.model_selection import StratifiedKFold

## Load the data
iris = load_iris()
X = iris.data
y = iris.target
#print(X)
#print(y)

## Preprocess the data
scaler = StandardScaler()
X = scaler.fit_transform(X)

## Model parameter selection
C_range = np.logspace(-2, 10, 13)
gamma_range = np.logspace(-9, 3, 13)
param_grid = dict(gamma=gamma_range, C=C_range)

# C: penalty parameter on the slack variables; default 1.0. A large C penalizes
# misclassification heavily, so the training set is fit more accurately
# (high training accuracy but weak generalization). A small C reduces the
# penalty, tolerating misclassified points as noise and generalizing better.
# gamma: kernel coefficient for the 'rbf', 'poly' and 'sigmoid' kernels. The
# current default is 'scale' (1 / (n_features * X.var())); 'auto' uses 1 / n_features.
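
# A quick, illustrative check of the C trade-off described above: a very
# large C fits the training data more tightly than a very small C does.
for C_demo in (0.01, 1e4):
    m = SVC(C=C_demo, gamma=0.1).fit(X, y)
    print("C=%g  training accuracy=%.3f" % (C_demo, m.score(X, y)))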

cv = StratifiedShuffleSplit(n_splits=5, test_size=0.2, random_state=42)
grid = GridSearchCV(SVC(), param_grid=param_grid, cv=cv)
grid.fit(X, y)

print(
    "The best parameters are %s with a score of %0.2f"
    % (grid.best_params_, grid.best_score_)
)
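
# Beyond the single best combination, grid.cv_results_ stores the scores of
# every parameter combination; a minimal sketch (assuming pandas is
# available) to rank the top candidates:
import pandas as pd

results = pd.DataFrame(grid.cv_results_)
print(
    results[["param_C", "param_gamma", "mean_test_score"]]
    .sort_values("mean_test_score", ascending=False)
    .head()
)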

## Train and evaluate the model with the best parameters
# Split the dataset
skf = StratifiedKFold(n_splits=3)
print(skf)
for train_index, test_index in skf.split(X, y):
    # print("Train Index:", train_index, ", Test Index:", test_index)
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    # Train the model on the training fold
    clf = svm.SVC(gamma=0.1, C=1.0, decision_function_shape='ovo')
    clf.fit(X_train, y_train)

    # Evaluate the model on the test fold
    predict = clf.predict(X_test)
    accuracy = accuracy_score(y_test, predict)
    print(accuracy)
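
# As a cross-check, cross_val_score runs the same per-fold train/evaluate
# loop in a single call (an equivalent, minimal sketch):
from sklearn.model_selection import cross_val_score

scores = cross_val_score(
    svm.SVC(gamma=0.1, C=1.0, decision_function_shape='ovo'), X, y, cv=3
)
print(scores, scores.mean())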

# LinearSVC implements the "one-vs-the-rest" multi-class strategy,
# thus training n_classes models.
X = [[0], [1], [2], [3]]  # re-create the small multi-class toy data from above
Y = [0, 1, 2, 3]
lin_clf = svm.LinearSVC()
lin_clf.fit(X, Y)
dec = lin_clf.decision_function([[1]])
print(dec)
print(dec.shape[1])  # 4 classes, so 4 columns
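
# Because LinearSVC trains one model per class, its coefficient matrix has
# one row per class (an illustrative check of the one-vs-rest claim above):
print(lin_clf.coef_.shape)       # (4, 1): n_classes x n_features
print(lin_clf.intercept_.shape)  # (4,): one intercept per class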

3. Regression

### 3. Regression
import numpy as np
from sklearn import svm
from sklearn.svm import SVR
import matplotlib.pyplot as plt

X = [[0, 0], [2, 2]]
y = [0.5, 2.5]
regr = svm.SVR()

regr.fit(X, y)

print(regr.predict([[1, 1]]))
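
# SVR's epsilon parameter defines a tube around the prediction inside which
# errors are not penalized; a wider tube often needs fewer support vectors.
# An illustrative check on the two-point toy data above:
for eps in (0.1, 1.0):
    m = svm.SVR(epsilon=eps).fit(X, y)
    print("epsilon=%g  support vectors=%d" % (eps, len(m.support_)))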

## Generate sample data
X = np.sort(5 * np.random.rand(40, 1), axis=0)
# np.random.rand(d0, d1, ..., dn): samples from the uniform distribution over [0, 1)
# np.random.randn(d0, d1, ..., dn): samples from the standard normal distribution
y = np.sin(X).ravel()
# numpy.ravel() flattens to 1-D and returns a view when possible, so writing
# through it modifies the original array;
# numpy.flatten() returns a copy, so changes to the copy never affect the original
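# A quick check of the view-vs-copy distinction (illustrative only):
a = np.arange(6).reshape(2, 3)
a.ravel()[0] = 99    # writes through the view: a itself changes
print(a[0, 0])       # 99
a.flatten()[0] = -1  # writes to a discarded copy: a is unchanged
print(a[0, 0])       # still 99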
# print(X)
# print(y)
# add noise to targets
y[::5] += 3 * (0.5 - np.random.rand(8))
print(y[::5])

## Models
svr_rbf = SVR(kernel="rbf", C=100, gamma=0.1, epsilon=0.1)
svr_lin = SVR(kernel="linear", C=100, gamma="auto")
svr_poly = SVR(kernel="poly", C=100, gamma="auto", degree=3, epsilon=0.1, coef0=1)

lw = 2

svrs = [svr_rbf, svr_lin, svr_poly]
kernel_label = ["RBF", "Linear", "Polynomial"]
model_color = ["m", "c", "g"]
## Train and plot
# plt.subplots returns the figure and an array of axes objects.
fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(15, 10), sharey=True)
for ix, svr in enumerate(svrs):
    axes[ix].plot(
        X,
        svr.fit(X, y).predict(X),
        color=model_color[ix],
        lw=lw,
        label="{} model".format(kernel_label[ix]),
    )
    axes[ix].scatter(
        X[svr.support_],
        y[svr.support_],
        facecolor="none",
        edgecolor=model_color[ix],
        s=50,
        label="{} support vectors".format(kernel_label[ix]),
    )
    axes[ix].scatter(
        X[np.setdiff1d(np.arange(len(X)), svr.support_)],
        y[np.setdiff1d(np.arange(len(X)), svr.support_)],
        facecolor="none",
        edgecolor="k",
        s=50,
        label="other training data",
    )
    axes[ix].legend(
        loc="upper center",
        bbox_to_anchor=(0.5, 1.1),
        ncol=1,
        fancybox=True,
        shadow=True,
    )

fig.text(0.5, 0.04, "data", ha="center", va="center")
fig.text(0.06, 0.5, "target", ha="center", va="center", rotation="vertical")
fig.suptitle("Support Vector Regression", fontsize=14)
plt.show()

Reference:

https://scikit-learn.org/stable/modules/svm.html
