In [1]:
import numpy as np import matplotlib.pyplot as plt from sklearn import svm # 直接用sklearn中的包 from sklearn.datasets import make_blobs # 生成样本点数据,生成聚类的数据
In [2]:
%matplotlib inline
In [3]:
# Randomly generate two clusters of points (intended to be linearly separable):
# 100 samples in total, spread over 2 centers, with a fixed random seed.
X, y = make_blobs(
    n_samples=100,
    centers=2,
    random_state=3,
)
In [4]:
# Display the dimensions of the generated sample matrix and label vector
X.shape, y.shape
Out[4]:
((100, 2), (100,))
In [5]:
# Simple linear-kernel SVM with a large penalty constant C
clf = svm.SVC(kernel='linear', C=1000.0)  # build the classifier
clf.fit(X, y)

plt.scatter(X[:, 0], X[:, 1], c=y, s=30, cmap=plt.cm.Paired)

# The axis limits chosen by the scatter plot define the extent of the grid
ax = plt.gca()
xlim = ax.get_xlim()
ylim = ax.get_ylim()

# Evaluate the decision function on a 30 x 30 grid spanning the axes
xx = np.linspace(*xlim, 30)
yy = np.linspace(*ylim, 30)
XX, YY = np.meshgrid(xx, yy, indexing='ij')  # XX varies along rows, YY along columns
xy = np.column_stack((XX.ravel(), YY.ravel()))
Z = clf.decision_function(xy).reshape(XX.shape)

# Draw the decision boundary (level 0, solid) and the margins (levels -1 / 1, dashed)
ax.contour(
    XX, YY, Z,
    colors='k',
    levels=[-1, 0, 1],
    alpha=0.5,
    linestyles=['--', '-', '--'],
)
plt.show()
In [1]:
import numpy as np import matplotlib.pyplot as plt from sklearn import svm # 直接调用sklearn中的svm接口
In [2]:
# Hand-written 2-D data set that is not linearly separable.
# Each row is one sample (x1, x2); the result is a (16, 2) float array.
X = np.array([
    # first 8 samples: class 0
    [0.4, -0.7],
    [-1.5, -1.0],
    [-1.4, -0.9],
    [-1.3, -1.2],
    [-1.1, -0.2],
    [-1.2, -0.4],
    [-0.5, 1.2],
    [-1.5, 2.1],
    # last 8 samples: class 1
    [1.0, 1.0],
    [1.3, 0.8],
    [1.2, 0.5],
    [0.2, -2.0],
    [0.5, -2.4],
    [0.2, -2.3],
    [0.0, -2.7],
    [1.3, 2.1],
])
# Labels: the first 8 samples belong to class 0, the remaining 8 to class 1
Y = [label for label in (0, 1) for _ in range(8)]
In [3]:
X # each row shown here is conceptually one column vector (one sample); it is merely displayed as a row
Out[3]:
array([[ 0.4, -0.7], [-1.5, -1. ], [-1.4, -0.9], [-1.3, -1.2], [-1.1, -0.2], [-1.2, -0.4], [-0.5, 1.2], [-1.5, 2.1], [ 1. , 1. ], [ 1.3, 0.8], [ 1.2, 0.5], [ 0.2, -2. ], [ 0.5, -2.4], [ 0.2, -2.3], [ 0. , -2.7], [ 1.3, 2.1]])
In [4]:
# Display the class labels (one entry per row of X)
Y
Out[4]:
[0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
In [4]:
fignum = 1 # number of the figure to draw next
In [5]:
# Compare three kernels on the same data: linear, polynomial and RBF (Gaussian).

# The evaluation grid is identical for every kernel, so build it once up front.
x_min = -3
x_max = 3
y_min = -3
y_max = 3
# A complex step (200j) makes mgrid behave like np.linspace: 200 points per axis, ends included
XX, YY = np.mgrid[x_min:x_max:200j, y_min:y_max:200j]
grid_points = np.c_[XX.ravel(), YY.ravel()]

for kernel in ('linear', 'poly', 'rbf'):
    clf = svm.SVC(kernel=kernel, gamma=2)  # original note: gamma = 1 / (2 * sigma^2)
    clf.fit(X, Y)

    # One figure per kernel; it stays the current figure for the rest of the
    # iteration, so it does not need to be selected a second time.
    plt.figure(fignum, figsize=(4, 3))
    plt.clf()

    # Support vectors: large hollow circles drawn around the points
    plt.scatter(clf.support_vectors_[:, 0], clf.support_vectors_[:, 1],
                s=80, facecolors='none', zorder=10, edgecolors='k')
    # A literal colour is used for each class, so no colormap is passed
    # (Matplotlib ignores `cmap` when `c` is a colour and warns about it).
    # First class (first 8 samples)
    plt.scatter(X[:8, 0], X[:8, 1], c='g', zorder=10, edgecolors='k')
    # Second class (last 8 samples)
    plt.scatter(X[8:, 0], X[8:, 1], c='r', zorder=10, edgecolors='k')

    plt.axis('tight')  # fit the axes to the data (the limits are fixed explicitly below)

    # Decision-function values on the grid, reshaped back to the grid layout
    Z = clf.decision_function(grid_points).reshape(XX.shape)

    # Decision boundary (level 0, solid) and margin lines (dashed);
    # the margin levels are drawn at +/-0.5 purely for a nicer-looking plot.
    plt.contour(XX, YY, Z,
                colors=['g', 'r', 'g'],
                linestyles=['--', '-', '--'],
                levels=[-.5, 0, .5])

    plt.xlim(x_min, x_max)
    plt.ylim(y_min, y_max)

    fignum += 1

plt.show()
In [1]:
import numpy as np import matplotlib.pyplot as plt from sklearn import svm, datasets
In [2]:
# Import matplotlib directly instead of reaching it through the legacy pylab
# namespace; the name `mpl` is bound to the same module either way.
import matplotlib as mpl

mpl.rcParams['font.sans-serif'] = ['SimHei']  # default sans-serif font (so the Chinese labels used below can render)
mpl.rcParams['axes.unicode_minus'] = False  # prevents the minus sign '-' from showing up as a box in saved figures
In [3]:
def make_meshgrid(x, y, h=.02):
    """Build the grid of points used for plotting.

    The grid extends one unit beyond the minimum and maximum of the data
    along each axis.

    Parameters
    ----------
    x : ndarray
        Data for the x axis.
    y : ndarray
        Data for the y axis.
    h : float, optional
        Spacing between neighbouring grid points.

    Returns
    -------
    xx, yy : ndarray
        Coordinate matrices as produced by ``np.meshgrid``.
    """
    margin = 1
    x_axis = np.arange(x.min() - margin, x.max() + margin, h)
    y_axis = np.arange(y.min() - margin, y.max() + margin, h)
    grid_x, grid_y = np.meshgrid(x_axis, y_axis)
    return grid_x, grid_y
In [4]:
def plot_contours(ax, clf, xx, yy, **params):
    """Draw a classifier's decision boundaries.

    The classifier's ``predict`` is evaluated at every grid point and the
    predictions are contoured on the given axes.

    Parameters
    ----------
    ax : matplotlib axes object
        Axes to draw on.
    clf : a classifier
        Object exposing ``predict``.
    xx : ndarray
        Grid points (x coordinates).
    yy : ndarray
        Grid points (y coordinates).
    params : dict
        Extra keyword arguments forwarded to ``ax.contour``.

    Returns
    -------
    Whatever ``ax.contour`` returns.
    """
    # One (x, y) pair per grid point, in flattened grid order
    grid_points = np.column_stack((xx.ravel(), yy.ravel()))
    predictions = clf.predict(grid_points).reshape(xx.shape)
    return ax.contour(xx, yy, predictions, **params)
In [5]:
# Load the iris data set
iris = datasets.load_iris()
# Keep only the first two features so the data can be drawn in 2-D
X = iris.data[:, :2]
y = iris.target

C = 1.0  # value of the C parameter shared by every model below
# The candidate models to compare
models = (svm.SVC(kernel='linear', C=C),
          svm.LinearSVC(C=C),
          svm.SVC(kernel='rbf', gamma=0.7, C=C),
          svm.SVC(kernel='poly', degree=3, C=C))
# Train the models. They are fitted eagerly into a tuple (not a one-shot
# generator), so `models` can still be iterated after the plotting loop.
models = tuple(clf.fit(X, y) for clf in models)

# Title of each subplot, in the same order as `models`
titles = (u'SVC(线性核)',
          u'LinearSVC (线性核)',
          u'SVC(RBF)',
          u'SVC(3次多项式核)')

# Split the figure into a 2 x 2 grid of subplots
fig, sub = plt.subplots(2, 2, figsize=(12, 8))
plt.subplots_adjust(wspace=0.2, hspace=0.2)

X0, X1 = X[:, 0], X[:, 1]
xx, yy = make_meshgrid(X0, X1)

for clf, title, ax in zip(models, titles, sub.flatten()):
    # Class boundaries predicted by this model, with the samples on top
    plot_contours(ax, clf, xx, yy, alpha=0.8)
    ax.scatter(X0, X1, c=y, cmap=plt.cm.coolwarm, s=20, edgecolors='k')
    ax.set_xlim(xx.min(), xx.max())
    ax.set_ylim(yy.min(), yy.max())
    ax.set_xlabel(u'萼片长')
    ax.set_ylabel(u'萼片宽')
    ax.set_xticks(())  # hide the tick marks
    ax.set_yticks(())  # hide the tick marks
    ax.set_title(title)

plt.show()