# 逻辑回归和支持向量机都是机器学习中的二分类算法,却都可以拓展为多分类,程序中SVM采用了多项式核。
#!/usr/bin/python
# -*- coding:utf-8 -*-
import numpy as np
from numpy import mat
from sklearn import svm
from sklearn.linear_model import LogisticRegression
from scipy import stats
from sklearn.metrics import accuracy_score
import matplotlib as mpl
import matplotlib.pyplot as plt
#设置画图过程中,图像的最小值 与最大值取值
def extend(a, b, r):
    """Expand the interval [a, b] about its midpoint by factor r.

    Used to pad the plot limits, e.g. extend(x_min, x_max, 1.05) adds 5%
    margin on each side. Returns (new_min, new_max).

    BUG FIX: the original computed x = a - b, which returns the bounds in
    *reversed* order when a < b (so x1_min ended up greater than x1_max).
    """
    x = b - a            # interval width (positive when a < b)
    m = (a + b) / 2      # midpoint stays fixed
    return m - r*x/2, m + r*x/2
if __name__ == "__main__":
    np.random.seed(0)
    N = 20
    # Four clusters of N two-dimensional points, one per class.
    x = np.empty((4*N, 2))
    print('x', x.shape, type(x))
    means = [(-1, 1), (1, 1), (1, -1), (-1, -1)]
    sigmas = [np.eye(2), 2*np.eye(2), np.diag((1, 2)), np.array(((2, 1), (1, 2)))]
    for i in range(4):
        # Sample N points from a Gaussian centered at means[i].
        mn = stats.multivariate_normal(means[i], sigmas[i]*0.3)
        x[i*N:(i+1)*N, :] = mn.rvs(N)
    a = np.array((0, 1, 2, 3)).reshape((-1, 1))
    # np.tile repeats the label column N times; flatten() yields the 1-D
    # label vector aligned with the rows of x.
    y = np.tile(a, N).flatten()

    # ======== SVM classification ========
    # decision_function_shape='ovo': one-vs-one — every pair of classes gets
    # its own binary classifier, simulating multiclass with binary SVMs.
    clf = svm.SVC(C=0.1, kernel='poly', gamma=1, decision_function_shape='ovo')
    # clf = svm.SVC(C=1, kernel='linear', decision_function_shape='ovr')
    # decision_function_shape='ovr': one-vs-rest — each class against all others.
    print('svm模型:\n', clf)
    clf.fit(x, y)
    y_hat = clf.predict(x)
    acc = accuracy_score(y, y_hat)
    np.set_printoptions(suppress=True)
    # BUG FIX: the original was print('SVM准确率:', acc = accuracy_score(y, y_hat)),
    # a syntax error ('acc' is not a keyword argument of print). Reuse acc.
    print('SVM准确率:', acc)
    # print('function', clf.decision_function(x))  # signed distance of each sample to the separating hyperplanes

    # ======== Logistic regression ========
    Y = y.reshape((-1, 1))
    b = np.ones(Y.shape)                 # explicit bias (intercept) column
    X = np.column_stack((b, x))
    print(type(X), X.shape, type(b), b.shape)
    model = LogisticRegression()
    # FIX: fit on plain ndarrays with a 1-D target. np.mat is deprecated, and a
    # column-vector y triggers scikit-learn's DataConversionWarning.
    model.fit(X, y)
    predictedLR = model.predict(X)
    print('逻辑回归模型:\n', model)
    print('逻辑回归准确率:', accuracy_score(y, predictedLR))

    # ======== Plot ========
    x1_min, x2_min = np.min(x, axis=0)
    x1_max, x2_max = np.max(x, axis=0)
    x1_min, x1_max = extend(x1_min, x1_max, 1.05)   # 5% margin on each side
    x2_min, x2_max = extend(x2_min, x2_max, 1.05)
    # Dense 500x500 grid over the padded bounding box for the decision regions.
    x1, x2 = np.mgrid[x1_min:x1_max:500j, x2_min:x2_max:500j]
    x_test = np.stack((x1.flat, x2.flat), axis=1)
    y_test = clf.predict(x_test)
    y_test = y_test.reshape(x1.shape)
    cm_light = mpl.colors.ListedColormap(['#FF8080', '#A0FFA0', '#6060FF', '#F080F0'])
    cm_dark = mpl.colors.ListedColormap(['r', 'g', 'b', 'm'])
    mpl.rcParams['font.sans-serif'] = [u'SimHei']   # CJK-capable font for the title
    mpl.rcParams['axes.unicode_minus'] = False
    plt.figure(facecolor='w')
    plt.pcolormesh(x1, x2, y_test, cmap=cm_light)   # background = predicted region
    plt.scatter(x[:, 0], x[:, 1], s=40, c=y, cmap=cm_dark, alpha=0.7)
    # plt.scatter(mat(X)[:,1].flatten().A[0], mat(X)[:,2].flatten().A[0], c=predictedLR.tolist(), marker='x')
    plt.xlim((x1_min, x1_max))
    plt.ylim((x2_min, x2_max))
    # FIX: the 'b' keyword of grid() was removed in matplotlib 3.6; pass positionally.
    plt.grid(True)
    plt.tight_layout(pad=2.5)
    plt.title(u'SVM多分类方法:One/One or One/Other', fontsize=18)
    plt.show()
# Sample output:
# svm模型:
# SVC(C=0.1, cache_size=200, class_weight=None, coef0=0.0,
#   decision_function_shape='ovo', degree=3, gamma=1, kernel='poly',
#   max_iter=-1, probability=False, random_state=None, shrinking=True,
#   tol=0.001, verbose=False)
# SVM准确率: 0.8
# 逻辑回归模型:
# LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
#   intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
#   penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
#   verbose=0, warm_start=False)
# 逻辑回归准确率: 0.8