SVM is a complex algorithm that a single blog post cannot cover in full; for a deeper explanation, see this Zhihu discussion:
https://www.zhihu.com/question/21094489
For the K-means algorithm used later in this post, see the Baidu Baike entry:
https://baike.baidu.com/item/K%E5%9D%87%E5%80%BC%E8%81%9A%E7%B1%BB%E7%AE%97%E6%B3%95/15779627?fromtitle=K-means&fromid=4934806&fr=aladdin
and this CSDN tutorial:
https://blog.csdn.net/ruthywei/article/details/83045288
from sklearn.svm import SVC
from sklearn.datasets import load_iris
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
def plot_point2(dataArr, labelArr, Support_vector_index):
    # Plot each sample, colored by its class label
    for i in range(np.shape(dataArr)[0]):
        if labelArr[i] == 0:
            plt.scatter(dataArr[i][0], dataArr[i][1], c='b', s=20)
        elif labelArr[i] == 1:
            plt.scatter(dataArr[i][0], dataArr[i][1], c='y', s=20)
        else:
            plt.scatter(dataArr[i][0], dataArr[i][1], c='g', s=20)
    # Circle the support vectors with hollow red markers
    for j in Support_vector_index:
        plt.scatter(dataArr[j][0], dataArr[j][1], s=100, facecolors='none', alpha=0.5,
                    linewidths=1.5, edgecolors='red')
    plt.show()
if __name__ == "__main__":
    iris = load_iris()
    x, y = iris.data, iris.target
    x = x[:, :2]  # keep only the first two features so the boundary can be drawn in 2D
    X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=0)
    clf = SVC(C=1, cache_size=200, class_weight=None, coef0=0.0,
              decision_function_shape='ovr', degree=3, gamma=0.1,
              kernel='linear', max_iter=-1, probability=False, random_state=None,
              shrinking=True, tol=0.001, verbose=False)
    # Hyperparameter tuning: uncomment to search for the best kernel, C and gamma
    # clf = GridSearchCV(SVC(), param_grid={"kernel": ['rbf', 'linear', 'poly', 'sigmoid'],
    #                                       "C": [0.1, 1, 10], "gamma": [1, 0.1, 0.01]}, cv=3)
    clf.fit(X_train, y_train)
    # Only valid when GridSearchCV above is enabled:
    # print("The best parameters are %s with a score of %0.2f" % (clf.best_params_, clf.best_score_))
    predict_list = clf.predict(X_test)
    accuracy = clf.score(X_test, y_test)
    print("accuracy is :", accuracy * 100, "%")
    n_Support_vector = clf.n_support_  # number of support vectors per class
    print("vector num is :", n_Support_vector)
    Support_vector_index = clf.support_  # indices of the support vectors
    # Build a mesh over the feature space and shade the predicted decision regions
    x_min, x_max = x[:, 0].min() - 1, x[:, 0].max() + 1
    y_min, y_max = x[:, 1].min() - 1, x[:, 1].max() + 1
    h = 0.02
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)
    plt.contourf(xx, yy, Z, cmap=plt.cm.Paired, alpha=0.8)
    plot_point2(x, y, Support_vector_index)
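The commented-out GridSearchCV lines above hint at how to tune the kernel, C and gamma instead of hard-coding them. As a minimal standalone sketch of that search on the same two-feature iris split (the grid values are illustrative, not tuned):

from sklearn.datasets import load_iris
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.svm import SVC

iris = load_iris()
x, y = iris.data[:, :2], iris.target
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=0)

# Grid of candidate hyperparameters (illustrative values)
param_grid = {"kernel": ['rbf', 'linear', 'poly', 'sigmoid'],
              "C": [0.1, 1, 10], "gamma": [1, 0.1, 0.01]}
search = GridSearchCV(SVC(), param_grid=param_grid, cv=3)
search.fit(X_train, y_train)

print("The best parameters are %s with a score of %0.2f"
      % (search.best_params_, search.best_score_))
print("test accuracy:", search.score(X_test, y_test))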
from sklearn.datasets import make_moons
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures
import numpy as np
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC
X, y = make_moons(n_samples=100, noise=0.15, random_state=42)
polynomial_svm_clf = Pipeline([
    # map the raw features to degree-3 polynomial features
    ("poly_features", PolynomialFeatures(degree=3)),
    # standardize the expanded features
    ("scaler", StandardScaler()),
    # linear SVM classifier on top of the polynomial features
    ("svm_clf", LinearSVC(C=10, loss="hinge", random_state=42))
])
polynomial_svm_clf.fit(X, y)
def plot_dataset(X, y, axes):
    plt.plot(X[:, 0][y==0], X[:, 1][y==0], "bs")
    plt.plot(X[:, 0][y==1], X[:, 1][y==1], "g^")
    plt.axis(axes)
    plt.grid(True, which='both')
    plt.xlabel(r"$x_1$", fontsize=20)
    plt.ylabel(r"$x_2$", fontsize=20, rotation=0)
    plt.title("Moons dataset", fontsize=20)
def plot_predictions(clf, axes):
    # build a grid of points covering the plotting region
    x0s = np.linspace(axes[0], axes[1], 100)
    x1s = np.linspace(axes[2], axes[3], 100)
    x0, x1 = np.meshgrid(x0s, x1s)
    X = np.c_[x0.ravel(), x1.ravel()]
    y_pred = clf.predict(X).reshape(x0.shape)
    y_decision = clf.decision_function(X).reshape(x0.shape)
    # print(y_pred)
    # print(y_decision)
    plt.contourf(x0, x1, y_pred, cmap=plt.cm.brg, alpha=0.2)
    plt.contourf(x0, x1, y_decision, cmap=plt.cm.brg, alpha=0.1)
plot_predictions(polynomial_svm_clf, [-1.5, 2.5, -1, 1.5])
plot_dataset(X, y, [-1.5, 2.5, -1, 1.5])
plt.show()
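Instead of expanding the features explicitly with PolynomialFeatures, the same family of decision boundaries can be reached with the kernel trick, which computes the polynomial expansion implicitly. A minimal sketch using SVC with a polynomial kernel on the same moons data (the coef0 and C values are illustrative choices, not tuned):

from sklearn.datasets import make_moons
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

X, y = make_moons(n_samples=100, noise=0.15, random_state=42)

# kernel="poly" computes the degree-3 expansion implicitly, so no
# PolynomialFeatures step is needed in the pipeline
poly_kernel_svm_clf = Pipeline([
    ("scaler", StandardScaler()),
    ("svm_clf", SVC(kernel="poly", degree=3, coef0=1, C=5))
])
poly_kernel_svm_clf.fit(X, y)
print("training accuracy:", poly_kernel_svm_clf.score(X, y))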
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC
# Load the data and extract the petal length and width features
iris = datasets.load_iris()
X = iris["data"][:, (2, 3)]  # petal length, petal width
plt.scatter(X[:50, 0], X[:50, 1], color='green', marker='o', label='setosa')
plt.scatter(X[50:100, 0], X[50:100, 1], color='blue', marker='x', label='versicolor')
plt.xlabel('petal length')
plt.ylabel('petal width')
plt.legend(loc='upper left')
plt.title("Iris dataset", fontsize=20)
plt.show()
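Note that the LinearSVC imported above is never actually fit in this snippet, which only plots the two classes. As a minimal sketch of training a linear SVM on exactly these two classes and features (C=1 and the sample point are arbitrary choices of mine):

import numpy as np
from sklearn import datasets
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC

iris = datasets.load_iris()
X = iris["data"][:100, (2, 3)]  # petal length/width for setosa and versicolor
y = iris["target"][:100]        # binary labels: 0 = setosa, 1 = versicolor

svm_clf = Pipeline([
    ("scaler", StandardScaler()),
    ("linear_svc", LinearSVC(C=1, loss="hinge", random_state=42)),
])
svm_clf.fit(X, y)
# Predict an example point (petal length 5.0, width 1.7): expect class 1
print(svm_clf.predict([[5.0, 1.7]]))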
from sklearn.datasets import make_moons
X, y = make_moons(n_samples=100, noise=0.15, random_state=42)
def plot_dataset(X, y, axes):
    plt.plot(X[:, 0][y==0], X[:, 1][y==0], "bs")
    plt.plot(X[:, 0][y==1], X[:, 1][y==1], "g^")
    plt.axis(axes)
    plt.grid(True, which='both')
    plt.xlabel(r"$x_1$", fontsize=20)
    plt.ylabel(r"$x_2$", fontsize=20, rotation=0)
    plt.title("Moons dataset", fontsize=20)
plot_dataset(X, y, [-1.5, 2.5, -1, 1.5])
plt.show()
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.cluster import KMeans
# Load the data and extract the petal length and width features
iris = datasets.load_iris()
X = iris["data"][:, (2, 3)]  # petal length, petal width
x_axis = iris["data"][:, 2]
y_axis = iris["data"][:, 3]
model = KMeans(n_clusters=2)
# Train the model
model.fit(X)
# Predict the cluster of a single new sample (the values here are arbitrary)
predicted_label = model.predict([[71.67, 74]])
# Predict cluster assignments for the whole dataset
all_predictions = model.predict(X)
# all_predictions = np.array(all_predictions).reshape(-1, 1)
# Scatter plot of the data colored by cluster assignment
plt.scatter(x_axis, y_axis, c=all_predictions)
plt.show()
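The n_clusters=2 above is just a guess; a common way to pick k is the elbow method, which plots the K-means inertia (within-cluster sum of squares) against k and looks for the bend. A minimal sketch on the same petal features (the range of k values is arbitrary):

import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.cluster import KMeans

iris = datasets.load_iris()
X = iris["data"][:, (2, 3)]

# Fit K-means for several k and record the inertia of each fit
inertias = []
ks = range(1, 10)
for k in ks:
    inertias.append(KMeans(n_clusters=k, n_init=10).fit(X).inertia_)

plt.plot(ks, inertias, "bo-")
plt.xlabel("k")
plt.ylabel("inertia")
plt.show()  # look for the 'elbow' where the curve flattens out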
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.datasets import make_moons
from sklearn.pipeline import Pipeline
import numpy as np
X, y = make_moons(n_samples=100, shuffle=True, noise=0.15, random_state=42)
clf = KMeans(n_clusters=2)  # two moons, so ask for two clusters
clf.fit(X)  # K-means is unsupervised, so the labels y are not used
predicted = clf.predict(X)
plt.scatter(X[:, 0], X[:, 1], c=predicted, marker='s', s=100, cmap=plt.cm.Paired)
plt.title("KMeans")
plt.show()
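Because K-means assigns each point to its nearest centroid, it can only carve the plane into convex regions, so it cannot recover the two interleaved moon shapes; comparing its labels with the true ones makes this concrete. A minimal sketch using the adjusted Rand index (the choice of metric is mine):

from sklearn.cluster import KMeans
from sklearn.datasets import make_moons
from sklearn.metrics import adjusted_rand_score

X, y = make_moons(n_samples=100, noise=0.15, random_state=42)
predicted = KMeans(n_clusters=2, n_init=10).fit_predict(X)

# 1.0 means perfect agreement with the true moon labels; values near 0 are chance level
print("adjusted Rand index:", adjusted_rand_score(y, predicted))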
Advantages of SVM:
(1) Kernel functions let it map samples into a high-dimensional space.
(2) With kernels it can handle non-linear classification problems.
(3) The underlying idea is simple: maximize the margin between the samples and the decision boundary.
(4) It generally achieves good classification performance.
Disadvantages of SVM:
(1) It is hard to apply to large-scale training sets; a sketch of a common workaround follows below.
(2) Multi-class classification with SVM is awkward.
(3) It is sensitive to missing data and to the choice of parameters and kernel function.
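Regarding limitation (1), a common workaround for large training sets is a linear SVM trained by stochastic gradient descent. A minimal sketch with scikit-learn's SGDClassifier, where the hinge loss makes it a linear SVM (the synthetic dataset and its size are illustrative):

from sklearn.datasets import make_classification
from sklearn.linear_model import SGDClassifier

# A larger synthetic dataset, where training a kernel SVC would be slow
X, y = make_classification(n_samples=100000, n_features=20, random_state=42)

# loss="hinge" => linear SVM; SGD training scales roughly linearly in n_samples
clf = SGDClassifier(loss="hinge", random_state=42)
clf.fit(X, y)
print("training accuracy:", clf.score(X, y))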
References:
https://www.cnblogs.com/lsm-boke/p/11761534.html
https://www.zhihu.com/question/21094489
https://baike.baidu.com/item/K%E5%9D%87%E5%80%BC%E8%81%9A%E7%B1%BB%E7%AE%97%E6%B3%95/15779627?fromtitle=K-means&fromid=4934806&fr=aladdin
https://blog.csdn.net/ruthywei/article/details/83045288