期末考试前复习了这学期机器学习课程学到的几种算法(包含线性回归,逻辑回归,神经网络,决策树,K近邻,朴素贝叶斯,K均值聚类,支持向量机),发现自己差得还很远。在这里简单把复习的时候整理的代码存档一下,也希望对大家有一些帮助。代码都实测运行过,笔者用的是 Spyder 编辑器(IDE)。
线性回归
# Linear regression: fit a noisy straight line y = k*x + b, then plot
# the true line, the noisy samples, and the fitted line.
import numpy as np
import matplotlib.pyplot as plt
k,b=2,5
x0=np.arange(0,10,0.2)
y0=k*x0+b
# Gaussian noise (mean 1, std 2); sized from the data instead of the
# original hard-coded 50 so changing x0's range cannot desynchronize them.
noise=np.random.normal(1,2,len(x0))
y_noise=y0+noise
from sklearn.linear_model import LinearRegression
# Build the model. With fit_intercept=True the design matrix needs only
# the feature column: the original extra all-ones column was redundant,
# since the intercept is already fitted separately (the ones column's
# coefficient was forced to ~0).
XM=x0.reshape(-1,1)
model=LinearRegression(fit_intercept=True)
# Fit the model, then recover slope and intercept.
model.fit(XM,y_noise)
k=model.coef_[0]
b=model.intercept_
y_fitted=k*x0+b
plt.plot(x0,y0,linestyle='-',c='r')                  # true line (the duplicate marker plot of y0 was removed)
plt.plot(x0,y_noise,marker='o',c='b',linestyle='')   # noisy samples
plt.plot(x0,y_fitted,linestyle='-',c='g')            # fitted line
plt.show()
逻辑回归
# Logistic regression on the two-moons dataset, followed by a plot of
# the linear decision boundary derived from the learned weights.
from sklearn.datasets import make_moons
x,y=make_moons(n_samples=100,noise=0.4)
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.4)
from sklearn.linear_model import LogisticRegression
# Build and fit the classifier on the training split.
LR=LogisticRegression()
LR.fit(x_train,y_train)
# Accuracy on the held-out test split.
c=LR.score(x_test,y_test)
# Predicted labels and per-class probabilities for samples 1..3.
d,e=LR.predict(x[1:4,:]),LR.predict_proba(x[1:4,:])
print(d)
print(e)
import numpy as np
import matplotlib.pyplot as plt
# Decision boundary of the fitted model:
#   w0*x1 + w1*x2 + b = 0   =>   x2 = -(w0*x1 + b) / w1
x1=np.arange(-1,2,0.1)
plt.scatter(x[:,0],x[:,1],c=y,marker='o')
w,b=LR.coef_,LR.intercept_
x2=-(w[0][0]*x1+b)/w[0][1]
plt.plot(x1,x2,'r-',linewidth=2)
plt.show()
神经网络
# Multi-layer perceptron classifier on two Gaussian blobs.
# Generate the data with make_blobs, then split into train/test sets.
from sklearn.datasets import make_blobs
from sklearn.model_selection import train_test_split
x,y=make_blobs(n_samples=300,n_features=2,
               centers=[[1,3],[3,4]],
               cluster_std=[0.3,0.6])
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.4)
from sklearn.neural_network import MLPClassifier
# Build the model: one hidden layer of 4 units, L-BFGS solver,
# small L2 regularization term.
mlf=MLPClassifier(solver='lbfgs',
                  hidden_layer_sizes=4,
                  alpha=1e-5)
# Fit on the training split.
mlf.fit(x_train,y_train)
# Accuracy on the held-out split.
c=mlf.score(x_test,y_test)
# Labels and class probabilities for samples 1..3.
d,e=mlf.predict(x[1:4,:]),mlf.predict_proba(x[1:4,:])
print(d)
print(e)
print(c)
K近邻
# K-nearest-neighbors classifier on the two-moons dataset.
# Generate the data with make_moons, then split into train/test sets.
from sklearn.datasets import make_moons
X,y=make_moons(n_samples=100,noise=0.4)
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test=train_test_split(X,y,test_size=0.4)
from sklearn.neighbors import KNeighborsClassifier
# Build (k = 5 neighbors) and fit the model on the training split.
knn=KNeighborsClassifier(n_neighbors=5)
knn.fit(x_train,y_train)
# Accuracy on the held-out split.
b=knn.score(x_test,y_test)
# Labels and class probabilities for samples 1..3.
d,e=knn.predict(X[1:4,:]),knn.predict_proba(X[1:4,:])
print(d)
print(e)
print(b)
决策树
# Decision-tree classifier on the two-moons dataset.
# Generate the data with make_moons, then split into train/test sets.
from sklearn.datasets import make_moons
X,y=make_moons(n_samples=100,noise=0.4)
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test=train_test_split(X,y,test_size=0.4)
from sklearn.tree import DecisionTreeClassifier
# Build the tree (entropy split criterion, depth capped at 4 to limit
# overfitting) and fit it on the training split.
model=DecisionTreeClassifier(criterion="entropy",max_depth=4)
model.fit(x_train,y_train)
# Accuracy on the held-out split.
c=model.score(x_test,y_test)
# Labels and class probabilities for samples 1..3.
d,e=model.predict(X[1:4,:]),model.predict_proba(X[1:4,:])
print(d)
print(e)
print(c)
朴素贝叶斯
# Gaussian naive Bayes on a tiny hand-written 1-D dataset.
import numpy as np
from sklearn.naive_bayes import GaussianNB
# 14 one-feature samples with binary labels.
x=np.array([[1],[2],[0],[1],[1],[2],[0],[0],[1],[0],[1],[2],[2],[0]])
y=np.array([0,1,1,1,1,1,0,0,1,1,0,1,1,0])
# Build and fit the model on the full dataset.
nb=GaussianNB()
nb.fit(x,y)
# Class probabilities for feature value 1, predicted label for value 0.
b=nb.predict_proba([[1]])
c=nb.predict([[0]])
print(b)
print(c)
K均值聚类
# K-means clustering on the two-moons dataset, scored with the
# Calinski-Harabasz index and visualized as a scatter plot.
# (The original train/test split was dead code — K-means below clusters
# the full dataset and the split results were never used — so it was removed.)
from sklearn.datasets import make_moons
x,y=make_moons(n_samples=100,noise=0.4)
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn import metrics
# Cluster into 4 groups and compute the Calinski-Harabasz score.
y_predict=KMeans(n_clusters=4).fit_predict(x)
# NOTE: the old misspelled name `calinski_harabaz_score` was deprecated
# in scikit-learn 0.20 and removed in 0.23; the correct spelling is used here.
b=metrics.calinski_harabasz_score(x,y_predict)
print(b)
# Visualize the clustering result (color = assigned cluster).
plt.scatter(x[:,0],x[:,1],c=y_predict)
plt.show()
# Support vector machine (linear kernel) on the two-moons dataset.
# Generate the data with make_moons, then split into train/test sets.
from sklearn.datasets import make_moons
X,y=make_moons(n_samples=80,noise=0.4)
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test=train_test_split(X,y,test_size=0.4)
from sklearn import svm
# Build and fit the classifier. The original passed gamma=2, but gamma
# only applies to the 'rbf'/'poly'/'sigmoid' kernels and is ignored by
# the linear kernel, so the misleading parameter was dropped.
model1=svm.SVC(C=1.0,kernel="linear")
model1.fit(x_train,y_train)
# Test-set accuracy and predicted labels for samples 1..3.
c=model1.score(x_test,y_test)
d=model1.predict(X[1:4,:])
print(d)
print(c)
K近邻
# Second KNN example: distance-weighted neighbors on noisier moons data.
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split
x, y = make_moons(100, noise = 0.5)  # generate the dataset
x_train,x_test,y_train,y_test=train_test_split(x, y, test_size=0.4)  # train/test split
from sklearn.neighbors import KNeighborsClassifier
# 5 neighbors, each vote weighted by inverse distance.
knn = KNeighborsClassifier(n_neighbors=5,weights='distance')
# Build the model from the training samples, then score on the test set.
knn.fit(x_train, y_train)
b=knn.score(x_test,y_test)
print(b)