本案例参考《Python数据科学手册》,属于分类问题的一种,觉得挺有意思的。下面上代码:
from sklearn.datasets import load_digits
import seaborn as sns
# Load the digits dataset.
digits = load_digits()
# Feature matrix and target labels used by the later steps.
X, y = digits.data, digits.target
# Report the shape of the image array.
print(digits.images.shape)
import matplotlib.pyplot as plt

# Draw a 10x10 grid of subplots showing the first 100 digit images,
# each annotated with its label.
fig, axes = plt.subplots(
    10, 10,
    figsize=(8, 8),
    subplot_kw={'xticks': [], 'yticks': []},  # no tick marks on any cell
    gridspec_kw=dict(hspace=0.1, wspace=0.1),
)
for i, ax in enumerate(axes.flat):
    ax.imshow(digits.images[i], cmap='binary', interpolation='nearest')
    # Place the label near the lower-left corner, in axes coordinates.
    ax.text(0.05, 0.05, str(digits.target[i]),
            transform=ax.transAxes, color='green')
plt.show()
# Manifold learning: project the data down to 2 dimensions.
from sklearn.manifold import Isomap

iso = Isomap(n_components=2)
# fit() returns the estimator itself, so the transform call can be chained.
data_projected = iso.fit(digits.data).transform(digits.data)

# Scatter the two projected components, colored by digit label.
plt.scatter(
    data_projected[:, 0],
    data_projected[:, 1],
    c=digits.target,
    edgecolor='none',
    alpha=0.5,
)
plt.colorbar(label='digit label', ticks=range(10))
plt.clim(-0.5, 9.5)
plt.show()
# Split the data into training and test sets.
from sklearn.model_selection import train_test_split

Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, random_state=0)

# Fit a Gaussian naive Bayes classifier, then predict labels for the test set.
from sklearn.naive_bayes import GaussianNB

model = GaussianNB().fit(Xtrain, ytrain)
y_model = model.predict(Xtest)
# Evaluate prediction accuracy on the test set.
from sklearn.metrics import accuracy_score

accuracy = accuracy_score(ytest, y_model)
print(accuracy)
# Evaluate with a confusion matrix, drawn as an annotated heatmap.
from sklearn.metrics import confusion_matrix

mat = confusion_matrix(ytest, y_model)
heatmap_options = dict(square=True, annot=True, cbar=False)
sns.heatmap(mat, **heatmap_options)
plt.xlabel('predicted value')
plt.ylabel('true value')
plt.show()
# Compare predictions with the test images: the predicted label is drawn in
# green when it matches the true label and in red when it does not.
fig, axes = plt.subplots(
    10, 10,
    figsize=(8, 8),
    subplot_kw={'xticks': [], 'yticks': []},
    gridspec_kw=dict(hspace=0.1, wspace=0.1),
)
# Reshape each test row into an 8x8 image for display.
test_images = Xtest.reshape(-1, 8, 8)
for i, ax in enumerate(axes.flat):
    # Draw the test image that prediction i refers to. train_test_split
    # shuffles the samples, so digits.images[i] is not the sample behind
    # y_model[i] / ytest[i].
    ax.imshow(test_images[i], cmap='binary', interpolation='nearest')
    ax.text(0.05, 0.05, str(y_model[i]),
            transform=ax.transAxes,
            color='green' if ytest[i] == y_model[i] else 'red')
plt.show()
数据集的标签
降维到2维的效果
混淆矩阵判别分类效果
真实值与预测值的比较