手写数字识别-传统机器学习

# 1.导包
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.datasets import load_digits
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.model_selection import train_test_split

from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import MultinomialNB
from sklearn.ensemble import AdaBoostClassifier

from sklearn.metrics import accuracy_score

# 2. Load the data and take a first look

# Load the bundled handwritten-digits dataset
digits = load_digits()
data = digits.data

# Bind the first sample once so it can be printed and plotted below
first_image, first_label = digits.images[0], digits.target[0]

# Shape of the feature matrix
print(data.shape)
# Pixel grid of the first image
print(first_image)
# Digit that the first image represents
print(first_label)

# Render the first image
plt.imshow(first_image)
plt.show()
(1797, 64)
[[ 0.  0.  5. 13.  9.  1.  0.  0.]
 [ 0.  0. 13. 15. 10. 15.  5.  0.]
 [ 0.  3. 15.  2.  0. 11.  8.  0.]
 [ 0.  4. 12.  0.  0.  8.  8.  0.]
 [ 0.  5.  8.  0.  0.  9.  8.  0.]
 [ 0.  4. 11.  0.  1. 12.  7.  0.]
 [ 0.  2. 14.  5. 10. 12.  0.  0.]
 [ 0.  0.  6. 13. 10.  0.  0.  0.]]
0

Process finished with exit code 0

手写数字识别-传统机器学习_第1张图片

# 3. Split the dataset and normalise the features

# Feature matrix and labels
data1 = digits.data
target1 = digits.target

# Split FIRST: hold out 25% of the original samples as the test set (any
# other ratio can be used). Splitting must happen before any replication,
# otherwise copies of the same sample end up on both sides of the split and
# the test accuracy is inflated by train/test leakage.
train_x, test_x, train_y, test_y = train_test_split(data1, target1, test_size=0.25)

# "Augmentation" by replication: stack the training portion three times.
# Only the training set is replicated; the test set stays untouched so it
# contains no sample the models have already seen.
train_x = np.vstack((train_x, train_x, train_x))
train_y = np.hstack((train_y, train_y, train_y))

# z-score standardisation; statistics are fitted on the training set only
# and then re-used to transform the test set.
ss = StandardScaler()
train_ss_scaled = ss.fit_transform(train_x)
test_ss_scaled = ss.transform(test_x)
# Min-max scaling, for classifiers that cannot take negative feature values
# (e.g. multinomial naive Bayes); again fitted on the training set only.
mm = MinMaxScaler()
train_mm_scaled = mm.fit_transform(train_x)
test_mm_scaled = mm.transform(test_x)
# 4. Build the models and compare them
models = {}
models['knn'] = KNeighborsClassifier()
models['svm'] = SVC()
models['bayes'] = MultinomialNB()
models['tree'] = DecisionTreeClassifier()
# The base learner is passed positionally: its keyword was renamed from
# `base_estimator` to `estimator` in scikit-learn 1.2 and the old name was
# removed in 1.4, so the positional form works on both old and new releases.
models['ada'] = AdaBoostClassifier(models['tree'], learning_rate=0.1)

# Models trained on z-score standardised features; every other model is
# trained on min-max scaled features (multinomial naive Bayes needs
# non-negative inputs).
zscore_models = {'knn', 'svm', 'ada'}

for model_key, model in models.items():
    if model_key in zscore_models:
        fit_x, eval_x, label = train_ss_scaled, test_ss_scaled, "准确率:"
    else:
        fit_x, eval_x, label = train_mm_scaled, test_mm_scaled, "准确率: "

    # Fit on the training split, then score on the held-out test split
    model.fit(fit_x, train_y)
    predict = model.predict(eval_x)
    print(model_key, label, accuracy_score(test_y, predict))

knn 准确率: 0.9799703264094956
svm 准确率: 0.9985163204747775
bayes 准确率:  0.8835311572700296
tree 准确率:  0.9955489614243324
ada 准确率: 0.9933234421364985

注:以上结果是在“先把数据复制三份、再划分训练集/测试集”的情况下得到的,测试样本的副本同时出现在训练集中(数据泄漏),因此准确率偏高,不能代表模型真实的泛化能力。

Process finished with exit code 0

你可能感兴趣的:(#,机器学习项目,机器学习)