深入浅出机器学习MNIST实战(三)

创建卷积网络:前两个卷积层后各接一个池化层,第三个卷积层后接压平层Flatten,最后是两个全连接层

# Build the network in one go by handing the ordered layer list to Sequential.
model = models.Sequential([
    # Conv layer 1: 32 kernels of 3x3; 28x28x1 is the size of one input image.
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)),
    layers.MaxPooling2D((2, 2)),
    # Conv layer 2: 64 kernels of 3x3.
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    # Conv layer 3: 64 kernels of 3x3 (no pooling after this one).
    layers.Conv2D(64, (3, 3), activation='relu'),
    # Flatten the feature maps, then classify with two dense layers;
    # the final layer has 10 softmax outputs, one per digit.
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dense(10, activation='softmax'),
])

数据加载

# Source of the MNIST data: the Keras loader downloads the data set
# automatically when it is not already present locally.
# Flip the condition to use the custom loader functions instead.
if True:
    (train_images, train_labels), (test_images, test_labels) = datasets.mnist.load_data()
else:
    train_images = load_train_images()  # custom loader functions
    train_labels = load_train_labels()
    test_images = load_test_images()
    test_labels = load_test_labels()

数据维度转化,增加了通道维度

# Add a trailing channel axis so each image is 28x28x1.
# MNIST has 60,000 training and 10,000 test images, but -1 lets NumPy infer
# the sample count so the code also works on a subset or another data set.
train_images = train_images.reshape((-1, 28, 28, 1))
test_images = test_images.reshape((-1, 28, 28, 1))

归一化

# Scale the pixel values from 0..255 down to the 0..1 range.
train_images = train_images / 255.0
test_images = test_images / 255.0

编译模型

# Configure training: Adam optimizer, sparse categorical cross-entropy loss
# and accuracy as the reported metric.
self.cnn.model.compile(optimizer='adam',
                       loss='sparse_categorical_crossentropy',
                       metrics=['accuracy'])

模型保存参数

# Checkpoint path template; {epoch:04d} is filled in with the epoch number.
check_path = './ckpt/cp-{epoch:04d}.ckpt'
# The callback saves the weights only (save_weights_only=True) to check_path
# after every epoch, which is the callback's default frequency. The deprecated
# `period` argument is therefore not needed.
save_model_cb = tf.keras.callbacks.ModelCheckpoint(
    check_path, save_weights_only=True, verbose=1)

拟合模型

# Train for 5 epochs; the checkpoint callback runs during training.
self.cnn.model.fit(self.data.train_images, self.data.train_labels,
                   epochs=5, callbacks=[save_model_cb])

测试准确度

# Evaluate on the held-out test set; evaluate() yields the loss followed by
# the metric requested at compile time (accuracy).
test_loss, test_acc = self.cnn.model.evaluate(self.data.test_images, self.data.test_labels)
# Report the accuracy and how many test images were evaluated.
print("准确率: %.4f,共测试了%d张图片 " % (test_acc, len(self.data.test_labels)))

预测图片预处理,两种方式加载图片

# Read the picture as a single-channel grayscale array (two equivalent ways).
if False:
    # Variant 1: Pillow, 'L' = 8-bit grayscale.
    gray = np.array(Image.open(image_path).convert('L'))
else:
    # Variant 2: OpenCV reads BGR, so convert to grayscale explicitly.
    img = cv2.imread(image_path)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# Invert black/white and scale to 0..1 in one vectorized step, so the input
# matches the normalized training data (which was divided by 255).
# The result is a batch holding one 28x28x1 image: shape (1, 28, 28, 1).
x = (1.0 - gray / 255.0).reshape((1, 28, 28, 1))

预测

# Run the network on the prepared batch.
y = self.cnn.model.predict(x)

# x holds a single image, so y[0] is the only row of interest.
# np.argmax() gives the index of the largest value, i.e. the predicted digit.
print(image_path)
print(y[0])
print('-> Predict digit', np.argmax(y[0]))

预测结果

深入浅出机器学习MNIST实战(三)_第1张图片

 

完整代码:

import os
import tensorflow as tf
from tensorflow.keras import datasets, layers, models
import sys
import decodeidx
from decodeidx import load_train_images,load_train_labels,load_test_images,load_test_labels
''' 
python 3.7 
tensorflow 2.0.0b0 
'''
import matplotlib.pyplot as plt
import cv2
import numpy as np
from PIL import Image

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

class CNN():
    """Small convolutional classifier for 28x28 single-channel images.

    The constructor builds the network, prints its summary and stores it in
    ``self.model``. The model is not compiled here.
    """

    def __init__(self):
        feature_layers = [
            # Conv layer 1: 32 kernels of 3x3 on a 28x28x1 input, then 2x2 pooling.
            layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)),
            layers.MaxPooling2D((2, 2)),
            # Conv layer 2: 64 kernels of 3x3, then 2x2 pooling.
            layers.Conv2D(64, (3, 3), activation='relu'),
            layers.MaxPooling2D((2, 2)),
            # Conv layer 3: 64 kernels of 3x3, no pooling afterwards.
            layers.Conv2D(64, (3, 3), activation='relu'),
        ]
        classifier_layers = [
            # Flatten the feature maps and classify into 10 softmax outputs.
            layers.Flatten(),
            layers.Dense(64, activation='relu'),
            layers.Dense(10, activation='softmax'),
        ]

        network = models.Sequential(feature_layers + classifier_layers)
        network.summary()
        self.model = network
class DataSource(object):
    """Loads the MNIST data and prepares it for the CNN.

    After construction the instance exposes ``train_images``,
    ``train_labels``, ``test_images`` and ``test_labels``. The image arrays
    are reshaped to (N, 28, 28, 1) and scaled to the 0..1 range; the labels
    are stored exactly as the loader returned them.
    """

    def __init__(self, use_keras_loader=True):
        """Load, reshape and normalize the data.

        Args:
            use_keras_loader: When True (the default, matching the previous
                hard-coded behaviour) the data comes from
                ``datasets.mnist.load_data()``, which downloads the data set
                if it is not present locally. When False the custom
                ``load_*`` helpers imported from ``decodeidx`` are used.
        """
        if use_keras_loader:
            (train_images, train_labels), (test_images, test_labels) = \
                datasets.mnist.load_data()
        else:
            # NOTE(review): assumes the custom loaders return arrays whose
            # size is a multiple of 28*28 -- confirm against decodeidx.
            train_images = load_train_images()
            train_labels = load_train_labels()
            test_images = load_test_images()
            test_labels = load_test_labels()

        # Add the channel axis. -1 lets NumPy infer the number of samples
        # (60,000 / 10,000 for full MNIST) instead of hard-coding it.
        train_images = train_images.reshape((-1, 28, 28, 1))
        test_images = test_images.reshape((-1, 28, 28, 1))

        # Map pixel values from 0..255 to 0..1.
        train_images = train_images / 255.0
        test_images = test_images / 255.0

        self.train_images, self.train_labels = train_images, train_labels
        self.test_images, self.test_labels = test_images, test_labels
class Train:
    """Builds the CNN and the data source, then trains and evaluates the model."""

    def __init__(self):
        self.cnn = CNN()
        self.data = DataSource()

    def train(self):
        """Compile the model, fit it for 5 epochs and print the test accuracy.

        The weights are checkpointed under ./ckpt after every epoch.
        """
        # {epoch:04d} is filled in with the epoch number.
        check_path = './ckpt/cp-{epoch:04d}.ckpt'
        # Weights-only checkpoint after every epoch, which is the callback's
        # default frequency; the deprecated `period` argument is not needed.
        save_model_cb = tf.keras.callbacks.ModelCheckpoint(
            check_path, save_weights_only=True, verbose=1)

        self.cnn.model.compile(optimizer='adam',
                               loss='sparse_categorical_crossentropy',
                               metrics=['accuracy'])
        print(self.data.train_images.shape)

        self.cnn.model.fit(self.data.train_images, self.data.train_labels,
                           epochs=5, callbacks=[save_model_cb])

        # evaluate() yields the loss followed by the compiled metric (accuracy).
        test_loss, test_acc = self.cnn.model.evaluate(
            self.data.test_images, self.data.test_labels)
        print("准确率: %.4f,共测试了%d张图片 " % (test_acc, len(self.data.test_labels)))
class Predict(object):
    """Restores the latest checkpoint from ./ckpt and classifies single images."""

    def __init__(self):
        """Build the network and load the most recent saved weights.

        Raises:
            FileNotFoundError: if ./ckpt contains no checkpoint.
        """
        latest = tf.train.latest_checkpoint('./ckpt')
        if latest is None:
            # Fail with a clear message instead of passing None to load_weights.
            raise FileNotFoundError(
                "no checkpoint found in './ckpt'; run the training step first")
        self.cnn = CNN()
        # Restore the network weights.
        self.cnn.model.load_weights(latest)

    @staticmethod
    def _prepare_input(gray):
        """Turn a 28x28 grayscale image (values 0..255) into a model input batch.

        The image is inverted (black <-> white) and scaled to 0..1 so that it
        matches the normalized training data, then reshaped to (1, 28, 28, 1).

        Raises:
            ValueError: if the image is not 28x28.
        """
        gray = np.asarray(gray)
        if gray.shape != (28, 28):
            raise ValueError(
                "expected a 28x28 grayscale image, got shape %s" % (gray.shape,))
        # Vectorized inversion and normalization in one step:
        # (255 - p) / 255 == 1 - p / 255.
        return (1.0 - gray / 255.0).reshape((1, 28, 28, 1))

    def predict(self, image_path, use_pil=False):
        """Classify the image at ``image_path`` and print the result.

        Args:
            image_path: path of a 28x28 picture of a digit.
            use_pil: read the file with Pillow instead of OpenCV
                (default False, i.e. OpenCV as before).

        Returns:
            The predicted digit as an int.

        Raises:
            FileNotFoundError: if OpenCV cannot read the file.
            ValueError: if the image is not 28x28.
        """
        # Read the picture as a single-channel grayscale array.
        if use_pil:
            gray = np.array(Image.open(image_path).convert('L'))
        else:
            img = cv2.imread(image_path)
            if img is None:
                # cv2.imread signals an unreadable file by returning None.
                raise FileNotFoundError("cannot read image: %s" % image_path)
            gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

        x = self._prepare_input(gray)
        y = self.cnn.model.predict(x)

        # x holds a single image, so y[0] is the only row of interest;
        # np.argmax() gives the index of the largest value, i.e. the digit.
        digit = int(np.argmax(y[0]))
        print(image_path)
        print(y[0])
        print('-> Predict digit', digit)
        return digit

# Mode switch for running this file as a script:
#   True  -> train the model and write checkpoints
#   False -> load the latest checkpoint and classify one sample image
#   anything else -> do nothing except print "exit"
Flags = False

if __name__ == "__main__":
    if Flags is True:
        app = Train()
        app.train()
    elif Flags is False:
        app = Predict()
        app.predict('./bmp_mnist/5.jpg')
    else:
        print("exit\n")

CNN方式准确度很高,下篇给出解码mnist数据代码

 

 

你可能感兴趣的:(机器学习)