At the tail end of the National Day holiday I revisited the PaddlePaddle framework with a hands-on deep learning exercise: handwritten digit recognition. This post shares the model implementation.
The main steps of handwritten digit recognition are walked through below:
# Import PaddlePaddle and the other required libraries
import paddle
from paddle.nn import Conv2D, MaxPool2D, Linear
import paddle.nn.functional as F
import numpy as np
import matplotlib.pyplot as plt
import gzip
import os
import json
import random
from PIL import Image
# Define a class MnistDataset that subclasses paddle.io.Dataset, so DataLoader can load data asynchronously
class MnistDataset(paddle.io.Dataset):
    def __init__(self, mode='train'):
        datafile = './work/mnist.json.gz'
        data = json.load(gzip.open(datafile))
        # The file contains the training, validation and test splits
        train_set, val_set, test_set = data[:3]
        # Image height and width
        self.IMG_ROWS, self.IMG_COLS = 28, 28
        if mode == 'train':
            # Training split
            imgs, labels = train_set[:2]
        elif mode == 'valid':
            imgs, labels = val_set[:2]
        elif mode == 'eval':
            imgs, labels = test_set[:2]
        else:
            raise Exception("mode can only be one of [train, valid, eval]")
        # Sanity-check the data
        assert len(imgs) == len(labels), \
            "length of train_imgs({}) should be the same with train_labels({})".format(
                len(imgs), len(labels)
            )
        self.imgs = imgs
        self.labels = labels

    def __getitem__(self, idx):
        img = np.reshape(self.imgs[idx], [1, self.IMG_ROWS, self.IMG_COLS]).astype('float32')
        label = np.reshape(self.labels[idx], [1]).astype('int64')
        return img, label

    def __len__(self):
        return len(self.imgs)
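# A minimal sanity check for the dataset class above (my own hypothetical helper,
# not part of the original script; it assumes './work/mnist.json.gz' is present
# and is never called by the training code):
def check_dataset_sample():
    ds = MnistDataset(mode='train')
    img, label = ds[0]
    # Expected: img shape (1, 28, 28) float32, label shape (1,) int64
    print('samples:', len(ds), 'img:', img.shape, img.dtype, 'label:', label.shape, label.dtype)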
# Define the network structure: a multi-layer convolutional neural network
class MNIST_CNN(paddle.nn.Layer):
    def __init__(self):
        super(MNIST_CNN, self).__init__()
        # First convolutional layer
        self.conv1 = Conv2D(in_channels=1, out_channels=20, kernel_size=5, stride=1, padding=2)
        # First pooling layer
        self.max_pool1 = MaxPool2D(kernel_size=2, stride=2)
        # Second convolutional and pooling layers
        self.conv2 = Conv2D(in_channels=20, out_channels=20, kernel_size=5, stride=1, padding=2)
        self.max_pool2 = MaxPool2D(kernel_size=2, stride=2)
        # Fully connected layer with 10 output classes
        self.fc = Linear(in_features=980, out_features=10)

    # Define the forward pass
    def forward(self, inputs):
        x = self.conv1(inputs)
        x = F.relu(x)
        x = self.max_pool1(x)
        x = self.conv2(x)
        x = F.relu(x)
        x = self.max_pool2(x)
        # Flatten [N, 20, 7, 7] to [N, 980] before the fully connected layer
        x = paddle.reshape(x, [x.shape[0], 980])
        x = self.fc(x)
        return x
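# Why in_features=980: each MaxPool2D halves the 28x28 feature map, so after two
# pools the output is [N, 20, 7, 7], i.e. 20 * 7 * 7 = 980 features per sample.
# A quick way to confirm the layer shapes (a sketch assuming Paddle 2.x; the helper
# name is my own and it is not called anywhere below):
def print_model_summary():
    paddle.summary(MNIST_CNN(), (1, 1, 28, 28))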
# Define the evaluation function
def evaluation(model, val_loader):
    model.eval()
    acc_set = list()
    for batch_id, data in enumerate(val_loader()):
        images, labels = data[:2]
        images = paddle.to_tensor(images)
        labels = paddle.to_tensor(labels)
        pred = model(images)
        acc = paddle.metric.accuracy(input=pred, label=labels)
        acc_set.extend(acc.numpy())
    # Average the accuracy over all batches
    acc_val_mean = np.array(acc_set).mean()
    return acc_val_mean
# Define the training function, using the cross-entropy loss
def train(model, train_loader):
    model.train()
    opt = paddle.optimizer.SGD(learning_rate=0.001, parameters=model.parameters())
    ce_loss = paddle.nn.CrossEntropyLoss()
    EPOCH_NUM = 10
    for epoch_id in range(EPOCH_NUM):
        for batch_id, data in enumerate(train_loader()):
            # Prepare a batch of data
            images, labels = data[:2]
            images = paddle.to_tensor(images)
            labels = paddle.to_tensor(labels)
            # Forward pass
            preds = model(images)
            # Compute the loss
            loss = ce_loss(preds, labels)
            if batch_id % 200 == 0:
                print("epoch: {}, batch: {}, loss: {}".format(
                    epoch_id, batch_id, loss.numpy()[0]
                ))
            # Backward pass: compute gradients for every layer
            loss.backward()
            # Update the network parameters
            opt.step()
            # Clear the gradients
            opt.clear_grad()
    # Save the model parameters
    paddle.save(model.state_dict(), './mnist.pdparams')
def train_process():
    # Create the dataset instance for the training split
    train_dataset = MnistDataset(mode='train')
    # Wrap it with DataLoader to get an asynchronous batch iterator
    train_loader = paddle.io.DataLoader(train_dataset, batch_size=100, shuffle=True, drop_last=True)
    # Create the dataset instance for the validation split
    val_dataset = MnistDataset(mode='valid')
    val_loader = paddle.io.DataLoader(val_dataset, batch_size=128, drop_last=True)
    # Create the model instance
    model = MNIST_CNN()
    # Run training
    train(model, train_loader)
    # Evaluate on the training and validation sets
    acc_train_mean = evaluation(model, train_loader)
    acc_val_mean = evaluation(model, val_loader)
    print('train acc:{}, val acc:{}'.format(
        acc_train_mean, acc_val_mean
    ))
# Read a local image and convert it to the model's input format
def load_image(img_path):
    # Open the image and convert it to grayscale
    im = Image.open(img_path).convert('L')
    # Note: Image.ANTIALIAS is named Image.LANCZOS in newer Pillow releases
    im = im.resize((28, 28), Image.ANTIALIAS)
    im = np.array(im).reshape(1, 1, 28, 28).astype(np.float32)
    # Normalize pixel values to [0, 1]
    im = im / 255
    return im
# Define the prediction process
def predict_process():
    model = MNIST_CNN()
    params_file_path = './mnist.pdparams'
    img_path = './images/0.jpg'
    # Load the saved model parameters
    param_dict = paddle.load(params_file_path)
    model.load_dict(param_dict)
    # Switch to evaluation mode and load the input image
    model.eval()
    tensor_img = load_image(img_path)
    # The model outputs one score per digit class (10 in total)
    results = model(paddle.to_tensor(tensor_img))
    # Take the class with the highest score as the prediction
    label = np.argsort(results.numpy())
    print('Predicted digit:', label[0][-1])
if __name__ == "__main__":
    train_process()
    predict_process()
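The dataset class also supports mode='eval' for the held-out test split, which the script above never uses. Below is a minimal sketch of scoring the saved parameters on it, reusing the functions already defined; the function name and batch size are my own illustrative choices:
def test_process():
    # Build a loader for the test split and score the saved model with evaluation()
    test_dataset = MnistDataset(mode='eval')
    test_loader = paddle.io.DataLoader(test_dataset, batch_size=128, drop_last=True)
    model = MNIST_CNN()
    model.load_dict(paddle.load('./mnist.pdparams'))
    acc_test_mean = evaluation(model, test_loader)
    print('test acc:{}'.format(acc_test_mean))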
This exercise made me appreciate how much hyperparameter tuning matters. What left the deepest impression was adjusting the optimizer's learning rate: a value that is too small makes the loss fall slowly, while one that is too large also keeps it from dropping steadily, and either way training takes much longer to converge.
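Rather than hand-picking a single fixed value, one option is to start with a larger learning rate and decay it on a schedule. Here is a minimal sketch of how that could replace the fixed learning_rate=0.001 in train() above, assuming Paddle 2.x's paddle.optimizer.lr.StepDecay; the helper name, starting rate, step size and decay factor are illustrative choices of mine, not tuned values:
# Sketch: an SGD optimizer whose learning rate starts at 0.01 and halves every 2 epochs
def build_scheduled_optimizer(model):
    scheduler = paddle.optimizer.lr.StepDecay(learning_rate=0.01, step_size=2, gamma=0.5)
    opt = paddle.optimizer.SGD(learning_rate=scheduler, parameters=model.parameters())
    return opt, scheduler
# In train(): opt, scheduler = build_scheduled_optimizer(model), then call
# scheduler.step() once at the end of each epoch to apply the decay.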
The dataset and source code for this handwritten digit recognition example are available on Gitee:
https://gitee.com/dttrcv/paddle-practice/blob/master/HandWrittenDigitRec/baseline_loss.py
Reference tutorial from the PaddlePaddle site:
https://www.paddlepaddle.org.cn/tutorials/projectdetail/2310369