基于PyTorch的验证码识别

基于PyTorch的验证码识别

假期之前就有了实现一个验证码识别神经网络的打算。假期开始后,我跟着西瓜书和吴恩达的公开课整体学习了一遍,并自己手撕了一个神经网络,结果非常不理想,准确率只有20%左右(甚至不如OCR),期间还经历了虚拟机死机,于是转而使用PyTorch框架实现。

PyTorch简介

PyTorch是一个开源的Python机器学习库,基于Torch,底层由C++实现,应用于人工智能领域,如自然语言处理。它主要由Facebook的人工智能研究团队开发,并且被用于Uber的概率编程软件Pyro。

PyTorch主要有两大特征:

- 类似于NumPy的张量计算,可使用GPU加速;
- 基于带自动微分系统的深度神经网络。
PyTorch包括torch.nn、torch.optim等子模块。
来自:PyTorch-维基百科

PyTorch框架的基本流程

构建数据集 → 构建网络模型 → 训练模型 → 检测模型

构建数据集

获取验证码图片的代码按下不表,图片大小为40×100(高×宽)。构建数据集就是按照框架要求,把图片及其标签转换成torch.Tensor。

import re
import torch
from torch.utils.data import Dataset
import os
from PIL import Image
from torchvision import transforms


class MyDataset(Dataset):
    """Captcha image dataset whose labels are taken from the file names.

    Every entry of ``root_dir`` is treated as one sample. The part of the
    file name before the first ``_`` is the captcha text; its ASCII digits
    are one-hot encoded and flattened into the label tensor.
    """

    # Symbols a captcha may contain; a symbol's position is its class index.
    CHARSET = '0123456789'

    # Preprocessing applied to every image: convert to a tensor, resize to
    # 40x100 (height x width) and reduce to a single grayscale channel.
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Resize((40, 100)),
        transforms.Grayscale()
    ])

    def __init__(self, root_dir):
        """Collect the path of every entry found directly in ``root_dir``."""
        super().__init__()
        self.image_path = [os.path.join(root_dir, file_name) for file_name in os.listdir(root_dir)]

    def __len__(self):
        """Return the number of samples."""
        return len(self.image_path)

    def __getitem__(self, item):
        """Return ``(image_tensor, label_tensor)`` for the sample at ``item``."""
        path = self.image_path[item]
        # The context manager releases the file handle once the pixels have
        # been converted; otherwise a large dataset can exhaust the process's
        # open-file limit.
        with Image.open(path) as image:
            image = MyDataset.transform(image)
        # os.path.basename understands the platform's separator, whereas
        # splitting on '/' leaves the directory attached on Windows and lets
        # digits from the directory name leak into the label.
        image_name = os.path.basename(path)
        code = image_name.split('_')[0]
        return image, MyDataset.encode(code)

    @staticmethod
    def encode(code):
        """One-hot encode the digits of a captcha string.

        :param code: captcha text; characters other than 0-9 are ignored
        :return: flattened int tensor of length ``10 * number_of_digits``;
            position ``10 * i + d`` is 1 when the i-th digit equals ``d``

        One-hot encoding matches the layout of the network output.
        """
        charset = MyDataset.CHARSET
        # [0-9] instead of \d: \d also matches non-ASCII digits, which are
        # not part of the charset and would make index() raise ValueError.
        digits = re.findall(r'[0-9]', code)
        encoded = torch.zeros(len(digits), len(charset), dtype=torch.int)
        for position, digit in enumerate(digits):
            encoded[position, charset.index(digit)] = 1
        return torch.flatten(encoded)

    @staticmethod
    def decode(code_tensor):
        """Turn a flattened one-hot (or score) tensor back into a digit string.

        :param code_tensor: tensor whose element count is a multiple of 10,
            e.g. shape ``(40,)`` or ``(1, 40)`` for a four-digit captcha
        :return: string with one digit per group of ten values, chosen by
            the position of the largest value in the group
        """
        charset = MyDataset.CHARSET
        # reshape(-1, ...) infers the number of characters from the tensor
        # size and, unlike view, also accepts non-contiguous tensors.
        rows = code_tensor.reshape(-1, len(charset))
        return ''.join(charset[index] for index in rows.argmax(dim=1).tolist())

构建网络模型

PyTorch的神经网络主要就是实现forward函数(即神经网络中的前向传播),反向传播由自动微分系统自动完成。

import torch
from torch import nn
from MyDataset import MyDataset


class Model(nn.Module):
    """Convolutional network producing 40 output scores per input image.

    Four stages of 3x3 convolution + ReLU + 2x2 max pooling are followed by
    a fully connected head. The head expects 6144 (= 512 * 2 * 6) flattened
    features, which is what a single-channel 40x100 input yields after the
    four pooling steps.
    """

    def __init__(self):
        super().__init__()
        # Channel count before and after each convolutional stage.
        widths = (1, 64, 128, 256, 512)
        for number, (c_in, c_out) in enumerate(zip(widths, widths[1:]), start=1):
            # Registered as layer1 .. layer4, in that order.
            setattr(self, f'layer{number}', self._conv_stage(c_in, c_out))
        self.layer5 = nn.Sequential(
            nn.Flatten(),  # (N, 512, 2, 6) -> (N, 6144)
            nn.Linear(6144, 3072),
            nn.Dropout(),
            nn.Linear(3072, 40)
        )

    @staticmethod
    def _conv_stage(in_channels, out_channels):
        """Build one conv -> ReLU -> max-pool stage that halves height and width."""
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=(3, 3), padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2)
        )

    def forward(self, x):
        """Run ``x`` through all five stages and return the raw scores.

        Shapes for an (N, 1, 40, 100) input:
        (N, 64, 20, 50) -> (N, 128, 10, 25) -> (N, 256, 5, 12)
        -> (N, 512, 2, 6) -> (N, 40).
        """
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4, self.layer5):
            x = stage(x)
        return x

训练与测试

PyTorch的训练、测试也是按照框架要求完成即可。

from torch.utils.data import DataLoader
from MyDataset import MyDataset
import torch
from torch import nn
from torch.nn import functional
from torch.optim import Adam
from Model import Model


class Train(object):
    """Training loop for the captcha model."""

    # Number of passes over the training data.
    Epoch = 10

    def __init__(self):
        super(Train, self).__init__()

    @staticmethod
    def train(model, optimizer, loss_func, train_dataloader, save_path='model_2.pth'):
        """Train ``model`` for ``Train.Epoch`` epochs and save it to disk.

        :param model: network to optimise; batches are moved to the device
            its parameters live on
        :param optimizer: optimizer already bound to ``model``'s parameters
        :param loss_func: callable ``loss_func(prediction, target)``; the
            target is passed as a float tensor
        :param train_dataloader: iterable yielding ``(data, label)`` batches
        :param save_path: file the whole model object is saved to when
            training finishes
        """
        # Follow the model's device instead of assuming CUDA, so the same
        # loop also runs on CPU-only machines.
        first_param = next(model.parameters(), None)
        if first_param is not None:
            device = first_param.device
        else:
            device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

        # Training mode only needs to be enabled once, not for every batch.
        model.train()
        for epoch in range(Train.Epoch):
            sum_loss = 0
            for data, label in train_dataloader:
                data = data.to(device)
                label = label.to(device)

                predict = model(data)
                # Labels are int one-hot vectors; cast them to float for the loss.
                loss = loss_func(predict, label.float())
                sum_loss += loss.item()
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            print(f'轮次:{epoch}\t损失:{sum_loss}')
        torch.save(model, save_path)


class Predict:
    """Evaluation of a saved model on the test set."""

    @staticmethod
    def predict(model_path='model_2.pth', test_dir='Dataset/TestData'):
        """Print and return the share of test captchas predicted exactly.

        :param model_path: file written by ``torch.save(model, ...)``; the
            default is the file name the training step in this script saves to
        :param test_dir: directory containing the labelled test images
        :return: accuracy in ``[0, 1]``; ``0.0`` when the test set is empty
        """
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        # NOTE(review): torch.load unpickles arbitrary objects, so only load
        # checkpoints from a trusted source. Recent PyTorch releases default
        # to weights_only=True, which rejects whole-model checkpoints like
        # this one - pass weights_only=False there (confirm against the
        # installed version).
        model = torch.load(model_path, map_location=device).to(device)
        # Evaluation mode disables Dropout; without it predictions are
        # randomly perturbed and the measured accuracy is too low.
        model.eval()

        test_dataset = MyDataset(test_dir)
        test_dataloader = DataLoader(test_dataset, batch_size=1, shuffle=True)

        correct = 0
        total = 0
        # No gradients are needed for evaluation.
        with torch.no_grad():
            for image, label in test_dataloader:
                code = MyDataset.decode(label)
                pre_code = MyDataset.decode(model(image.to(device)))
                if pre_code == code:
                    correct += 1
                total += 1

        # Guard against an empty test directory (division by zero).
        accuracy = correct / total if total else 0.0
        print(f'正确率:{accuracy:.4f}')
        return accuracy


if __name__ == '__main__':
    # Script entry point: build the network, loss and optimizer on the GPU,
    # train on the training set, then report accuracy on the test set.
    network = Model().cuda()
    criterion = nn.MSELoss().cuda()
    optimizer = Adam(network.parameters(), lr=0.001)

    train_set = MyDataset('Dataset/TrainData')
    train_loader = DataLoader(train_set, batch_size=64, shuffle=True)

    Train.train(network, optimizer, criterion, train_loader)
    Predict.predict()

经过训练,最终准确率可以达到99%,可以很好地使用。

你可能感兴趣的:(pytorch,深度学习,神经网络)