最近做了一个身份证号码识别项目,在此分享一下。视频效果如下所示,共有两种识别方法,其一就是直接上传身份证号码的截图进行识别,第二就是上传一张完整的身份证图像,然后通过逐步的处理定位身份证号码的位置,最后进行识别。视频效果如下所示:
身份证号码识别(Opencv,Pytorch)
使用的深度学习模型为CRNN,模型代码如下所示:
# crnn.py
import argparse, os
import torch
import torch.nn as nn
class BidirectionalLSTM(nn.Module):
def __init__(self, nInput_size, nHidden, nOut):
super(BidirectionalLSTM, self).__init__()
self.lstm = nn.LSTM(nInput_size, nHidden, bidirectional=True)
self.linear = nn.Linear(nHidden * 2, nOut)
def forward(self, input):
recurrent, (hidden, cell) = self.lstm(input)
T, b, h = recurrent.size()
t_rec = recurrent.view(T * b, h)
output = self.linear(t_rec) # [T * b, nOut]
output = output.view(T, b, -1) # 输出变换为[seq,batch,类别总数]
return output
class CNN(nn.Module):
def __init__(self, imageHeight, nChannel):
super(CNN, self).__init__()
assert imageHeight % 32 == 0, 'image Height has to be a multiple of 32'
self.depth_conv0 = nn.Conv2d(in_channels=nChannel, out_channels=nChannel, kernel_size=3, stride=1, padding=1,
groups=nChannel)
self.point_conv0 = nn.Conv2d(in_channels=nChannel, out_channels=64, kernel_size=1, stride=1, padding=0,
groups=1)
self.relu0 = nn.ReLU(inplace=True)
self.pool0 = nn.MaxPool2d(kernel_size=2, stride=2)
self.depth_conv1 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1, groups=64)
self.point_conv1 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=1, stride=1, padding=0, groups=1)
self.relu1 = nn.ReLU(inplace=True)
self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
self.depth_conv2 = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, stride=1, padding=1, groups=128)
self.point_conv2 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=1, stride=1, padding=0, groups=1)
self.batchNorm2 = nn.BatchNorm2d(256)
self.relu2 = nn.ReLU(inplace=True)
self.depth_conv3 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1, groups=256)
self.point_conv3 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=1, stride=1, padding=0, groups=1)
self.relu3 = nn.ReLU(inplace=True)
self.pool3 = nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 1), padding=(0, 1))
self.depth_conv4 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1, groups=256)
self.point_conv4 = nn.Conv2d(in_channels=256, out_channels=512, kernel_size=1, stride=1, padding=0, groups=1)
self.batchNorm4 = nn.BatchNorm2d(512)
self.relu4 = nn.ReLU(inplace=True)
self.depth_conv5 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1, groups=512)
self.point_conv5 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=1, stride=1, padding=0, groups=1)
self.relu5 = nn.ReLU(inplace=True)
self.pool5 = nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 1), padding=(0, 1))
# self.conv6 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=2, stride=1, padding=0)
self.depth_conv6 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=2, stride=1, padding=0, groups=512)
self.point_conv6 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=1, stride=1, padding=0, groups=1)
self.batchNorm6 = nn.BatchNorm2d(512)
self.relu6 = nn.ReLU(inplace=True)
def forward(self, input):
depth0 = self.depth_conv0(input)
point0 = self.point_conv0(depth0)
relu0 = self.relu0(point0)
pool0 = self.pool0(relu0)
# print(pool0.size())
depth1 = self.depth_conv1(pool0)
point1 = self.point_conv1(depth1)
relu1 = self.relu1(point1)
pool1 = self.pool1(relu1)
# print(pool1.size())
depth2 = self.depth_conv2(pool1)
point2 = self.point_conv2(depth2)
batchNormal2 = self.batchNorm2(point2)
relu2 = self.relu2(batchNormal2)
# print(relu2.size())
depth3 = self.depth_conv3(relu2)
point3 = self.point_conv3(depth3)
relu3 = self.relu3(point3)
pool3 = self.pool3(relu3)
# print(pool3.size())
depth4 = self.depth_conv4(pool3)
point4 = self.point_conv4(depth4)
batchNormal4 = self.batchNorm4(point4)
relu4 = self.relu4(batchNormal4)
# print(relu4.size())
depth5 = self.depth_conv5(relu4)
point5 = self.point_conv5(depth5)
relu5 = self.relu5(point5)
pool5 = self.pool5(relu5)
# print(pool5.size())
depth6 = self.depth_conv6(pool5)
point6 = self.point_conv6(depth6)
batchNormal6 = self.batchNorm6(point6)
relu6 = self.relu6(batchNormal6)
# print(relu6.size())
return relu6
class CRNN(nn.Module):
def __init__(self, imgHeight, nChannel, nClass, nHidden):
super(CRNN, self).__init__()
self.cnn = nn.Sequential(CNN(imgHeight, nChannel))
self.lstm = nn.Sequential(
BidirectionalLSTM(512, nHidden, nHidden),
BidirectionalLSTM(nHidden, nHidden, nClass),
)
def forward(self, input):
conv = self.cnn(input)
# pytorch框架输出结构为BCHW
batch, channel, height, width = conv.size()
assert height == 1, "the output height must be 1."
# 将height==1的维度去掉-->BCW
conv = conv.squeeze(dim=2)
# 调整各个维度的位置(B,C,W)->(W,B,C),对应lstm的输入(seq,batch,input_size)
conv = conv.permute(2, 0, 1)
output = self.lstm(conv)
return output
if __name__ == "__main__":
x = torch.rand(1, 1, 32, 300)
model = CRNN(imgHeight=32, nChannel=1, nClass=12, nHidden=256)
y = model(x)
print(y.shape)
使用的数据集为自制数据集,训练集有2w张图像,测试集中有2000张图像。部分图像如下所示:数据集中共包含11个字符,包含0到9的数字,以及字符X。
from model import CRNN
from mydataset import CRNNDataSet
from torch.utils.data import DataLoader
import torch
from torch import optim
from tqdm import tqdm
import numpy as np
import matplotlib.pyplot as plt
import os
import random
def decode(preds):
char_set = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9','X'] + [" "]
preds=list(preds)
pred_text = ''
for i,j in enumerate(preds):
if j==n_class-1:
continue
if i==0:
pred_text+=char_set[j]
continue
if preds[i-1]!=j:
pred_text += char_set[j]
return pred_text
def getAcc(preds,labs):
acc=0
char_set = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9','X'] + [" "]
labs=labs.cpu().detach().numpy()
preds = preds.cpu().detach().numpy()
preds=np.argmax(preds,axis=-1)
preds=np.transpose(preds,(1,0))
out=[]
for pred in preds:
out_txt=decode(pred)
out.append(out_txt)
ll=[]
for lab in labs:
a=lab[lab!=-1]
b=[char_set[i] for i in a]
b="".join(b)
ll.append(b)
for a1,a2 in zip(out,ll):
if a1==a2:
acc+=1
return acc/batch_size
batch_size=64
n_class = 12
data_dir='train'
datas=os.listdir(data_dir)
train_lines=os.listdir('train')
val_lines=os.listdir('val')
trainData = CRNNDataSet(lines=train_lines,train=True,img_width=300,data_dir='train')
trainLoader = DataLoader(dataset=trainData, batch_size=batch_size, shuffle=True, num_workers=1)
valData = CRNNDataSet(lines=val_lines,train=False,img_width=300,data_dir='val')
valLoader = DataLoader(dataset=valData, batch_size=batch_size, shuffle=False, num_workers=1)
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
net = CRNN(imgHeight=32, nChannel=1, nClass=n_class, nHidden=256)
net=net.to(device)
stcdics=torch.load('./model.pth')
net.load_state_dict(state_dict=stcdics)
loss_func = torch.nn.CTCLoss(blank=n_class - 1) # 注意,这里的CTCLoss中的 blank是指空白字符的位置,在这里是第65个,也即最后一个
optimizer = torch.optim.Adam(net.parameters(), lr=0.0005, betas=(0.5, 0.999))
#学习率衰减
lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.99)
#画图列表
trainLoss=[]
valLoss=[]
trainAcc=[]
valAcc=[]
if __name__ == '__main__':
#设置迭代次数100次
Epoch=50
epoch_step = len(train_lines) // batch_size
for epoch in range(1, Epoch + 1):
net.train()
train_total_loss = 0
val_total_loss=0
train_total_acc = 0
val_total_acc = 0
with tqdm(total=epoch_step, desc=f'Epoch {epoch}/{Epoch}', postfix=dict, mininterval=0.3) as pbar:
for step, (features, label) in enumerate(trainLoader, 1):
labels = torch.IntTensor([])
for j in range(label.size(0)):
labels = torch.cat((labels, label[j]), 0)
labels=labels[labels!=-1]
features = features.to(device)
labels = labels.to(device)
loss_func=loss_func.to(device)
batch_size = features.size()[0]
out = net(features)
log_probs = out.log_softmax(2).requires_grad_()
targets = labels
input_lengths = torch.IntTensor([out.size(0)] * int(out.size(1)))
target_lengths = torch.where(label!=-1,1,0).sum(dim=-1)
loss = loss_func(log_probs, targets, input_lengths, target_lengths)
acc=getAcc(out,label)
optimizer.zero_grad()
loss.backward()
optimizer.step()
train_total_loss += loss
train_total_acc += acc
pbar.set_postfix(**{'loss': train_total_loss.item() / (step),
'acc': train_total_acc/ (step), })
pbar.update(1)
trainLoss.append(train_total_loss.item()/step)
trainAcc.append(train_total_acc/step)
#保存模型
torch.save(net.state_dict(), 'model.pth')
#验证
net.eval()
for step, (features, label) in enumerate(valLoader, 1):
with torch.no_grad():
labels = torch.IntTensor([])
for j in range(label.size(0)):
labels = torch.cat((labels, label[j]), 0)
labels = labels[labels != -1]
features = features.to(device)
labels = labels.to(device)
loss_func = loss_func.to(device)
batch_size = features.size()[0]
out = net(features)
log_probs = out.log_softmax(2).requires_grad_()
targets = labels
input_lengths = torch.IntTensor([out.size(0)] * int(out.size(1)))
target_lengths = torch.where(label != -1, 1, 0).sum(dim=-1)
loss = loss_func(log_probs, targets, input_lengths, target_lengths)
acc = getAcc(out, label)
val_total_loss+=loss
val_total_acc+=acc
valLoss.append(val_total_loss.item()/step)
valAcc.append(val_total_acc/step)
lr_scheduler.step()
print('val_loss=',val_total_loss.item()/step)
print('val_acc=',val_total_acc/step)
# print(trainLoss)
# print(valLoss)
"""绘制loss acc曲线图"""
plt.figure()
plt.plot(trainLoss, 'r')
plt.plot(valLoss, 'b')
plt.title('Training and validation loss')
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.legend(["Loss", "Validation Loss"])
plt.savefig('loss.png')
plt.figure()
plt.plot(trainAcc, 'r')
plt.plot(valAcc, 'b')
plt.title('Training and validation acc')
plt.xlabel("Epochs")
plt.ylabel("Acc")
plt.legend(["Acc", "Validation Acc"])
plt.savefig('acc.png')
# plt.show()