Python代码使用easyocr识别身份证号码,直接上代码
import easyocr
import os
import re
import pandas as pd
class card():
    """Recognize Chinese ID-card images with easyocr and export the fields to Excel.

    Every image file found in ``self.images`` is run through OCR, the
    recognized text is parsed into name / gender / nation / address /
    ID number, and the rows can be written to an ``.xlsx`` file in the same
    directory.
    """

    # Only files with these extensions are passed to the OCR engine. The
    # result spreadsheet is written into the image directory, so without a
    # filter a second run would try to OCR the spreadsheet itself.
    IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.bmp', '.tif', '.tiff', '.webp')
    RESULT_FILENAME = '识别结果.xlsx'

    # Field patterns, compiled once. Each captures the text between two
    # printed labels of the card (e.g. the name sits between "名" and "性").
    _NAME_RE = re.compile(r'名(.*?)性')
    _GENDER_RE = re.compile(r'别(.*?)民')
    _NATION_RE = re.compile(r'民族(.*?)出')
    # Stop at "公民" rather than the first "公" so that addresses containing
    # "公" (such as "公园路") are not cut short.
    _ADDRESS_RE = re.compile(r'址(.*?)公民')
    # The last character of an ID number may be the check letter X.
    _NUMBER_RE = re.compile(r'身份号码(\d+[Xx]?)')

    def __init__(self, images=r'D:/sfz'):
        """Store the directory that holds the ID-card images.

        Args:
            images: Path of the image directory; defaults to ``D:/sfz``.
        """
        self.images = images
        self._reader = None

    def ocr_reader(self):
        """Return the easyocr reader for simplified Chinese and English.

        The reader is created on first use and then reused, so the models
        are not loaded again for every image.
        """
        if getattr(self, '_reader', None) is None:
            self._reader = easyocr.Reader(['ch_sim', 'en'], gpu=True)
        return self._reader

    @staticmethod
    def _first_group(pattern, text):
        """Return group 1 of the first match of *pattern* in *text*, or ''."""
        match = pattern.search(text)
        return match.group(1) if match else ''

    @staticmethod
    def parse_text(text):
        """Extract the ID-card fields from OCR text that has no spaces.

        Args:
            text: The recognized text of one card joined into one string.

        Returns:
            ``[name, gender, nation, address, number]``; a field that could
            not be found is an empty string.
        """
        name = card._first_group(card._NAME_RE, text)
        gender = card._first_group(card._GENDER_RE, text)
        nation = card._first_group(card._NATION_RE, text)
        address = card._first_group(card._ADDRESS_RE, text)
        number = card._first_group(card._NUMBER_RE, text).upper()
        # Only 17 digits were read: assume the 18th character is the check
        # letter X that the OCR engine failed to recognize.
        if len(number) == 17 and number.isdigit():
            number = number + "X"
        return [name, gender, nation, address, number]

    def read_card(self):
        """Run OCR on every image in ``self.images`` and parse the fields.

        Files whose extension is not in ``IMAGE_EXTENSIONS`` are skipped.

        Returns:
            A list with one ``[name, gender, nation, address, number]`` row
            per image, in sorted file-name order.
        """
        data = []
        for image in sorted(os.listdir(self.images)):
            if not image.lower().endswith(self.IMAGE_EXTENSIONS):
                continue
            print(f'正在识别:{image}')
            # detail=0 makes readtext return only the recognized strings.
            pieces = self.ocr_reader().readtext(
                os.path.join(self.images, image), detail=0)
            text = ''.join(pieces).replace(" ", "")
            data.append(self.parse_text(text))
            print(f'完成识别:{image}')
        print(data)
        return data

    def write_to_excel(self):
        """Recognize all cards and save the result as an Excel file.

        The file is written to ``self.images`` under the name
        ``识别结果.xlsx``.

        Returns:
            The pandas DataFrame that was written.
        """
        df = pd.DataFrame(
            self.read_card(),
            columns=['姓名', '性别', '民族', '地址', '身份证号码'])
        print('识别结果如下:')
        print(df)
        df.to_excel(os.path.join(self.images, self.RESULT_FILENAME), index=False)
        return df
if __name__ == '__main__':
    # write_to_excel() calls read_card() itself, so calling read_card() here
    # as well would run OCR over every image twice.
    info = card()
    info.write_to_excel()
本文仅用于学术交流,禁止用于非法用途,后果自负。
由于用到的包下载很慢,可以直接从这里下载:
https://download.csdn.net/download/airyearth/86746644