Python识别身份证信息,并保存在excel当中

编写代码前的注意事项

批量进行身份证信息的识别,并保存在excel文件当中
python当前版本:3.7
需要安装的第三方包:pandas、easyocr、openpyxl(可通过 pip install pandas easyocr openpyxl 安装)

import easyocr
import os
import re
import pandas as pd

class id_card_ocr():
    """Batch-OCR ID card images in a folder and export the fields to Excel.

    Every image file in ``self.images`` is run through EasyOCR (simplified
    Chinese + English), the recognised text is parsed into name, gender,
    ethnicity, address and ID number, and the rows can be written to
    ``识别结果.xlsx`` inside the same folder.
    """

    # Only files with these extensions are sent to OCR. The Excel report is
    # written into the same folder, so without this filter a second run
    # would try to OCR the report itself.
    _IMAGE_SUFFIXES = ('.jpg', '.jpeg', '.png', '.bmp', '.tif', '.tiff')

    # Field patterns applied to the space-stripped OCR text. Each captures
    # the text between two printed labels on the card.
    _NAME_RE = re.compile(r'名(.*?)性')
    # The original patterns were r'别(.*?)民族|民' and r'族|民族(.*?)出'.
    # Because '|' has the lowest precedence, the bare '民' / '族' alternative
    # matched on its own with an empty group, so the fallback label never
    # captured anything. The label variants are now grouped explicitly.
    _GENDER_RE = re.compile(r'别(.*?)[民族]')
    _NATION_RE = re.compile(r'民?族(.*?)出')
    # Preferred address terminator is the full ID-number label, so an
    # address containing "公" (公园, 公路, ...) is not cut short.
    _ADDRESS_RE = re.compile(r'址(.*?)公民?身份号码')
    # Fallback used only when the ID-number label was not recognised.
    _ADDRESS_LOOSE_RE = re.compile(r'址(.*?)公')
    _NUMBER_RE = re.compile(r'身份号码(\d+)')

    def __init__(self, images=r'D:/id_card'):
        """Store the folder that holds the ID card images.

        Args:
            images: Folder containing the images to recognise. Image file
                names must not contain Chinese characters, otherwise
                reading the image raises an error.
        """
        self.images = images

    def ocr_reader(self):
        """Create and return a new EasyOCR reader for Chinese and English."""
        ocr = easyocr.Reader(['ch_sim', 'en'], gpu=True)
        return ocr

    @staticmethod
    def _first_group(pattern, text):
        """Return group 1 of the first match of *pattern*, or '' if none."""
        match = pattern.search(text)
        return match.group(1) if match else ''

    @classmethod
    def _parse_fields(cls, text):
        """Parse recognised card text into a row of fields.

        Args:
            text: OCR output joined into a single string.

        Returns:
            ``[name, gender, nation, address, number]``; a field that cannot
            be located is returned as an empty string.
        """
        text = text.replace(" ", "")  # OCR output may contain stray spaces
        name = cls._first_group(cls._NAME_RE, text)
        gender = cls._first_group(cls._GENDER_RE, text)
        nation = cls._first_group(cls._NATION_RE, text)
        address = (cls._first_group(cls._ADDRESS_RE, text)
                   or cls._first_group(cls._ADDRESS_LOOSE_RE, text))
        number = cls._first_group(cls._NUMBER_RE, text)
        # r'\d+' stops before a trailing check character "X", so a 17-digit
        # capture is treated as a number whose final "X" was cut off.
        if len(number) == 17:
            number = number + "X"
        return [name, gender, nation, address, number]

    def read_content(self):
        """OCR every image in ``self.images`` and parse the card fields.

        Returns:
            A list with one ``[name, gender, nation, address, number]`` row
            per image file, in sorted file-name order.
        """
        # Build the reader once: constructing it loads the recognition
        # models, which is far too expensive to repeat for every image.
        reader = self.ocr_reader()
        data = []
        for image in sorted(os.listdir(self.images)):
            if not image.lower().endswith(self._IMAGE_SUFFIXES):
                continue  # skip the Excel report and any other non-image
            print(f'正在识别:{image}')
            content = reader.readtext(os.path.join(self.images, image), detail=0)
            # detail=0 yields a list of text fragments; join into one string.
            row = self._parse_fields(''.join(content))
            print(f'完成识别:{image}')
            data.append(row)
        print(data)
        return data

    def read_to_excel(self):
        """Run recognition and save the result as an Excel workbook.

        The workbook is written to ``识别结果.xlsx`` inside ``self.images``.

        Returns:
            The ``pandas.DataFrame`` that was written.
        """
        df = pd.DataFrame(self.read_content(), columns=['姓名', '性别', '民族', '地址', '身份证号码'])
        print('识别结果如下:')
        print(df)
        df.to_excel(os.path.join(self.images, '识别结果.xlsx'), index=False)
        return df


if __name__ == '__main__':
    # read_to_excel() performs the recognition itself through
    # read_content(); calling read_content() separately beforehand would
    # OCR every image twice.
    info = id_card_ocr()
    info.read_to_excel()

以上代码没有提取出生日期:直接用正则从识别文本中匹配生日不太好写,不过身份证号码的第7~14位本身就是出生日期(格式为YYYYMMDD),例如可以用 new_number[6:14] 截取。大家如果有兴趣,可以自行编写一下哦~

你可能感兴趣的:(python,pandas)