要点:
参考:
基于PaddleOCR的数字显示器字符识别
工业仪表数值识别
问题分析:要处理电表中的数据,可以分步骤进行,并拆解为以下问题:
针对问题1,经过实验与探索,找到了以下三种方案:
方案1,直接利用PaddleOCR默认自带的检测器,筛选掉其他无效的框体和信息,剩下的就是有用的。(未经过训练的,直接使用预训练模型)
方案2,通过Opencv图像处理的方法,根据电表字符区域特征进行相应的轮廓提取和颜色筛选,从而保证其得到有效的定位。
方案3,收集场景下的大量电表字符识别数据,制作数据集并进行标记,分别训练其定位和识别模型。
考虑到时间成本和人工成本问题,这里优先选择前两种方案。 下面先介绍方案2,即使用OpenCV进行ROI区域定位的方法。
# 导入依赖库
import os
from tqdm import tqdm
import cv2
import csv
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
import warnings
from paddleocr import PaddleOCR, draw_ocr
# Silence every Python warning for the rest of the run
warnings.filterwarnings("ignore")
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE" # avoids a runtime error (per the author); presumably the duplicate-OpenMP-runtime abort — TODO confirm
最后一行设置环境变量KMP_DUPLICATE_LIB_OK,用于防止运行时报错。
# Visualisation helper
def imshow_image(img_path):
    """Display the image stored at ``img_path`` in a 5x5 inch matplotlib figure.

    Args:
        img_path: Path of the image file to show.
    """
    plt.figure("test_img", figsize=(5, 5))
    # The context manager closes the underlying file handle once the pixel
    # data has been handed to matplotlib.
    with Image.open(img_path) as img:
        plt.imshow(img)
    plt.show()
def find_biggest_contour(image):
    """Return the contour with the largest area found in ``image``.

    Args:
        image: Image passed to ``cv2.findContours``; the caller in this file
            supplies the binary mask produced by ``cv2.inRange``.

    Returns:
        The largest contour by ``cv2.contourArea`` (the first one on ties),
        or ``None`` when no contour is found.
    """
    # Index -2 is the contour list on both OpenCV 4.x (contours, hierarchy)
    # and OpenCV 3.x (image, contours, hierarchy).
    contours = cv2.findContours(image.copy(), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)[-2]
    if len(contours) == 0:
        return None
    return max(contours, key=cv2.contourArea)
def get_find_display(input_path, lower=(0, 0, 0), higher=(255, 255, 255), output_path='./'):
    """Locate the display ROI by HSV colour thresholding and crop it.

    The image is blurred, converted to HSV and thresholded between ``lower``
    and ``higher``; the mask is cleaned with a morphological close followed by
    an open, and the bounding rectangle of the largest remaining contour is
    cropped from the original image and written to ``output_path`` under the
    input file's name.

    Args:
        input_path: Path of the image to analyse.
        lower: Lower (H, S, V) bound of the colour range.
        higher: Upper (H, S, V) bound of the colour range.
        output_path: Directory that receives the cropped ROI; created if missing.

    Returns:
        ``(result_img, img_crop)``: the original image with the mask applied,
        and the cropped ROI. When the mask is empty the crop is the whole
        image and nothing is written to disk.

    Raises:
        FileNotFoundError: If ``input_path`` cannot be read as an image.
    """
    # NOTE: the crop is also published as a module-level global, as in the
    # original code, so existing readers of that global keep working.
    global img_crop

    img = cv2.imread(input_path)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise FileNotFoundError(f'cannot read image: {input_path}')
    filename = os.path.basename(input_path)

    # Blur first so that pixel noise does not fragment the colour mask.
    frame_bgr = cv2.GaussianBlur(img, (7, 7), 0)
    hsv = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2HSV)

    color_low = np.array([lower[0], lower[1], lower[2]])
    color_high = np.array([higher[0], higher[1], higher[2]])
    mask = cv2.inRange(hsv, color_low, color_high)

    # Close fills small holes inside the region, open removes small specks.
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (7, 7))
    mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel)
    mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN, kernel)

    # Keep only the pixels of the original image that fall inside the mask.
    result_img = cv2.bitwise_and(img, img, mask=mask)

    # An all-zero mask has no contours; skip the search in that case.
    biggest_contour = find_biggest_contour(mask) if cv2.countNonZero(mask) else None
    if biggest_contour is None:
        img_crop = img
        return result_img, img_crop

    print('cnt_len:', len(biggest_contour))
    x, y, w, h = cv2.boundingRect(biggest_contour)
    print(x, y, w, h)
    img_crop = img[y:y + h, x:x + w]

    save_path = os.path.join(output_path, filename)
    print('wpath:', save_path)
    if output_path:
        # makedirs also creates missing parent directories (e.g. 'work/roi/').
        os.makedirs(output_path, exist_ok=True)
    cv2.imwrite(save_path, img_crop)
    return result_img, img_crop
# Crop the display region of the first sample image and show the saved crop.
# Alternative sample image:
# img_roi = 'test/133102_steerPoint5_preset1255_20220917221726_v.jpeg'
img_roi = 'test/number_item.jpg'
output_path = 'work/roi/'
# HSV bounds: only the saturation channel has a non-trivial lower limit (80).
lower, higher = (0, 80, 0), (255, 255, 255)
result_img, img_crop = get_find_display(img_roi, lower, higher, output_path)
# The crop is written to output_path under the input image's file name.
img_roi_path = 'work/roi/number_item.jpg'
imshow_image(img_roi_path)
def rec_display_roi(img_roi):
    """Recognise the text of an already-cropped display image.

    Detection is skipped (``det=False``); only the recognition model runs.

    Args:
        img_roi: Image (path) handed to ``PaddleOCR.ocr``.

    Returns:
        ``(text, score)`` of the first recognition result.
    """
    recognizer = PaddleOCR()
    result = recognizer.ocr(img_roi, det=False)
    first = result[0]
    # NOTE(review): PaddleOCR releases differ in nesting here - some return
    # [(text, score)], others one list per image, [[(text, score)]]. Unwrap
    # the extra level when present so both shapes yield (text, score).
    if first and isinstance(first[0], (tuple, list)):
        first = first[0]
    return first[0], first[1]


rec_display_roi(img_roi_path)
获取识别区
# Run the ROI extraction on the second sample image, then show the source image.
img_roi = 'test/number_use.jpg'
output_path = 'work/roi/'
# Full HSV range: every pixel passes the colour threshold.
lower, higher = (0, 0, 0), (255, 255, 255)
result_img, img_crop = get_find_display(img_roi, lower, higher, output_path)
imshow_image(img_roi)
进行识别
# Show the crop saved by the previous step.
img_roi_path = "work/roi/number_use.jpg"
imshow_image(img_roi_path)


def rec_display_roi(img_roi):
    """Run recognition only (``det=False``) on ``img_roi`` using the GPU.

    Args:
        img_roi: Image (path) handed to ``PaddleOCR.ocr``.

    Returns:
        ``result[0][0]`` of the value returned by ``PaddleOCR.ocr``.
    """
    # CPU alternative: PaddleOCR()
    engine = PaddleOCR(use_gpu=True)
    return engine.ocr(img_roi, det=False)[0][0]


# NOTE(review): this passes img_roi (the source image), not img_roi_path (the
# saved crop shown above) - confirm which one is intended.
rec_display_roi(img_roi)
识别结果: ('0598', 0.7145649790763855)
方案1,直接使用PaddleOCR将所有可能是OCR的对象进行检测和识别,再从中筛选需要的结果。
# PaddleOCR selects the recognition language through the `lang` parameter,
# e.g. `ch`, `en`, `fr`, `german`, `korean`, `japan`.
ocr = PaddleOCR(use_angle_cls=True, lang="ch") # need to run only once to download and load model into memory
# Visualise the detections on one sample image.
# Alternative sample image:
# img_path = 'test/133102_steerPoint5_preset1255_20220917221726_v.jpeg'
img_path = 'test/number_item.jpg'
save_path = 'work/dst/result.jpg'
result = ocr.ocr(img_path, cls=True)
for line in result:
    print(line)
image = Image.open(img_path).convert('RGB')
# Each entry is read as [box, (text, score)].
boxes = [line[0] for line in result]
txts = [line[1][0] for line in result]
scores = [line[1][1] for line in result]
im_show = draw_ocr(image, boxes, txts, scores, font_path='work/font/simfang.ttf')
im_show = Image.fromarray(im_show)
# Saving does not create missing directories, so make sure 'work/dst' exists.
os.makedirs(os.path.dirname(save_path), exist_ok=True)
im_show.save(save_path)
# Sample output recorded by the author:
"""[[[1936.0, 56.0], [2461.0, 56.0], [2461.0, 109.0], [1936.0, 109.0]], ('2022-11-28 07:38:28', 0.8835511)]
[[[2461.0, 450.0], [2557.0, 450.0], [2557.0, 500.0], [2461.0, 500.0]], ('原水', 0.99717796)]
[[[12.0, 1310.0], [483.0, 1322.0], [481.0, 1404.0], [10.0, 1392.0]], ('水泵房仪表间', 0.93879247)]
im_show """
def write_to_csv(log_path, filename='', result=0.00, score=0, mode_head=True):
    """Append one row to the CSV log at ``log_path``.

    Args:
        log_path: Path of the CSV file; it and its parent directory are
            created when missing.
        filename: Value for the ``filename`` column of a data row.
        result: Value for the ``result`` column of a data row.
        score: Value for the ``score`` column of a data row.
        mode_head: When true, write the header row
            (``filename, result, score``) and ignore the other values;
            when false, write the data row.
    """
    parent = os.path.dirname(log_path)
    if parent:
        os.makedirs(parent, exist_ok=True)
    row = ['filename', 'result', 'score'] if mode_head else [filename, result, score]
    # The context manager closes the file even if writing the row fails.
    with open(log_path, 'a', encoding='utf-8', newline='') as file:
        csv.writer(file).writerow(row)
def get_bbox_area(box):
    """Return the area of the axis-aligned bounding rectangle of ``box``.

    Args:
        box: Sequence of ``[x, y]`` corner points, in any order.

    Returns:
        Width times height of the rectangle spanned by the points.
    """
    xs = [point[0] for point in box]
    ys = [point[1] for point in box]
    return (max(xs) - min(xs)) * (max(ys) - min(ys))
def quadArea(nodes):
    """Return the area of a simple polygon using the shoelace formula.

    Args:
        nodes: Polygon vertices as ``[x, y]`` pairs, listed in order around
            the outline (clockwise or counter-clockwise).

    Returns:
        The polygon area as a non-negative number; ``0`` for empty input.
    """
    count = len(nodes)
    cross_sum = 0
    for i in range(count):
        nxt = (i + 1) % count  # wraps around to close the polygon
        cross_sum += nodes[i][0] * nodes[nxt][1] - nodes[nxt][0] * nodes[i][1]
    # The summed cross products equal twice the signed area.
    return abs(cross_sum) / 2
def bboxes_choose(boxes, txts, scores):
    """Keep only the detection whose box has the largest area.

    Args:
        boxes: Detected boxes, each a sequence of ``[x, y]`` corner points.
        txts: Recognised text for each box, index-aligned with ``boxes``.
        scores: Confidence for each box, index-aligned with ``boxes``.

    Returns:
        ``(boxes, txts, scores)`` as one-element lists describing the largest
        box (the first one on ties), or three empty lists when ``boxes`` is
        empty.
    """
    if len(boxes) == 0:
        return [], [], []
    index = max(range(len(boxes)), key=lambda i: quadArea(boxes[i]))
    return [boxes[index]], [txts[index]], [scores[index]]
def _ocr_lines(result):
    """Return the OCR result as a flat list of ``[box, (text, score)]`` entries."""
    if not result or not result[0]:
        return []
    head = result[0][0]
    # NOTE(review): some PaddleOCR releases wrap the entries in one list per
    # image; a [box, (text, score)] pair in this position means that wrapper
    # is present. Confirm against the installed version.
    nested = (
        isinstance(head, (list, tuple))
        and len(head) == 2
        and isinstance(head[1], (list, tuple))
        and len(head[1]) == 2
        and isinstance(head[1][0], str)
    )
    return list(result[0]) if nested else list(result)


def ocr_roi_det(img_path, font, save_path='./work/save/'):
    """Detect and recognise text in an image and keep the largest text box.

    Runs the module-level ``ocr`` engine, selects the detection with the
    largest box via ``bboxes_choose``, and saves a visualisation of it as
    ``<name>_result.jpg`` inside ``save_path``.

    Args:
        img_path: Path of the image to process.
        font: Path of the font file used by ``draw_ocr`` to render the text.
        save_path: Directory for the annotated image; created if missing.

    Returns:
        ``(text, score)`` of the selected detection, or ``('', 0.0)`` when
        nothing was detected (no annotated image is written in that case).
    """
    # Entries look like:
    # [[[151.0, 53.0], [277.0, 53.0], [277.0, 111.0], [151.0, 111.0]], ('00.2', 0.9423570036888123)]
    lines = _ocr_lines(ocr.ocr(img_path, cls=True))
    if not lines:
        return '', 0.0

    boxes = [line[0] for line in lines]
    txts = [line[1][0] for line in lines]
    scores = [line[1][1] for line in lines]
    boxes, txts, scores = bboxes_choose(boxes, txts, scores)

    image = Image.open(img_path).convert('RGB')
    fname = os.path.basename(img_path).split('.')[0]
    im_show = Image.fromarray(draw_ocr(image, boxes, txts, scores, font_path=font))
    if save_path:
        # makedirs also creates missing parents of the nested default path.
        os.makedirs(save_path, exist_ok=True)
    im_show.save(os.path.join(save_path, fname + '_result.jpg'))
    return txts[0], scores[0]
def all_test_det(path, log_path, font, save_path):
    """Run ``ocr_roi_det`` on every file under ``path`` and log results to CSV.

    Args:
        path: Root directory that is walked recursively for input files.
        log_path: CSV file that receives a header row followed by one row per
            image with a non-empty recognition result.
        font: Font file path forwarded to ``ocr_roi_det``.
        save_path: Output directory forwarded to ``ocr_roi_det``.

    Images whose recognised text is empty are recorded with score ``-1`` in
    the printed summary dictionary and are not written to the CSV.
    """
    # NOTE: `score` stays a module-level global, as in the original code.
    global score

    img_list = []
    for dirpath, _dirnames, filenames in os.walk(path):  # walks nested directories
        # os.walk yields sub-directory paths without a trailing separator,
        # so join instead of concatenating.
        img_list.extend(os.path.join(dirpath, name) for name in filenames)

    count = 0
    img_ans_dic = {}
    write_to_csv(log_path)
    # Iterate over every collected file, including the last one.
    for img_roi in tqdm(img_list):
        fname = os.path.basename(img_roi).split('.')[0]
        result, score = ocr_roi_det(img_roi, font, save_path)
        print('result:', result, 'score:', score)
        if result == '':
            score = -1
            img_ans_dic[fname] = score
            continue
        img_ans_dic[fname] = score
        count += 1
        write_to_csv(log_path, fname, result, score, False)
    print('count:', count)
    print('dict_len:', len(img_ans_dic))
    print('ans_dict:', img_ans_dic)
if __name__ == '__main__':
    # Input and output locations used by this run.
    font_path = 'work/font/simfang.ttf'
    log_path = 'work/log/result.csv'
    save_path = 'work/save_result/'
    test_path = 'work/dataset/test/'

    # PaddleOCR selects the recognition language through the `lang` parameter,
    # e.g. `ch`, `en`, `fr`, `german`, `korean`, `japan`.
    print('查看ocr数据模型')
    ocr = PaddleOCR(use_angle_cls=False, lang="en") # need to run only once to download and load model into memory
    print('ocr:', ocr)

    # Single-image check. img_path is the module-level value set earlier in
    # the file; the alternative below is left disabled.
    # img_path = 'test/number_use.jpg'
    a, b = ocr_roi_det(img_path, font_path)
    print('查看识别结果:', a, b)

    # Batch run over the whole test directory.
    all_test_det(test_path, log_path, font_path, save_path)

    # Result inspection; alternative images:
    # rs_img = 'work/save_result/133102_steerPoint5_preset1255_20220917221726_v_result.jpg'
    # rs_img = 'test/72635_steerPoint12_preset1294_20220919123447_v.jpeg'
    rs_img = 'test/number_use.jpg'
    imshow_image(rs_img)
    print('执行到最后位置》')
优化后的算法