OpenCV文字识别(三) :文字定位与切割(python)

在倾斜校正的预处理完成后便可以开始对处理后的文本文档进行定位与切割了。

总体思路:先切割行,再切割列,得到一张张字体图片并标号储存方便后续的识别。

其中切割的方法采用了投影法,即在二值化后统计每一行(或每一列)中白色像素的个数,再根据统计结果的波峰与波谷确定文字的边界。

Python测试代码:

import YuChuLi
import cv2
import numpy as np
# Projection modes accepted by get_text_project.
V_PROJECT = 1  # vertical projection: one white-pixel count per image column
H_PROJECT = 2  # horizontal projection: one white-pixel count per image row
# Running index of saved character images; cut_char uses it as the file id
# and increments it after every saved character.
count = 0


def get_text_project(img_text, mode):
    """Count the white (value 255) pixels of a binarized image along one axis.

    After binarization the image has a single channel, so it is indexed as
    ``img_text[row, column]`` and has no third dimension.

    Args:
        img_text: 2-D array holding the binarized image.
        mode: ``V_PROJECT`` to get one count per column, ``H_PROJECT`` to get
            one count per row.

    Returns:
        A 1-D integer numpy array with the counts, or an empty list when
        ``mode`` is neither ``V_PROJECT`` nor ``H_PROJECT``.
    """
    if mode == V_PROJECT:
        axis = 0    # collapse the rows -> one value per column
    elif mode == H_PROJECT:
        axis = 1    # collapse the columns -> one value per row
    else:
        return []
    # Vectorized count; the builtin ``int`` replaces the ``np.int`` alias,
    # which no longer exists in current NumPy releases.
    white = np.asarray(img_text) == 255
    return np.count_nonzero(white, axis=axis).astype(int)


def get_peek_range(pos, min_tresh, min_range):
    """Split a projection profile into (begin, end) segments.

    A segment opens at the first index whose value is greater than
    ``min_tresh``. It closes at the first later index whose value is below
    ``min_tresh`` once the segment is at least ``min_range`` wide; a narrower
    run stays open, so nearby narrow runs end up in the same segment.

    Args:
        pos: projection profile (sequence of per-row or per-column counts).
        min_tresh: minimum amplitude of a peak.
        min_range: minimum width of one segment (smallest space a symbol
            may occupy).

    Returns:
        List of ``(begin, end)`` index pairs; ``end`` is exclusive.
    """
    peek_range = []
    # None (rather than 0) marks "no open segment", so a segment that starts
    # at index 0 is reported with the correct begin.
    begin = None
    for i, value in enumerate(pos):
        if begin is None:
            if value > min_tresh:
                begin = i
        elif value < min_tresh and i - begin >= min_range:
            peek_range.append((begin, i))
            begin = None
    # A segment still open at the end of the profile (content touching the
    # image border) is closed at the border instead of being dropped.
    if begin is not None and len(pos) - begin >= min_range:
        peek_range.append((begin, len(pos)))
    return peek_range


def save_cut(img_save, cut_id):
    """Write one cut-out character image to ``./tmp/<cut_id>.jpg``.

    Args:
        img_save: image array to store.
        cut_id: identifier used as the file name (converted with ``str``).
    """
    import os   # local import so this block does not depend on file-level imports

    # cv2.imwrite does not create missing directories, so make sure the
    # target folder exists before writing.
    os.makedirs("./tmp", exist_ok=True)
    filename = "./tmp/" + str(cut_id) + ".jpg"
    cv2.imwrite(filename, img_save)


def cut_char(img_char, peek_range):
    """Cut one text line into characters and save each of them.

    Every crop spans the full line height. Each crop is appended to the
    result, written out through ``save_cut`` under the current value of the
    module-level ``count``, and ``count`` is then incremented.

    Args:
        img_char: image of a single text line, indexed [row, column].
        peek_range: sequence of ``(begin, end)`` column ranges, one per
            character.

    Returns:
        List with the cropped character images, in ``peek_range`` order.
    """
    global count
    line_height = img_char.shape[0]
    chars_set = []
    for segment in peek_range:
        seg_begin, seg_end = segment[0], segment[1]
        char_gap = seg_end - seg_begin
        # Start two pixels left of the segment, clamped to the image edge.
        x = max(seg_begin - 2, 0)
        # NOTE(review): the width is char_gap (not char_gap + 4) and is
        # capped by the line height, so the crop is shifted left without
        # being widened -- confirm this is intended.
        w = char_gap if char_gap + 4 <= line_height else line_height
        single_char = img_char[:line_height, x:x + w]
        chars_set.append(single_char)
        save_cut(single_char, count)
        count += 1
    return chars_set


def cut_one_line(img_lines, begin, end):
    """Return rows ``begin`` (inclusive) to ``end`` (exclusive) of the image.

    All columns are kept, so the result is one full-width horizontal strip.
    """
    full_width = img_lines.shape[1]
    return img_lines[begin:end, :full_width]


def cut_text(img_pre_cut):
    """Cut a binarized page into lines, then every line into characters.

    Lines are located with the horizontal projection (threshold 2, minimum
    height 10) and characters with the vertical projection of each line
    (threshold 2, minimum width 30). Every character is saved by
    ``cut_char`` as a side effect.

    Args:
        img_pre_cut: binarized single-channel image.

    Returns:
        List with the character images of all lines, in reading order; an
        empty list when no line is detected.
    """
    # Accumulate over all lines: rebinding the result per line would return
    # only the last line and leave the name unbound when no line is found.
    chars_set = []
    h_pos = get_text_project(img_pre_cut, H_PROJECT)
    for top, bottom in get_peek_range(h_pos, 2, 10):
        line = cut_one_line(img_pre_cut, top, bottom)
        v_pos = get_text_project(line, V_PROJECT)
        v_peek_range = get_peek_range(v_pos, 2, 30)
        chars_set.extend(cut_char(line, v_peek_range))
    return chars_set


if __name__ == "__main__":
    # Demo run: load the test image, binarize it with the project's
    # preprocessing module and cut it into characters.
    img_path = "D:/pycharm/Word_Lib/test1.jpg"
    img = cv2.imread(img_path)
    # cv2.imread signals a missing or unreadable file by returning None
    # instead of raising, so fail here with a clear message.
    if img is None:
        raise SystemExit("Cannot read image: " + img_path)
    img = YuChuLi.two_value(img)
    char_set = cut_text(img)
    cv2.waitKey()

总结:

1.算法较为简单,最小幅度和最小波峰间隔这两个参数需要根据实际图片自行调整。而且难以兼顾较复杂的情况,如:宽度较小的标点符号与内部笔画间隔较大的单字(如“小”等)之间的切割矛盾。

 

 

你可能感兴趣的:(#文字识别,python,定位)