使用 Python 进行文本识别

1. 原始图像

python 进行文本识别_第1张图片

2. 进行透视变换

python 进行文本识别_第2张图片

python 进行文本识别_第3张图片

python 进行文本识别_第4张图片

3. 代码:

import cv2
import pytesseract
import numpy as np


def cv_show(name, img):
    """Show ``img`` in a window titled ``name`` and wait for a key press.

    Once a key has been pressed, every open OpenCV window is destroyed.

    Args:
        name: Title of the window passed to ``cv2.imshow``.
        img: Image to display.
    """
    wait_forever = 0  # a delay of 0 makes waitKey block until a key is hit
    cv2.imshow(name, img)
    cv2.waitKey(wait_forever)
    cv2.destroyAllWindows()


def resize(img, width=None, height=None):
    """Resize ``img`` to a target width or height, keeping the aspect ratio.

    Args:
        img: Image array; its first two ``shape`` entries are read as
            (rows, cols).
        width: Desired width in pixels. When given, it takes precedence and
            the height is derived from it (any ``height`` passed is ignored).
        height: Desired height in pixels, used only when ``width`` is None.

    Returns:
        ``img`` itself when neither ``width`` nor ``height`` is given,
        otherwise the result of ``cv2.resize`` with ``INTER_AREA``
        interpolation.
    """
    src_rows, src_cols = img.shape[:2]
    if width is None and height is None:
        return img

    if width is None:
        # Only the height is fixed: scale the width by the same factor.
        scale = float(height) / src_rows
        target_size = (int(scale * src_cols), height)
    else:
        # The width is fixed: scale the height by the same factor.
        scale = float(width) / src_cols
        target_size = (width, int(scale * src_rows))

    # cv2.resize expects the size as (width, height).
    return cv2.resize(img, target_size, interpolation=cv2.INTER_AREA)


def repts(pts):
    """Reorder four (x, y) corner points into a fixed order.

    The corners are picked by coordinate sum and difference:

    * index 0 - smallest ``x + y``
    * index 1 - smallest ``y - x``
    * index 2 - largest ``x + y``
    * index 3 - largest ``y - x``

    In image coordinates (y grows downwards) this corresponds to
    top-left, top-right, bottom-right, bottom-left.

    Args:
        pts: Array of shape (4, 2) holding the corner coordinates.

    Returns:
        A new ``float32`` array of shape (4, 2) with the reordered points.
    """
    coord_sum = pts.sum(axis=1)
    # np.diff along axis 1 is (second column - first column), i.e. y - x.
    coord_diff = np.diff(pts, axis=1)
    order = [
        np.argmin(coord_sum),
        np.argmin(coord_diff),
        np.argmax(coord_sum),
        np.argmax(coord_diff),
    ]
    ordered = np.zeros((4, 2), dtype=np.float32)
    ordered[...] = pts[order]
    return ordered


# Image preprocessing
image_org = cv2.imread('./images/page.jpg')
if image_org is None:
    # cv2.imread reports an unreadable/missing file by returning None.
    raise FileNotFoundError("cannot read image: './images/page.jpg'")
image = resize(image_org, height=500)
# Factor mapping coordinates found on the downscaled copy back to the
# full-resolution original.
scale = image_org.shape[0] / float(image.shape[0])
cv_show('image', image)
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
cv_show('gray', gray)
gray = cv2.GaussianBlur(gray, (5, 5), 0)
cv_show('gray', gray)
canny = cv2.Canny(gray, 70, 150)
cv_show('canny', canny)

# Contour detection
# findContours returns (image, contours, hierarchy) on OpenCV 3.x but
# (contours, hierarchy) on 2.x/4.x; the contours are always second from last.
cnts = cv2.findContours(canny, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)[-2]
cnts = sorted(cnts, key=cv2.contourArea, reverse=True)[:3]

# Among the largest contours, take the first one that simplifies to a
# quadrilateral; that is treated as the page outline.
page_cnt = None
pts = None
for cnt in cnts:
    approx = cv2.approxPolyDP(cnt, 0.02 * cv2.arcLength(cnt, True), True)
    if len(approx) == 4:
        page_cnt = cnt
        pts = approx
        break
if pts is None:
    raise RuntimeError('no four-cornered contour found in the image')

image_draw = image.copy()
image_draw = cv2.drawContours(image_draw, [page_cnt], 0, (0, 0, 255), 2, 8)
cv_show("image_draw", image_draw)  # show the extracted page outline

# Perspective transform
pts = np.reshape(pts, (4, 2)) * scale
pts = repts(pts)  # reorder to 0-1-2-3, clockwise
# Output size: the longer of each pair of opposite edges of the quadrilateral.
new_height = int(max(np.linalg.norm(pts[0] - pts[3]), np.linalg.norm(pts[1] - pts[2])))
new_width = int(max(np.linalg.norm(pts[0] - pts[1]), np.linalg.norm(pts[2] - pts[3])))
new_pts = np.array(
    [[0, 0], [new_width - 1, 0], [new_width - 1, new_height - 1], [0, new_height - 1]],
    dtype=np.float32,
)
M = cv2.getPerspectiveTransform(pts, new_pts)
img = cv2.warpPerspective(image_org, M, (new_width, new_height))
img_resize = resize(img, height=500)
cv_show('img_resize', img_resize)

# Recognise the text and write it to a file
# OpenCV stores images as BGR while pytesseract assumes RGB input.
txt = pytesseract.image_to_string(cv2.cvtColor(img_resize, cv2.COLOR_BGR2RGB))
# Explicit encoding so non-ASCII OCR output does not depend on the platform
# default codec.
with open('./images/word.txt', 'w', encoding='utf-8') as f:
    f.write(txt)


4. 识别结果:

python 进行文本识别_第5张图片

 

你可能感兴趣的:(python)