# -*- coding=GBK -*-
import cv2 as cv
from PIL import Image
import pytesseract
def recognize_text(image=None):
    """Clean up a captcha image and run Tesseract OCR on it.

    The image is converted to grayscale, binarized, passed through two
    morphological openings with different kernels to reduce noise, inverted,
    and handed to pytesseract. Every intermediate stage is shown in its own
    OpenCV window ("Binarization", "MORPH_OPEN_1", "MORPH_OPEN_2",
    "Transform") and the recognized text is printed.

    Args:
        image: Image array in the form accepted by ``cv.cvtColor`` with
            ``cv.COLOR_BGR2GRAY``. When omitted, the module-level ``src``
            is used, which keeps the original zero-argument call working.

    Returns:
        The string produced by ``pytesseract.image_to_string``.
    """
    if image is None:
        # Backward compatibility: the original implementation always read
        # the module-level global.
        image = src

    gray = cv.cvtColor(image, cv.COLOR_BGR2GRAY)

    # With THRESH_OTSU the threshold is chosen automatically, so the 0
    # passed here is only a placeholder. THRESH_BINARY_INV flips the result
    # so that pixels darker than the threshold become 255.
    _, binary = cv.threshold(
        gray, 0, 255, cv.THRESH_BINARY_INV | cv.THRESH_OTSU
    )
    cv.imshow("Binarization", binary)

    # Two openings with different kernels are used to reduce noise:
    # first a 1-wide, 8-tall element, then an 8-wide, 1-tall element.
    vertical_kernel = cv.getStructuringElement(cv.MORPH_RECT, (1, 8))
    opened_vertical = cv.morphologyEx(binary, cv.MORPH_OPEN, vertical_kernel)
    cv.imshow("MORPH_OPEN_1", opened_vertical)

    horizontal_kernel = cv.getStructuringElement(cv.MORPH_RECT, (8, 1))
    opened_both = cv.morphologyEx(
        opened_vertical, cv.MORPH_OPEN, horizontal_kernel
    )
    cv.imshow("MORPH_OPEN_2", opened_both)

    # Invert so the background becomes white. A new array is produced
    # instead of overwriting the buffer already passed to imshow above.
    inverted = cv.bitwise_not(opened_both)
    cv.imshow("Transform", inverted)

    text_image = Image.fromarray(inverted)
    # For good results the captcha's digits/letters should sit side by side
    # as far as possible, and each character in the "Transform" image must
    # remain separate from its neighbours, unbroken, and fully connected.
    text = pytesseract.image_to_string(text_image)
    print("This OK: %s" % text)
    return text
# Load the captcha image. It is kept in the module-level name `src` because
# recognize_text() reads its input from there when called without arguments.
src = cv.imread("yanzhengma.png")
if src is None:
    # cv.imread reports a missing or undecodable file by returning None
    # rather than raising; fail here with a clear message instead of letting
    # cv.imshow abort later with an opaque assertion error.
    raise OSError("Could not read image file 'yanzhengma.png'")

cv.imshow("before", src)
recognize_text()

# Keep the windows open until a key is pressed, then close them all.
cv.waitKey(0)
cv.destroyAllWindows()
# Two opening operations with different kernels are used in order to reduce noise during recognition.