Python中的tesserocr这个库好像使用得比较多,所以对这个库进行了一番研究,并且实现了那个后台网站验证码的识别。
但是安装后依然会遇到这个问题:
作者:大王大大王
原文:https://blog.csdn.net/wang_hugh/article/details/80760940
使用pytesseract识别验证码中遇到异常如下:
pytesseract.pytesseract.TesseractNotFoundError: tesseract is not installed or it's not in your path
检查源码(pytesseract.py),发现如下说明:
#CHANGE THIS IF TESSERACT IS NOT IN YOUR PATH, OR IS NAMED DIFFERENTLY
tesseract_cmd = 'tesseract'
安装后的默认文件路径为(这里使用的是Windows版本):C:\Program Files (x86)\Tesseract-OCR
然后将源码中的:
tesseract_cmd = 'tesseract'
更改为:
tesseract_cmd = r'C:\Program Files (x86)\Tesseract-OCR\tesseract.exe'
再次运行之前的PY脚本,成功.
import cv2
import numpy as np
from PIL import Image
import pytesseract as tess
def recognize_text(img):
    """Binarize an image, clean it up, and run Tesseract OCR on the result.

    The grayscale image, the thresholded image, and the morphologically
    opened image are each shown in their own OpenCV window so the
    preprocessing steps can be inspected visually.

    Args:
        img: Image array accepted by ``cv2.cvtColor`` with
            ``cv2.COLOR_BGR2GRAY`` (i.e. a BGR colour image).

    Returns:
        The string produced by ``pytesseract.image_to_string`` for the
        preprocessed image. The same text is also printed.
    """
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    cv2.imshow("binimg", gray)

    # THRESH_OTSU makes OpenCV pick the threshold itself; the 100 passed
    # here is only a placeholder and does not affect the result.
    _, binary = cv2.threshold(gray, 100, 255, cv2.THRESH_OTSU)
    cv2.imshow("binmg", binary)

    # Morphological opening with a 2-wide, 1-tall rectangle to suppress
    # small specks before handing the image to Tesseract.
    # NOTE(review): the original also computed a 3x3 opening whose result was
    # never used; it may have been meant as the input to this step — confirm.
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 1))
    opened = cv2.morphologyEx(binary, cv2.MORPH_OPEN, kernel, iterations=1)
    cv2.imshow("binary_img", opened)

    text = tess.image_to_string(opened)
    # The original format string had no %s placeholder, so the % operator
    # raised TypeError instead of printing the recognized text.
    print("识别结果:%s" % text)
    return text
# Demo driver: load the sample image, show it, run OCR on it, then wait for a
# key press before closing every OpenCV window.
img = cv2.imread('12.jpg', cv2.IMREAD_COLOR)  # IMREAD_COLOR == 1
if img is None:
    # cv2.imread reports a missing or unreadable file by returning None
    # rather than raising, which would otherwise surface later as an opaque
    # OpenCV error.
    raise FileNotFoundError("Could not read image file '12.jpg'")

# Resizable windows (WINDOW_NORMAL == 0) and their initial sizes.
# NOTE(review): only 'img' is drawn into by the code visible here; the other
# five windows stay empty and look like leftovers from another demo — confirm
# whether they are still needed.
for window_name, width, height in (
    ('img', 480, 320),
    ('binnary', 400, 320),
    ('sure_bg', 400, 320),
    ('dist', 400, 320),
    ('surface', 400, 320),
    ('return', 480, 320),
):
    cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
    cv2.resizeWindow(window_name, width, height)

cv2.imshow('img', img)
recognize_text(img)

# Block until any key is pressed so the windows stay visible, then clean up.
cv2.waitKey(0)
cv2.destroyAllWindows()