# 说明:
# 1. 拍摄的文字图片需要有清晰的矩形轮廓;
# 2. 也可以直接使用文字截图。
import numpy as np
import cv2
from PIL import Image
import pytesseract
import os
import time
# Wall-clock timestamp taken at startup; the elapsed-time report at the end
# of the script is computed relative to this value.
start = time.time()
#显示图像,按任意键退出显示,程序继续运行
def cv_show(name, img):
    """Display ``img`` in a window titled ``name`` until a key is pressed.

    After the key press all OpenCV windows are destroyed and control
    returns to the caller, so the rest of the program keeps running.

    Args:
        name: Title of the window passed to ``cv2.imshow``.
        img: Image passed to ``cv2.imshow``.
    """
    cv2.imshow(name, img)
    # Block here until the user presses any key.
    cv2.waitKey(0)
    cv2.destroyAllWindows()
#找到四边形的四个顶点
def order_points(pts):
    """Order four corner points as top-left, top-right, bottom-right, bottom-left.

    The corners are classified in image coordinates (y grows downward):
    the top-left corner has the smallest ``x + y`` and the bottom-right the
    largest; the top-right corner has the smallest ``y - x`` and the
    bottom-left the largest.

    Args:
        pts: Array-like holding exactly four ``(x, y)`` points. Any layout
            with eight values whose last axis is the coordinate pair is
            accepted, e.g. shape ``(4, 2)`` or a ``(4, 1, 2)`` contour.

    Returns:
        A ``(4, 2)`` ``float32`` array with rows in the order
        top-left, top-right, bottom-right, bottom-left.

    Raises:
        ValueError: If ``pts`` does not contain exactly four 2-D points.
    """
    pts = np.asarray(pts)
    if pts.size != 8:
        raise ValueError(
            "order_points expects exactly four (x, y) points, got an array "
            "of shape %s" % (pts.shape,)
        )
    # Normalise contour-style layouts such as (4, 1, 2) to plain (4, 2).
    pts = pts.reshape(4, 2)

    rect = np.zeros((4, 2), dtype="float32")

    # x + y: minimum is the top-left corner, maximum the bottom-right.
    s = pts.sum(axis=1)
    rect[0] = pts[np.argmin(s)]
    rect[2] = pts[np.argmax(s)]

    # y - x: minimum is the top-right corner, maximum the bottom-left.
    diff = np.diff(pts, axis=1)
    rect[1] = pts[np.argmin(diff)]
    rect[3] = pts[np.argmax(diff)]
    return rect
#将不规则的四边形转换成矩形图像
def four_point_transform(image, pts):
    """Warp the quadrilateral described by ``pts`` into an upright rectangle.

    The four points are ordered with ``order_points``; the output size is
    the longer of each pair of opposite edges (truncated to whole pixels),
    and the region is mapped onto that rectangle with a perspective
    transform.

    Args:
        image: Source image passed to ``cv2.warpPerspective``.
        pts: The four corner points of the region, in any order.

    Returns:
        The warped image of size ``(maxWidth, maxHeight)``.
    """

    def _edge_length(p, q):
        # Euclidean distance between two (x, y) points.
        return np.sqrt(((p[0] - q[0]) ** 2) + ((p[1] - q[1]) ** 2))

    corners = order_points(pts)
    tl, tr, br, bl = corners

    # Width: the longer of the bottom and top edges.
    out_w = max(int(_edge_length(br, bl)), int(_edge_length(tr, tl)))
    # Height: the longer of the right and left edges.
    out_h = max(int(_edge_length(tr, br)), int(_edge_length(tl, bl)))

    # Destination corners in the same tl, tr, br, bl order as ``corners``.
    target = np.array(
        [
            [0, 0],
            [out_w - 1, 0],
            [out_w - 1, out_h - 1],
            [0, out_h - 1],
        ],
        dtype="float32",
    )

    matrix = cv2.getPerspectiveTransform(corners, target)
    return cv2.warpPerspective(image, matrix, (out_w, out_h))
#图片缩放到目标大小
def resize(image, width=None, height=None, inter=cv2.INTER_AREA):
    """Resize ``image`` to a target width or height, keeping its aspect ratio.

    Args:
        image: Image array; its first two dimensions are read as
            ``(height, width)``.
        width: Target width in pixels. When given, the height is derived
            from it and any ``height`` argument is ignored.
        height: Target height in pixels, used when ``width`` is None.
        inter: Interpolation flag forwarded to ``cv2.resize``.

    Returns:
        ``image`` itself when neither ``width`` nor ``height`` is given,
        otherwise the resized image.
    """
    (h, w) = image.shape[:2]
    if width is None and height is None:
        return image

    if width is None:
        # Scale by the requested height.
        r = height / float(h)
        dim = (int(w * r), height)
    else:
        # Scale by the requested width. Without this branch ``dim`` stayed
        # None and cv2.resize failed for every width-based call.
        r = width / float(w)
        dim = (width, int(h * r))

    return cv2.resize(image, dim, interpolation=inter)
# Load the photographed page.
image = cv2.imread("C:/Users/lenovo/Pictures/dataset/canny_1.png")
if image is None:
    # cv2.imread does not raise on a missing or unreadable file; it returns
    # None, which would otherwise surface as an opaque AttributeError below.
    raise FileNotFoundError(
        "Could not read image: C:/Users/lenovo/Pictures/dataset/canny_1.png"
    )

# Contours are detected on a copy scaled to a height of 500 px; ``ratio``
# maps the detected corners back onto the full-resolution original.
ratio = image.shape[0] / 500.0
orig = image.copy()
img = resize(orig, height=500)

try:
    # Edge map of the page.
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    gray = cv2.GaussianBlur(gray, (5, 5), 0)
    edged = cv2.Canny(gray, 75, 200)

    # Extract contours. OpenCV 3.x returns (image, contours, hierarchy)
    # while 2.x and 4.x return (contours, hierarchy), so pick the contour
    # list by tuple length instead of a fixed index.
    found = cv2.findContours(edged.copy(), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
    cnts = found[0] if len(found) == 2 else found[1]
    # Keep only the five largest contours by area.
    cnts = sorted(cnts, key=cv2.contourArea, reverse=True)[:5]

    # Approximate each contour by a polygon; the first one with four
    # vertices is taken as the page outline.
    screenCnt = None
    for c in cnts:
        peri = cv2.arcLength(c, True)
        approx = cv2.approxPolyDP(c, 0.02 * peri, True)
        if len(approx) == 4:
            screenCnt = approx
            break
    if screenCnt is None:
        # Make the "no page outline" case explicit instead of relying on a
        # NameError from an unbound variable to reach the fallback.
        raise ValueError("no four-sided contour found")

    # Rectify the page on the full-resolution image, then binarise it.
    warped = four_point_transform(orig, screenCnt.reshape(4, 2) * ratio)
    warped = cv2.cvtColor(warped, cv2.COLOR_BGR2GRAY)
    ref = cv2.threshold(warped, 100, 255, cv2.THRESH_BINARY)[1]

    # Run Tesseract OCR on the rectified page.
    text = pytesseract.image_to_string(ref, lang="chi_sim+eng")
except Exception:
    # Best-effort fallback (e.g. a plain screenshot with no page outline):
    # OCR the resized image directly. ``Exception`` rather than a bare
    # ``except`` so Ctrl-C and SystemExit still propagate.
    text = pytesseract.image_to_string(img, lang="chi_sim+eng")

# Write the recognised text to a file on the desktop.
with open("C:/Users/lenovo/Desktop/OCR_Result.txt", "w", encoding='utf-8') as f:
    f.write(text)

end = time.time()
print("耗时" + str(round(end - start, 2)) + "秒,请在桌面上查看.")