Digit Recognition with Template Matching
Each digit extracted from the credit card is matched against every digit in the template image; the template with the highest score is taken as the recognized digit.
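Before walking through the full script, here is a minimal, self-contained sketch of the core idea: score an unknown digit image against every template with cv2.matchTemplate and take the best score. The digit images here are synthetic stand-ins rendered with cv2.putText, not the OCR-A template used below.

import cv2
import numpy as np

# Render crude synthetic digit templates (white on black, 57x88 as in the script below)
templates = {}
for d in range(10):
    canvas = np.zeros((88, 57), dtype="uint8")
    cv2.putText(canvas, str(d), (5, 70), cv2.FONT_HERSHEY_SIMPLEX, 2.5, 255, 6)
    templates[d] = canvas

# Treat one digit as the "unknown" ROI and score it against every template
roi = templates[4].copy()
scores = [cv2.minMaxLoc(cv2.matchTemplate(roi, t, cv2.TM_CCOEFF))[1]
          for t in templates.values()]
print("recognized digit:", int(np.argmax(scores)))  # expected: 4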
#python3 ocr_template_match.py -i images/credit_card_01.png -t images/ocr_a_reference.png
# Import packages
from imutils import contours
import numpy as np
import argparse
import cv2
import myutils
# Parse command-line arguments
ap = argparse.ArgumentParser()
ap.add_argument("-i", "--image", required=True,
help="path to input image")
ap.add_argument("-t", "--template", required=True,
help="path to template OCR-A image")
args = vars(ap.parse_args())
# Map the first digit of the card number to the card type
FIRST_NUMBER = {
"3": "American Express",
"4": "Visa",
"5": "MasterCard",
"6": "Discover Card"
}
# Helper to display an image
def cv_show(name,img):
cv2.imshow(name, img)
cv2.waitKey(0)
cv2.destroyAllWindows()
# Read the template image
img = cv2.imread(args["template"])
cv_show('img',img)
# Convert to grayscale
ref = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
cv_show('ref',ref)
# Binarize (inverse threshold: the digits become white on a black background)
ref = cv2.threshold(ref, 10, 255, cv2.THRESH_BINARY_INV)[1]
cv_show('ref2',ref)
# Find contours
# cv2.findContours() expects a binary image (black and white, not grayscale); cv2.RETR_EXTERNAL retrieves only the outer contours, and cv2.CHAIN_APPROX_SIMPLE keeps only the endpoint coordinates
# Each element of the returned list is one contour in the image
_, refCnts, hierarchy = cv2.findContours(ref.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)  # OpenCV 3.x returns (image, contours, hierarchy)
cv2.drawContours(img,refCnts,-1,(0,0,255),3)
cv_show('img',img)
print(len(refCnts))
refCnts = myutils.sort_contours(refCnts, method="left-to-right")[0]  # sort the templates from left to right
digits = {}
# Loop over each contour
for (i, c) in enumerate(refCnts):
# Compute the bounding box and resize it to a uniform size
(x, y, w, h) = cv2.boundingRect(c)
roi = ref[y:y + h, x:x + w]
roi = cv2.resize(roi, (57, 88))
# Each digit gets its own template
digits[i] = roi
# Initialize the structuring elements (kernels) for the morphological operations
rectKernel = cv2.getStructuringElement(cv2.MORPH_RECT, (9, 3))
sqKernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
# Read the input image and preprocess it
image = cv2.imread(args["image"])
cv_show('image',image)
image = myutils.resize(image, width=300)
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
cv_show('gray',gray)
# Top-hat operation to highlight the brighter regions
tophat = cv2.morphologyEx(gray, cv2.MORPH_TOPHAT, rectKernel)
cv_show('tophat',tophat)
# Compute the horizontal gradient with the Sobel operator
gradX = cv2.Sobel(tophat, ddepth=cv2.CV_32F, dx=1, dy=0,
ksize=-1)  # ksize=-1 uses the 3x3 Scharr kernel
# Min-max normalize the gradient to the 0-255 range
gradX = np.absolute(gradX)
(minVal, maxVal) = (np.min(gradX), np.max(gradX))
gradX = (255 * ((gradX - minVal) / (maxVal - minVal)))
gradX = gradX.astype("uint8")
print(gradX.shape)
cv_show('gradX',gradX)
# Closing operation (dilate, then erode) to merge adjacent digits into connected blocks
gradX = cv2.morphologyEx(gradX, cv2.MORPH_CLOSE, rectKernel)
cv_show('gradX',gradX)
# THRESH_OTSU automatically picks a suitable threshold (works well for bimodal histograms); the threshold argument must be set to 0
thresh = cv2.threshold(gradX, 0, 255,
cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
cv_show('thresh',thresh)
# Another closing operation to fill the gaps inside each group
thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, sqKernel)
cv_show('thresh',thresh)
# Find the contours of the digit groups
_, threshCnts, hierarchy = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL,
cv2.CHAIN_APPROX_SIMPLE)
cnts = threshCnts
cur_img = image.copy()
cv2.drawContours(cur_img,cnts,-1,(0,0,255),3)
cv_show('img2',cur_img)
locs = []
# Loop over the contours
for (i, c) in enumerate(cnts):
# Compute the bounding box and its aspect ratio
(x, y, w, h) = cv2.boundingRect(c)
ar = w / float(h)
# Keep only regions of a suitable size (task-specific); on this card each group contains four digits
if ar > 2.5 and ar < 4.0:
if (w > 40 and w < 55) and (h > 10 and h < 20):
# Keep the candidates that match
locs.append((x, y, w, h))
# Sort the matching regions from left to right
locs = sorted(locs, key=lambda x:x[0])
output = []
# Loop over the digits in each group
for (i, (gX, gY, gW, gH)) in enumerate(locs):
# initialize the list of group digits
groupOutput = []
# Extract the group from the grayscale image, with a small margin
group = gray[gY - 5:gY + gH + 5, gX - 5:gX + gW + 5]
cv_show('group',group)
# Preprocess: Otsu-threshold the group
group = cv2.threshold(group, 0, 255,
cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
cv_show('group',group)
# Find the contours of the individual digits within the group
_, digitCnts, hierarchy = cv2.findContours(group.copy(), cv2.RETR_EXTERNAL,
cv2.CHAIN_APPROX_SIMPLE)
digitCnts = contours.sort_contours(digitCnts,
method="left-to-right")[0]
# Recognize each digit in the group
for c in digitCnts:
# Crop the current digit from its bounding box and resize it to the template size
(x, y, w, h) = cv2.boundingRect(c)
roi = group[y:y + h, x:x + w]
roi = cv2.resize(roi, (57, 88))
cv_show('roi',roi)
# Compute the match scores
scores = []
# Score the ROI against every digit template
for (digit, digitROI) in digits.items():
# Template matching
result = cv2.matchTemplate(roi, digitROI,
cv2.TM_CCOEFF)
(_, score, _, _) = cv2.minMaxLoc(result)
scores.append(score)
# The best-scoring template gives the recognized digit
groupOutput.append(str(np.argmax(scores)))
# Draw the result on the image
cv2.rectangle(image, (gX - 5, gY - 5),
(gX + gW + 5, gY + gH + 5), (0, 0, 255), 1)
cv2.putText(image, "".join(groupOutput), (gX, gY - 15),
cv2.FONT_HERSHEY_SIMPLEX, 0.65, (0, 0, 255), 2)
# Collect the results
output.extend(groupOutput)
# Print the results
print("Credit Card Type: {}".format(FIRST_NUMBER[output[0]]))
print("Credit Card #: {}".format("".join(output)))
cv2.imshow("Image", image)
cv2.waitKey(0)
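The script above also relies on a small local helper module, myutils, whose source is not included in this post. A minimal sketch of the two helpers it calls is shown below; treat these implementations as assumptions (they mirror the sort_contours function defined in the answer-sheet script further down and the aspect-ratio-preserving resize from imutils).

# myutils.py (sketch) -- assumed contents of the helper module imported above
import cv2

def sort_contours(cnts, method="left-to-right"):
    # Sort contours by the x (or y) coordinate of their bounding boxes
    reverse = method in ("right-to-left", "bottom-to-top")
    i = 1 if method in ("top-to-bottom", "bottom-to-top") else 0
    boundingBoxes = [cv2.boundingRect(c) for c in cnts]
    cnts, boundingBoxes = zip(*sorted(zip(cnts, boundingBoxes),
                                      key=lambda b: b[1][i], reverse=reverse))
    return cnts, boundingBoxes

def resize(image, width=None, height=None, inter=cv2.INTER_AREA):
    # Resize to a target width or height while preserving the aspect ratio
    (h, w) = image.shape[:2]
    if width is None and height is None:
        return image
    if width is None:
        r = height / float(h)
        dim = (int(w * r), height)
    else:
        r = width / float(w)
        dim = (width, int(h * r))
    return cv2.resize(image, dim, interpolation=inter)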
Answer Sheet Grading
Find the contours of the answer options, scan them from top to bottom and left to right, and use a threshold on the filled-pixel count to decide which bubble is selected.
#python3 get_answer.py -i images/example_test.png
# Import packages
import numpy as np
import argparse
import imutils
import cv2
# Parse command-line arguments
ap = argparse.ArgumentParser()
ap.add_argument("-i", "--image", required=True,
help="path to the input image")
args = vars(ap.parse_args())
# Answer key: question index -> index of the correct option
ANSWER_KEY = {0: 1, 1: 4, 2: 0, 3: 3, 4: 1}
def order_points(pts):
# There are four coordinate points in total
rect = np.zeros((4, 2), dtype = "float32")
# Order the points so that indices 0, 1, 2, 3 are top-left, top-right, bottom-right, bottom-left
# Top-left has the smallest coordinate sum; bottom-right has the largest
s = pts.sum(axis = 1)
rect[0] = pts[np.argmin(s)]
rect[2] = pts[np.argmax(s)]
# Top-right has the smallest (y - x) difference; bottom-left has the largest
diff = np.diff(pts, axis = 1)
rect[1] = pts[np.argmin(diff)]
rect[3] = pts[np.argmax(diff)]
return rect
def four_point_transform(image, pts):
# Order the input points
rect = order_points(pts)
(tl, tr, br, bl) = rect
# Compute the output width and height
widthA = np.sqrt(((br[0] - bl[0]) ** 2) + ((br[1] - bl[1]) ** 2))
widthB = np.sqrt(((tr[0] - tl[0]) ** 2) + ((tr[1] - tl[1]) ** 2))
maxWidth = max(int(widthA), int(widthB))
heightA = np.sqrt(((tr[0] - br[0]) ** 2) + ((tr[1] - br[1]) ** 2))
heightB = np.sqrt(((tl[0] - bl[0]) ** 2) + ((tl[1] - bl[1]) ** 2))
maxHeight = max(int(heightA), int(heightB))
# Destination coordinates after the transform
dst = np.array([
[0, 0],
[maxWidth - 1, 0],
[maxWidth - 1, maxHeight - 1],
[0, maxHeight - 1]], dtype = "float32")
# Compute the perspective transform matrix and warp the image
M = cv2.getPerspectiveTransform(rect, dst)
warped = cv2.warpPerspective(image, M, (maxWidth, maxHeight))
# Return the warped result
return warped
def sort_contours(cnts, method="left-to-right"):
reverse = False
i = 0
if method == "right-to-left" or method == "bottom-to-top":
reverse = True
if method == "top-to-bottom" or method == "bottom-to-top":
i = 1
boundingBoxes = [cv2.boundingRect(c) for c in cnts]
(cnts, boundingBoxes) = zip(*sorted(zip(cnts, boundingBoxes),
key=lambda b: b[1][i], reverse=reverse))
return cnts, boundingBoxes
def cv_show(name,img):
cv2.imshow(name, img)
cv2.waitKey(0)
cv2.destroyAllWindows()
# Preprocess
image = cv2.imread(args["image"])
contours_img = image.copy()
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blurred = cv2.GaussianBlur(gray, (5, 5), 0)
cv_show('blurred',blurred)
edged = cv2.Canny(blurred, 75, 200)
cv_show('edged',edged)
# Contour detection (index [1] is the contour list under OpenCV 3.x)
cnts = cv2.findContours(edged.copy(), cv2.RETR_EXTERNAL,
cv2.CHAIN_APPROX_SIMPLE)[1]
cv2.drawContours(contours_img,cnts,-1,(0,0,255),3)
cv_show('contours_img',contours_img)
docCnt = None
# Make sure at least one contour was found
if len(cnts) > 0:
# Sort the contours by area, largest first
cnts = sorted(cnts, key=cv2.contourArea, reverse=True)
# Loop over the contours
for c in cnts:
# Approximate the contour
peri = cv2.arcLength(c, True)
approx = cv2.approxPolyDP(c, 0.02 * peri, True)
# A four-point approximation is taken as the sheet; use it for the perspective transform
if len(approx) == 4:
docCnt = approx
break
# Apply the perspective transform
warped = four_point_transform(gray, docCnt.reshape(4, 2))
cv_show('warped',warped)
# Otsu's thresholding
thresh = cv2.threshold(warped, 0, 255,
cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]
cv_show('thresh',thresh)
thresh_Contours = thresh.copy()
# Find the contour of every bubble (index [1] is the contour list under OpenCV 3.x)
cnts = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL,
cv2.CHAIN_APPROX_SIMPLE)[1]
cv2.drawContours(thresh_Contours,cnts,-1,(0,0,255),3)
cv_show('thresh_Contours',thresh_Contours)
questionCnts = []
# Loop over the contours
for c in cnts:
# Compute the size and aspect ratio
(x, y, w, h) = cv2.boundingRect(c)
ar = w / float(h)
# Criteria chosen for this sheet: roughly square and at least 20x20 pixels
if w >= 20 and h >= 20 and ar >= 0.9 and ar <= 1.1:
questionCnts.append(c)
# Sort the question bubbles from top to bottom
questionCnts = sort_contours(questionCnts,
method="top-to-bottom")[0]
correct = 0
# Each row (question) has 5 options, so step through the bubbles 5 at a time
for (q, i) in enumerate(np.arange(0, len(questionCnts), 5)):
# Sort the current row's bubbles from left to right
cnts = sort_contours(questionCnts[i:i + 5])[0]
bubbled = None
# Loop over the bubbles in the row
for (j, c) in enumerate(cnts):
# Use a mask to isolate the current bubble
mask = np.zeros(thresh.shape, dtype="uint8")
cv2.drawContours(mask, [c], -1, 255, -1)  # thickness=-1 fills the contour
cv_show('mask',mask)
# Count the non-zero pixels to measure how heavily the bubble is filled
mask = cv2.bitwise_and(thresh, thresh, mask=mask)
total = cv2.countNonZero(mask)
# The bubble with the most filled pixels is taken as the marked answer
if bubbled is None or total > bubbled[0]:
bubbled = (total, j)
# Compare against the answer key
color = (0, 0, 255)
k = ANSWER_KEY[q]
# Count it as correct if the bubbled answer matches the key
if k == bubbled[1]:
color = (0, 255, 0)
correct += 1
# Draw the result
cv2.drawContours(warped, [cnts[k]], -1, color, 3)
score = (correct / 5.0) * 100
print("[INFO] score: {:.2f}%".format(score))
cv2.putText(warped, "{:.2f}%".format(score), (10, 30),
cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 0, 255), 2)
cv2.imshow("Original", image)
cv2.imshow("Exam", warped)
cv2.waitKey(0)
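A quick numeric check of the order_points trick used above (smallest coordinate sum = top-left, largest sum = bottom-right; smallest y - x difference = top-right, largest = bottom-left). The sample points are made up for illustration and reuse the order_points function defined earlier in this script:

import numpy as np

pts = np.array([[50, 300], [40, 10], [400, 320], [390, 20]], dtype="float32")
print(order_points(pts))
# [[ 40.  10.]    top-left
#  [390.  20.]    top-right
#  [400. 320.]    bottom-right
#  [ 50. 300.]]   bottom-left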
Lane Detection
Detect lane lines in a driving video: grayscale conversion and Canny edge detection, a region-of-interest mask, probabilistic Hough line detection, and finally a least-squares fit of the left and right lane lines.
import cv2
import numpy as np
from moviepy.editor import VideoFileClip
# Gaussian blur kernel size
blur_ksize = 5
# Canny edge detection low and high thresholds
canny_lth = 50
canny_hth = 150
# Hough transform parameters
rho = 1
theta = np.pi / 180
threshold = 15
min_line_len = 40
max_line_gap = 20
def process_an_image(img):
# 1. Grayscale, Gaussian blur, and Canny edge detection
gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
blur_gray = cv2.GaussianBlur(gray, (blur_ksize, blur_ksize), 1)
edges = cv2.Canny(blur_gray, canny_lth, canny_hth)
# 2. Mark four points for the region-of-interest (ROI) crop
rows, cols = edges.shape
points = np.array([[(0, rows), (460, 325), (520, 325), (cols, rows)]])
# [[[0 540], [460 325], [520 325], [960 540]]]
roi_edges = roi_mask(edges, points)
# 3. Hough line detection
drawing, lines = hough_lines(roi_edges, rho, theta,
threshold, min_line_len, max_line_gap)
# 4. Fit the lane lines
draw_lanes(drawing, lines)
# 5. Blend the result with the original frame
result = cv2.addWeighted(img, 0.9, drawing, 0.2, 0)
return result
def roi_mask(img, corner_points):
# Create the mask
mask = np.zeros_like(img)
cv2.fillPoly(mask, corner_points, 255)
masked_img = cv2.bitwise_and(img, mask)
return masked_img
def hough_lines(img, rho, theta, threshold, min_line_len, max_line_gap):
# Probabilistic Hough line transform; returns an array of line segments [[x1, y1, x2, y2]]
lines = cv2.HoughLinesP(img, rho, theta, threshold,
minLineLength=min_line_len, maxLineGap=max_line_gap)
# Create a blank canvas
drawing = np.zeros((img.shape[0], img.shape[1], 3), dtype=np.uint8)
# Optionally draw the raw line detections:
# draw_lines(drawing, lines)
return drawing, lines
def draw_lines(img, lines, color=[0, 0, 255], thickness=1):
for line in lines:
for x1, y1, x2, y2 in line:
cv2.line(img, (x1, y1), (x2, y2), color, thickness)
def draw_lanes(img, lines, color=[255, 0, 0], thickness=8):
# a. Split the lines into left and right lanes by slope (assumes no perfectly vertical segments)
left_lines, right_lines = [], []
for line in lines:
for x1, y1, x2, y2 in line:
k = (y2 - y1) / (x2 - x1)
if k < 0:
left_lines.append(line)
else:
right_lines.append(line)
if (len(left_lines) <= 0 or len(right_lines) <= 0):
return
# b. Remove outlier lines
clean_lines(left_lines, 0.1)
clean_lines(right_lines, 0.1)
# c. Collect the endpoints of the left and right lines and fit a straight line to each set
left_points = [(x1, y1) for line in left_lines for x1, y1, x2, y2 in line]
left_points = left_points + [(x2, y2)
for line in left_lines for x1, y1, x2, y2 in line]
right_points = [(x1, y1)
for line in right_lines for x1, y1, x2, y2 in line]
right_points = right_points + \
[(x2, y2) for line in right_lines for x1, y1, x2, y2 in line]
left_results = least_squares_fit(left_points, 325, img.shape[0])
right_results = least_squares_fit(right_points, 325, img.shape[0])
# Note the order of the points (left-bottom, left-top, right-top, right-bottom) so the polygon is filled correctly
vtxs = np.array(
[[left_results[1], left_results[0], right_results[0], right_results[1]]])
# d. Fill the lane region
cv2.fillPoly(img, vtxs, (0, 255, 0))
# Or draw just the two lane lines:
# cv2.line(img, left_results[0], left_results[1], (0, 255, 0), thickness)
# cv2.line(img, right_results[0], right_results[1], (0, 255, 0), thickness)
def clean_lines(lines, threshold):
# Iteratively compute the mean slope and discard the line whose slope deviates most from it, until every remaining deviation is within the threshold
slope = [(y2 - y1) / (x2 - x1)
for line in lines for x1, y1, x2, y2 in line]
while len(lines) > 0:
mean = np.mean(slope)
diff = [abs(s - mean) for s in slope]
idx = np.argmax(diff)
if diff[idx] > threshold:
slope.pop(idx)
lines.pop(idx)
else:
break
def least_squares_fit(point_list, ymin, ymax):
# Least-squares line fit
x = [p[0] for p in point_list]
y = [p[1] for p in point_list]
# The third argument of polyfit is the degree of the fitted polynomial, so 1 means a linear fit
fit = np.polyfit(y, x, 1)
fit_fn = np.poly1d(fit)  # the fitted line as a callable
xmin = int(fit_fn(ymin))
xmax = int(fit_fn(ymax))
return [(xmin, ymin), (xmax, ymax)]
if __name__ == "__main__":
output = 'test_videos/yellow_lane_mark.mp4'
clip = VideoFileClip("test_videos/yellow_lane.mp4")
out_clip = clip.fl_image(process_an_image)
out_clip.write_videofile(output, audio=False)
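Because moviepy passes RGB frames to fl_image, process_an_image converts with COLOR_RGB2GRAY, and the hard-coded ROI points assume frames of roughly 960x540. To sanity-check the pipeline on a single frame loaded with OpenCV (which reads BGR), swap the channel order first; the image path below is hypothetical:

# Quick single-frame test outside moviepy (hypothetical path, ~960x540 frame assumed)
frame_bgr = cv2.imread("test_videos/sample_frame.jpg")
frame_rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)  # the pipeline expects RGB input
result_rgb = process_an_image(frame_rgb)
cv2.imshow("lanes", cv2.cvtColor(result_rgb, cv2.COLOR_RGB2BGR))
cv2.waitKey(0)
cv2.destroyAllWindows()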