给定一张答题卡图像,通过图像处理识别出答题卡上每道题被涂选的选项,与正确答案对比后计算得分,并将分数写在答题卡图像上。
import numpy as np
import cv2 as cv
def cv_show(name, img):
    """Show ``img`` in a window titled ``name`` and wait for a key press.

    The call blocks in ``cv.waitKey(0)`` until any key is pressed, after
    which every open OpenCV window is closed.
    """
    cv.imshow(name, img)
    cv.waitKey(0)
    cv.destroyAllWindows()
# Answer key: question index (rows, top to bottom) -> index of the correct
# bubble within that row (left to right). Both indices are zero-based.
ANSWER_KEY = {0: 1, 1: 4, 2: 0, 3: 3, 4: 1}

image = cv.imread('./images/test_01.png')
if image is None:
    # cv.imread reports a missing/unreadable file by returning None instead
    # of raising, which would otherwise surface as an opaque cvtColor error.
    raise FileNotFoundError("could not read image './images/test_01.png'")
cv_show('image', image)

# Grayscale + Gaussian blur to suppress noise before edge detection.
gray = cv.cvtColor(image, cv.COLOR_BGR2GRAY)
blurred = cv.GaussianBlur(gray, (5, 5), 0)
cv_show('blurred', blurred)

# Canny edge map (hysteresis thresholds 75 / 200) used to find the sheet outline.
edged = cv.Canny(blurred, 75, 200)
cv_show('edged', edged)
为了完成透视变换,需要检测出图像的外轮廓。
# cv.findContours returns (image, contours, hierarchy) in OpenCV 3.x but
# (contours, hierarchy) in OpenCV 2.x/4.x. The contour list is always the
# second-to-last element, so index with -2 instead of a hard-coded 1
# (which would pick the hierarchy on OpenCV 4).
cnts = cv.findContours(edged.copy(), cv.RETR_EXTERNAL, cv.CHAIN_APPROX_SIMPLE)[-2]

# Draw every detected external contour in red on a copy of the input.
contours_img = image.copy()
cv.drawContours(contours_img, cnts, -1, [0, 0, 255], 3)
cv_show('contours_img', contours_img)

# NOTE: stacking the contours into one array assumes they all have the same
# number of points (e.g. when only a single contour was found).
print(np.array(cnts).shape)
(1, 89, 1, 2)
输出形状 (1, 89, 1, 2) 的含义是:共检测到 1 个轮廓,该轮廓由 89 个点组成,每个点是一个 (x, y) 坐标。由此可见,代码只检测出此图像的一个含有89个点的外轮廓。如果检测到不止一个外轮廓,则需要通过比较轮廓面积(或周长)的方式筛选出最外围的轮廓,例如按面积从大到小排序后取第一个,可以用如下代码实现:
# Order the contours by enclosed area, largest first, so that index 0 holds
# the biggest outline.
cnts = list(cnts)
cnts.sort(key=cv.contourArea, reverse=True)
由于轮廓检测返回的轮廓是由89个点构成的,而我们需要进行的透视变换只需要4个点(左上,右上,右下,左下)的位置即可完成,故需要找到此外轮廓的近似多边形的坐标。
# Find the four-cornered outline of the answer sheet. The contours are
# examined in their current order (largest area first after the preceding
# sort) and the first one whose polygonal approximation has exactly four
# vertices is kept. The original code assumed cnts[0] always qualified,
# which makes the later reshape(4, 2) fail with an unhelpful error.
approx = None
for candidate in cnts:
    peri = cv.arcLength(candidate, True)
    # Tolerance of 2% of the perimeter; True = treat the curve as closed.
    polygon = cv.approxPolyDP(candidate, 0.02 * peri, True)
    if len(polygon) == 4:
        approx = polygon
        break
if approx is None:
    raise ValueError("no four-cornered contour found; cannot locate the answer sheet")
为了使现在得到的四个点与透视变换完成后的四个点的位置一一对应,在这里要先将这些点按固定顺序(左上、右上、右下、左下)排列。
# Arrange the four corners as [top-left, top-right, bottom-right, bottom-left].
pts = approx.reshape(4, 2)

# x + y is smallest at the top-left corner and largest at the bottom-right.
s = pts.sum(axis=1)
# y - x is smallest at the top-right corner and largest at the bottom-left.
diff = np.diff(pts, axis=1)

rect = np.array(
    [
        pts[np.argmin(s)],     # top-left
        pts[np.argmin(diff)],  # top-right
        pts[np.argmax(s)],     # bottom-right
        pts[np.argmax(diff)],  # bottom-left
    ],
    dtype=np.float32,
)
变换后,图像的长(高度,对应代码中的 maxHeight)和宽(宽度,对应代码中的 maxWidth)应取为:
长 = max(变换前左边长, 变换前右边长)
宽 = max(变换前上边长, 变换前下边长)
并以变换后图像的左上角作为坐标原点 (0, 0)。
def _edge_length(p, q):
    """Return the Euclidean distance between the 2-D points ``p`` and ``q``."""
    return np.sqrt(((p[0] - q[0]) ** 2) + ((p[1] - q[1]) ** 2))


# Unpack the ordered corners.
tl, tr, br, bl = rect

# Output width: the longer of the bottom and top edges (truncated to int).
widthA = _edge_length(br, bl)
widthB = _edge_length(tr, tl)
maxWidth = max(int(widthA), int(widthB))

# Output height: the longer of the right and left edges (truncated to int).
heightA = _edge_length(tr, br)
heightB = _edge_length(tl, bl)
maxHeight = max(int(heightA), int(heightB))

# Destination corners, in the same order as ``rect``, with the top-left
# corner of the output at the origin.
right = maxWidth - 1
bottom = maxHeight - 1
dst = np.array(
    [
        [0, 0],
        [right, 0],
        [right, bottom],
        [0, bottom],
    ],
    dtype=np.float32,
)

# Homography mapping the detected quadrilateral onto the upright rectangle,
# applied to the grayscale image.
H = cv.getPerspectiveTransform(rect, dst)
warped = cv.warpPerspective(gray, H, (maxWidth, maxHeight))
cv_show('warped', warped)
# Otsu picks the threshold automatically (the 0 passed here is ignored);
# BINARY_INV makes dark marks white (255) on a black background.
thresh = cv.threshold(warped, 0, 255, cv.THRESH_BINARY_INV | cv.THRESH_OTSU)[1]
cv_show('thresh', thresh)

# Work on a copy so ``thresh`` itself stays untouched for the grading step.
thresh_Contours = thresh.copy()
# Index -2 selects the contour list on every OpenCV version: 3.x returns
# (image, contours, hierarchy) while 2.x/4.x return (contours, hierarchy),
# so a hard-coded 1 would pick the hierarchy on OpenCV 4.
cnts = cv.findContours(thresh_Contours, cv.RETR_EXTERNAL, cv.CHAIN_APPROX_SIMPLE)[-2]
# Visualisation only: trace the found contours in black on the copy.
cv.drawContours(thresh_Contours, cnts, -1, (0, 0, 0), 3)
cv_show('thresh_Contours', thresh_Contours)
# Keep only contours that look like answer bubbles: bounding box at least
# 20x20 pixels and roughly square (aspect ratio between 0.9 and 1.1).
# These limits were chosen for this particular sheet.
questionCnts = []
for contour in cnts:
    _, _, width, height = cv.boundingRect(contour)
    if width < 20 or height < 20:
        continue
    aspect = width / float(height)
    if 0.9 <= aspect <= 1.1:
        questionCnts.append(contour)

# Sort the bubbles top-to-bottom by the y coordinate of their bounding box,
# keeping each contour paired with its box.
boundingBoxes = [cv.boundingRect(contour) for contour in questionCnts]
ordered = sorted(zip(questionCnts, boundingBoxes), key=lambda pair: pair[1][1])
(questionCnts, boundingBoxes) = zip(*ordered)
在这里使用的相关函数(zip、lambda 等)的用法,可以参考《lambda函数的用法》、《Python内置函数 – zip(), sorted(), filter()和map()》以及《Python中星号变量的特殊用法》。
如果不想使用 zip(*...) 解包的写法,也可以用传统的循环方式完成同样的排序,代码如下:
# Loop-based alternative to the zip(*sorted(...)) one-liner: sort the
# (contour, bounding box) pairs top-to-bottom by the box's y coordinate,
# then split them back into two parallel lists.
boundingBoxes = [cv.boundingRect(c) for c in questionCnts]

# Sort once up front; the original re-sorted the whole list twice on every
# loop iteration, which was needlessly quadratic.
ordered = sorted(zip(questionCnts, boundingBoxes), key=lambda pair: pair[1][1])

questionCnt = []
boundingBox = []
for contour, box in ordered:
    questionCnt.append(contour)
    boundingBox.append(box)

questionCnts = questionCnt
boundingBoxes = boundingBox
# Grade the sheet: every question occupies one row of five bubbles.
options_per_row = 5
correct = 0

# ``warped`` is single-channel, so BGR colours drawn on it would collapse to
# their first component (0 for both red and green, i.e. black). Draw the
# annotations on a 3-channel copy so correct/incorrect marks are visible.
annotated = cv.cvtColor(warped, cv.COLOR_GRAY2BGR)

for (q, i) in enumerate(np.arange(0, len(questionCnts), options_per_row)):
    # Sort this row's bubbles left-to-right by the x of their bounding box.
    row = questionCnts[i:i + options_per_row]
    boundingBoxes = [cv.boundingRect(c) for c in row]
    (cnts, boundingBoxes) = zip(*sorted(zip(row, boundingBoxes),
                                        key=lambda b: b[1][0], reverse=False))

    # (filled pixel count, option index) of the most heavily filled bubble.
    bubbled = None
    for (j, c) in enumerate(cnts):
        # Mask that is 255 inside this bubble's contour (-1 = filled).
        mask = np.zeros(thresh.shape, dtype="uint8")
        cv.drawContours(mask, [c], -1, 255, -1)
        # Count the thresholded (marked) pixels that fall inside the bubble.
        mask = cv.bitwise_and(thresh, thresh, mask=mask)
        total = cv.countNonZero(mask)
        # The bubble with the most marked pixels is taken as the answer.
        if bubbled is None or total > bubbled[0]:
            bubbled = (total, j)

    # Compare with the answer key: red by default, green when correct.
    color = (0, 0, 255)
    k = ANSWER_KEY[q]
    if k == bubbled[1]:
        color = (0, 255, 0)
        correct += 1

    # Outline the bubble of the CORRECT answer in the result colour.
    cv.drawContours(annotated, [cnts[k]], -1, color, 3)

# Percentage of questions answered correctly, based on the key's size
# rather than a hard-coded question count.
score = (correct / float(len(ANSWER_KEY))) * 100
print("[INFO] score: {:.2f}%".format(score))
cv.putText(annotated, "{:.2f}%".format(score), (10, 30),
           cv.FONT_HERSHEY_SIMPLEX, 0.9, (0, 0, 255), 2)
cv.imshow("Original", image)
cv.imshow("Exam", annotated)
cv.waitKey(0)
cv.destroyAllWindows()