# 效果图和解释看我 c++版本 (For result images and an explanation, see my C++ version.)
import cv2
import numpy as np
import math
import os
def show(name, img):
    """Display ``img`` in a resizable window whose title is ``str(name)``."""
    title = str(name)
    cv2.namedWindow(title, cv2.WINDOW_NORMAL)
    # Request an 800 x 2000 window so large photos remain viewable.
    cv2.resizeWindow(title, 800, 2000)
    cv2.imshow(title, img)
def dis(cx, cy, tx, ty):
    """Return the Euclidean distance between the points (cx, cy) and (tx, ty).

    Args:
        cx, cy: coordinates of the first point.
        tx, ty: coordinates of the second point.

    Returns:
        The distance as a NumPy float64 scalar.
    """
    # Both coordinates must go inside one list: the second positional
    # argument of np.array is the dtype, not another element.
    first = np.array([cx, cy], dtype=np.float64)
    second = np.array([tx, ty], dtype=np.float64)
    return np.sqrt(np.sum(np.square(first - second)))
def subimage(image, center, theta, width, height):
    """Cut a rotated ``width`` x ``height`` patch out of ``image``.

    Args:
        image: source image handed to ``cv2.warpAffine``.
        center: (x, y) position in ``image`` that becomes the patch centre.
        theta: rotation of the patch in degrees.
        width, height: size of the returned patch in pixels.

    Returns:
        The warped patch; pixels falling outside ``image`` replicate the border.
    """
    theta *= np.pi / 180  # degrees -> radians
    cos_t = np.cos(theta)
    sin_t = np.sin(theta)
    half_w = width / 2
    half_h = height / 2

    # Position in the source image of the patch's top-left corner.
    origin_x = center[0] - cos_t * half_w + sin_t * half_h
    origin_y = center[1] - sin_t * half_w - cos_t * half_h

    # Patch -> source affine map; WARP_INVERSE_MAP tells OpenCV to use it as such.
    patch_to_source = np.array([
        [cos_t, -sin_t, origin_x],
        [sin_t, cos_t, origin_y],
    ])
    return cv2.warpAffine(
        image,
        patch_to_source,
        (width, height),
        flags=cv2.WARP_INVERSE_MAP,
        borderMode=cv2.BORDER_REPLICATE,
    )
def protheta(theta):
    """Map an angle in degrees to its correction angle.

    Angles below -45 become ``-(90 + theta)``; every other angle is negated.
    """
    return -(90 + theta) if theta < -45 else -theta
def _min_rect_area(contour):
    """Return the area of the minimum-area rotated rectangle around ``contour``."""
    # minAreaRect returns ((center_x, center_y), (width, height), angle).
    _, (rect_w, rect_h), _ = cv2.minAreaRect(contour)
    return rect_w * rect_h


def _draw_closed_polyline(image, points, color=(0, 0, 255), thickness=2):
    """Draw segments joining consecutive ``points`` and closing back to the first."""
    # Hand cv2.line tuples of plain Python ints rather than NumPy scalars.
    vertices = [tuple(int(v) for v in point) for point in points]
    for start, end in zip(vertices, vertices[1:] + vertices[:1]):
        cv2.line(image, start, end, color, thickness)


def _quadrant_corners(approx, centerx, centery):
    """Pick, per quadrant around the centroid, the polygon vertex farthest from it.

    ``approx`` is indexed as (N, 1, 2), the layout produced by ``cv2.approxPolyDP``.

    Returns:
        A list of four (x, y) arrays ordered top-left, top-right, bottom-right,
        bottom-left, or ``None`` when some quadrant contains no vertex.
    """
    quadrant_tests = (
        lambda x, y: x < centerx and y < centery,  # top-left
        lambda x, y: x > centerx and y < centery,  # top-right
        lambda x, y: x > centerx and y > centery,  # bottom-right
        lambda x, y: x < centerx and y > centery,  # bottom-left
    )
    corners = []
    for in_quadrant in quadrant_tests:
        best_vertex = None
        best_distance = 0
        for vertex in approx[:, 0]:
            x, y = vertex
            if not in_quadrant(x, y):
                continue
            distance = dis(centerx, centery, x, y)
            # Remember the vertex itself so a vertex from another quadrant
            # with an equal distance can never be selected by mistake.
            if distance > best_distance:
                best_vertex, best_distance = vertex, distance
        if best_vertex is None:
            return None
        corners.append(best_vertex)
    return corners


def _dominant_line_angle(lines):
    """Return the most frequent line inclination (degrees) among Hough segments.

    Angles within 2 degrees of an already seen angle vote for that angle.
    Vertical segments are ignored. Returns 0 when ``lines`` is ``None`` or
    contains no usable segment.
    """
    votes = {}
    for line in (lines if lines is not None else ()):
        x1, y1, x2, y2 = line[0]
        if x2 - x1 == 0:
            continue  # vertical segment: slope undefined
        angle = math.atan((y2 - y1) / (x2 - x1)) * 180 / np.pi
        for seen in votes:
            if abs(seen - angle) < 2:
                votes[seen] += 1
                break
        else:
            votes[angle] = 1
    if not votes:
        return 0
    # max() keeps the first angle reaching the highest count.
    return max(votes, key=votes.get)


def findphoto(filep, photo_name, file_save_path):
    """Find the page in a photo, rectify it, save it and preview a deskewed copy.

    Pipeline: adaptive threshold -> largest external contour (by minimum-area
    rectangle) -> polygon approximation -> one corner per quadrant around the
    contour centroid -> perspective warp to the full image size -> write the
    result to ``file_save_path + photo_name`` -> rotate by the dominant Hough
    line angle and display it. Intermediate stages are displayed with ``show``.

    Args:
        filep: directory that contains the photo (joined with "/").
        photo_name: file name of the photo; also appended to ``file_save_path``.
        file_save_path: prefix (e.g. ``"out/"``) the file name is appended to.

    Returns:
        None. An input that cannot be read, has no contours, or lacks a polygon
        vertex in one of the four quadrants is skipped with a printed message.

    Note:
        Blocks in ``cv2.waitKey(0)`` until a key is pressed, then closes all
        windows.
    """
    image_path = filep + "/" + photo_name
    img = cv2.imread(image_path)
    if img is None:
        # imread signals failure (missing file, non-image entry) with None.
        print("skip %s: cannot be read as an image" % image_path)
        return
    corner_canvas = img.copy()  # receives the detected quadrilateral
    pristine = img.copy()       # untouched pixels used for masking/warping

    # Grayscale conversion.
    img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    show("img1", img_gray)

    # Adaptive binarisation of the inverted image: Gaussian-weighted
    # 15x15 neighbourhood, constant -10 subtracted from the local threshold.
    thresh = cv2.adaptiveThreshold(~img_gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                   cv2.THRESH_BINARY, 15, -10)
    show("img2", thresh)

    # [-2] selects the contour list for both the 2-value (OpenCV 4.x) and the
    # 3-value (OpenCV 3.x) return conventions.
    contours = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]
    if len(contours) == 0:
        print("skip %s: no contours found" % image_path)
        cv2.destroyAllWindows()
        return

    # Contour whose minimum-area rectangle is largest (first one wins ties).
    largest = max(contours, key=_min_rect_area)

    # Draw the contour outline in green; it must be wrapped in a list,
    # otherwise each point is treated as a separate one-point contour.
    cv2.drawContours(img, [largest], -1, (0, 255, 0), 2)
    show("img3", img)

    # Draw the rotated bounding rectangle in red.
    rect = cv2.minAreaRect(largest)  # ((cx, cy), (w, h), angle)
    print(rect[-1])
    box = cv2.boxPoints(rect).astype(np.intp)
    print(box[1])
    _draw_closed_polyline(img, box)
    show("img4", img)

    print(type(largest))

    # Polygon approximation of the contour (epsilon 50 px, closed), in blue.
    approx = cv2.approxPolyDP(largest, 50, True)
    print(type(approx))
    cv2.polylines(img, [approx], True, (255, 0, 0), 2)
    show("img5", img)

    # Centroid of the contour points.
    centerx, centery = largest[:, 0, :].mean(axis=0)
    print(centerx)
    print(centery)
    print(approx.shape)

    cor = _quadrant_corners(approx, centerx, centery)
    if cor is None:
        print("skip %s: could not find a corner in every quadrant" % image_path)
        cv2.destroyAllWindows()
        return

    _draw_closed_polyline(corner_canvas, cor)
    show("img6", corner_canvas)
    print("Cor:")
    print(cor)

    # Keep only the pixels inside the detected quadrilateral.
    mask = np.zeros(pristine.shape, dtype=np.uint8)
    cv2.fillPoly(mask, [np.array(cor)], (255,) * pristine.shape[2])
    masked_image = cv2.bitwise_and(pristine, mask)
    show("img7", masked_image)
    print("shape")
    print(type(cor))
    print(cor[0][0])

    # Perspective transform mapping the quadrilateral onto the full frame.
    height, width = img.shape[:2]
    print("img width:%d height:%d" % (width, height))
    # Source order: top-left, top-right, bottom-left, bottom-right,
    # matching the destination order below.
    src_point = np.float32([cor[0], cor[1], cor[3], cor[2]])
    dst_point = np.float32([[0, 0], [width - 1, 0],
                            [0, height - 1], [width - 1, height - 1]])
    perspective_matrix = cv2.getPerspectiveTransform(src_point, dst_point)
    img_dst = cv2.warpPerspective(masked_image, perspective_matrix, (width, height))

    cv2.namedWindow('img_dst', 0)
    cv2.resizeWindow('img_dst', 700, 900)
    save_path = file_save_path + photo_name
    if not cv2.imwrite(save_path, img_dst):
        print("warning: could not write %s" % save_path)
    cv2.imshow("img_dst", img_dst)

    # Estimate the skew from Hough segments detected in the thresholded image.
    lines = cv2.HoughLinesP(thresh.astype(np.uint8), 1, np.pi / 180, 30,
                            minLineLength=40, maxLineGap=100)
    skew_angle = _dominant_line_angle(lines)

    # Pad with black so the half-diagonal fits and rotation does not crop.
    h, w = img_dst.shape[:2]
    diagonal = (w * w + h * h) ** 0.5
    add_w = int((diagonal - w) / 2)
    add_h = int((diagonal - h) / 2)
    print(add_w, add_h)
    padded = cv2.copyMakeBorder(img_dst, add_h, add_h, add_w, add_w,
                                cv2.BORDER_CONSTANT, value=[0, 0, 0])

    h, w = padded.shape[:2]
    center = (w // 2, h // 2)
    M = cv2.getRotationMatrix2D(center, skew_angle, 1.0)
    rotated = cv2.warpAffine(padded, M, (w, h), flags=cv2.INTER_CUBIC)
    show("rotated", rotated)

    cv2.waitKey(0)
    cv2.destroyAllWindows()
if __name__ == '__main__':
    # Batch run: process every entry of the test-photo directory and write
    # the rectified images under "testphoto_save/".
    # (A single-image run against /home/yfzx/dataset/TextCor with output to
    # "photo1/" was used previously.)
    filep = "/home/yfzx/project/workbook/textCorrection/testphoto"
    file_save_path = "testphoto_save/"
    for photoname in os.listdir(filep):
        print(photoname)
        findphoto(filep, photoname, file_save_path)