Overall architecture:
Unet network architecture:
import tensorflow as tf

def convolutional(input_data, filters_shape, trainable, name, downsample=False, activate=True, bn=True):
    with tf.variable_scope(name):
        if downsample:
            pad_h, pad_w = (filters_shape[0] - 2) // 2 + 1, (filters_shape[1] - 2) // 2 + 1
            paddings = tf.constant([[0, 0], [pad_h, pad_h], [pad_w, pad_w], [0, 0]])
            input_data = tf.pad(input_data, paddings, 'CONSTANT')
            strides = (1, 2, 2, 1)
            padding = 'VALID'
        else:
            strides = (1, 1, 1, 1)
            padding = "SAME"
        weight = tf.get_variable(name='weight', dtype=tf.float32, trainable=True,
                                 shape=filters_shape, initializer=tf.random_normal_initializer(stddev=0.01))
        conv = tf.nn.conv2d(input=input_data, filter=weight, strides=strides, padding=padding)
        if bn:
            conv = tf.layers.batch_normalization(conv, beta_initializer=tf.zeros_initializer(),
                                                 gamma_initializer=tf.ones_initializer(),
                                                 moving_mean_initializer=tf.zeros_initializer(),
                                                 moving_variance_initializer=tf.ones_initializer(), training=trainable)
        else:
            bias = tf.get_variable(name='bias', shape=filters_shape[-1], trainable=True,
                                   dtype=tf.float32, initializer=tf.constant_initializer(0.0))
            conv = tf.nn.bias_add(conv, bias)
        if activate:
            conv = tf.nn.leaky_relu(conv, alpha=0.1)
    return conv

def upsample(input_data, name, method="deconv"):
    assert method in ["resize", "deconv"]
    if method == "resize":
        with tf.variable_scope(name):
            input_shape = tf.shape(input_data)
            output = tf.image.resize_nearest_neighbor(input_data, (input_shape[1] * 2, input_shape[2] * 2))
    if method == "deconv":
        # Replace resize_nearest_neighbor with conv2d_transpose to support TensorRT optimization.
        num_filter = input_data.shape.as_list()[-1]
        output = tf.layers.conv2d_transpose(input_data, num_filter // 2, kernel_size=4, padding='same',
                                            strides=(2, 2), kernel_initializer=tf.random_normal_initializer())
    return output

def Unet(images, filters=8, name='unet'):
    with tf.variable_scope(name):
        endpoints = {}
        conv = convolutional(images, [3, 3, 3, filters], trainable=True, name='conv1')
        conv = convolutional(conv, [3, 3, filters, filters], trainable=True, name='conv2')
        endpoints['C1'] = conv
        # downsample 1
        conv = convolutional(conv, [3, 3, filters, filters], trainable=True, name='conv3', downsample=True)
        conv = convolutional(conv, [3, 3, filters, filters * 2], trainable=True, name='conv4')
        conv = convolutional(conv, [3, 3, filters * 2, filters * 2], trainable=True, name='conv5')
        endpoints['C2'] = conv
        # downsample 2
        conv = convolutional(conv, [3, 3, filters * 2, filters * 2], trainable=True, name='conv6', downsample=True)
        conv = convolutional(conv, [3, 3, filters * 2, filters * 4], trainable=True, name='conv7')
        conv = convolutional(conv, [3, 3, filters * 4, filters * 4], trainable=True, name='conv8')
        endpoints['C3'] = conv
        # downsample 3
        conv = convolutional(conv, [3, 3, filters * 4, filters * 4], trainable=True, name='conv9', downsample=True)
        conv = convolutional(conv, [3, 3, filters * 4, filters * 8], trainable=True, name='conv10')
        conv = convolutional(conv, [3, 3, filters * 8, filters * 8], trainable=True, name='conv11')
        endpoints['C4'] = conv
        # downsample 4
        conv = convolutional(conv, [3, 3, filters * 8, filters * 8], trainable=True, name='conv12', downsample=True)
        conv = convolutional(conv, [3, 3, filters * 8, filters * 16], trainable=True, name='conv13')
        conv = convolutional(conv, [3, 3, filters * 16, filters * 16], trainable=True, name='conv14')
        endpoints['C5'] = conv
        # downsample 5
        conv = convolutional(conv, [3, 3, filters * 16, filters * 16], trainable=True, name='conv15', downsample=True)
        conv = convolutional(conv, [3, 3, filters * 16, filters * 32], trainable=True, name='conv16')
        conv = convolutional(conv, [3, 3, filters * 32, filters * 32], trainable=True, name='conv17')
        endpoints['C6'] = conv
        # downsample 6
        conv = convolutional(conv, [3, 3, filters * 32, filters * 32], trainable=True, name='conv18', downsample=True)
        conv = convolutional(conv, [3, 3, filters * 32, filters * 64], trainable=True, name='conv19')
        conv = convolutional(conv, [3, 3, filters * 64, filters * 64], trainable=True, name='conv20')
        endpoints['C7'] = conv
        # Decoder: repeatedly upsample and fuse with the matching encoder endpoint.
        for i in range(7, 1, -1):
            with tf.variable_scope('Ronghe%d' % i):
                uplayer = upsample(conv, 'deconv%d' % (8 - i), method="deconv")
                concat = tf.concat([endpoints['C%d' % (i - 1)], uplayer], axis=-1)
                dim = concat.get_shape()[-1].value
                conv = convolutional(concat, [3, 3, dim, dim // 2], trainable=True, name='conv1')
                conv = convolutional(conv, [3, 3, dim // 2, dim // 2], trainable=True, name='conv2')
        out = convolutional(conv, [3, 3, dim // 2, 1], trainable=True, name='out', activate=False, bn=False)
    return out
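For context, a minimal sketch of wiring this Unet into a TensorFlow 1.x graph (the 256x256 placeholder shape is an illustrative assumption; the encoder downsamples six times, so the input size should be divisible by 64):

images = tf.placeholder(tf.float32, shape=(None, 256, 256, 3))
logits = Unet(images, filters=8)   # (None, 256, 256, 1) single-channel edge logits
edge_prob = tf.nn.sigmoid(logits)  # per-pixel edge probability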
HED network architecture:
# RCF, an evolved version of HED
def charsiu2_binary_net(input_image, is_training):
    filter_initializer = tf.contrib.layers.xavier_initializer()
    activation_func = tf.nn.relu6

    def conv2d(inputs, filters, kernel_size, stride, scope=''):
        with tf.variable_scope(scope):
            with tf.variable_scope('conv2d'):
                outputs = tf.layers.conv2d(inputs,
                                           filters,
                                           kernel_size,
                                           strides=(stride, stride),
                                           padding='same',
                                           activation=None,
                                           use_bias=False,
                                           kernel_initializer=filter_initializer)
                outputs = tf.layers.batch_normalization(outputs, training=is_training)
                outputs = tf.nn.relu(outputs)
            return outputs

    def _1x1_conv2d(inputs, filters, stride):
        kernel_size = [1, 1]
        with tf.variable_scope('1x1_conv2d'):
            outputs = tf.layers.conv2d(inputs,
                                       filters,
                                       kernel_size,
                                       strides=(stride, stride),
                                       padding='same',
                                       activation=None,
                                       use_bias=False,
                                       kernel_initializer=filter_initializer)
            outputs = tf.layers.batch_normalization(outputs, training=is_training)
        return outputs

    def expansion_conv2d(inputs, expansion, stride):
        input_shape = inputs.get_shape().as_list()
        assert len(input_shape) == 4
        filters = input_shape[3] * expansion
        kernel_size = [1, 1]
        with tf.variable_scope('expansion_1x1_conv2d'):
            outputs = tf.layers.conv2d(inputs,
                                       filters,
                                       kernel_size,
                                       strides=(stride, stride),
                                       padding='same',
                                       activation=None,
                                       use_bias=False,
                                       kernel_initializer=filter_initializer)
            outputs = tf.layers.batch_normalization(outputs, training=is_training)
            outputs = activation_func(outputs)
        return outputs

    def projection_conv2d(inputs, filters, stride):
        kernel_size = [1, 1]
        with tf.variable_scope('projection_1x1_conv2d'):
            outputs = tf.layers.conv2d(inputs,
                                       filters,
                                       kernel_size,
                                       strides=(stride, stride),
                                       padding='same',
                                       activation=None,
                                       use_bias=False,
                                       kernel_initializer=filter_initializer)
            outputs = tf.layers.batch_normalization(outputs, training=is_training)
        return outputs

    def depthwise_conv2d(inputs, depthwise_conv_kernel_size, stride):
        with tf.variable_scope('depthwise_conv2d'):
            outputs = tf.contrib.layers.separable_conv2d(
                inputs,
                None,
                depthwise_conv_kernel_size,
                depth_multiplier=1,
                stride=(stride, stride),
                padding='SAME',
                activation_fn=None,
                weights_initializer=filter_initializer,
                biases_initializer=None)
            outputs = tf.layers.batch_normalization(outputs, training=is_training)
            outputs = activation_func(outputs)
        return outputs

    def inverted_residual_block(inputs, filters, stride, expansion=6, scope=''):
        assert stride == 1 or stride == 2
        depthwise_conv_kernel_size = [3, 3]
        pointwise_conv_filters = filters
        with tf.variable_scope(scope):
            net = inputs
            net = expansion_conv2d(net, expansion, stride=1)
            net = depthwise_conv2d(net, depthwise_conv_kernel_size, stride=stride)
            net = projection_conv2d(net, pointwise_conv_filters, stride=1)
            if stride == 1:
                # Residual shortcut; project the input first if the channel counts differ.
                if net.get_shape().as_list()[3] != inputs.get_shape().as_list()[3]:
                    inputs = _1x1_conv2d(inputs, net.get_shape().as_list()[3], stride=1)
                net = net + inputs
                return net
            else:
                return net

    def _dsn_1x1_conv2d(inputs, filters):
        kernel_size = [1, 1]
        outputs = tf.layers.conv2d(inputs,
                                   filters,
                                   kernel_size,
                                   padding='same',
                                   activation=None,
                                   use_bias=False,
                                   kernel_initializer=filter_initializer,
                                   kernel_regularizer=None)
        outputs = tf.layers.batch_normalization(outputs, training=is_training)
        return outputs

    def _output_1x1_conv2d(inputs, filters):
        kernel_size = [1, 1]
        outputs = tf.layers.conv2d(inputs,
                                   filters,
                                   kernel_size,
                                   padding='same',
                                   activation=None,
                                   use_bias=True,
                                   kernel_initializer=filter_initializer,
                                   kernel_regularizer=None)
        return outputs

    def _dsn_deconv2d_with_upsample_factor(inputs, filters, upsample_factor):
        kernel_size = [2 * upsample_factor, 2 * upsample_factor]
        outputs = tf.layers.conv2d_transpose(inputs,
                                             filters,
                                             kernel_size,
                                             strides=(upsample_factor, upsample_factor),
                                             padding='same',
                                             activation=None,
                                             use_bias=True,
                                             kernel_initializer=filter_initializer,
                                             kernel_regularizer=None)
        return outputs

    def _dsn_1x1_conv2d_elementhwise(prev_layer_list, combine_fn=tf.add, scope='_dsn_1x1_conv2d_elementhwise'):
        outputs = _dsn_1x1_conv2d(prev_layer_list[0], 1)
        with tf.variable_scope(scope):
            for l in prev_layer_list[1:]:
                # Accumulate the 1x1-projected maps element-wise. (The original line
                # combined with `l` itself, which silently dropped the accumulator.)
                outputs = combine_fn(_dsn_1x1_conv2d(l, 1), outputs)
        return outputs

    # Network definition
    with tf.variable_scope('Charsiu2Binary', values=[input_image]):
        net = input_image
        # MobileNet-v2-style backbone
        with tf.variable_scope('mobilenet_v2'):
            net = conv2d(net, 3, [3, 3], stride=1, scope='block0_0')
            net = conv2d(net, 6, [3, 3], stride=1, scope='block0_1')
            dsn0 = net
            net = conv2d(net, 12, [3, 3], stride=2, scope='block0_2')  # size/2
            dsn1_1 = net
            net = inverted_residual_block(net, 6, stride=1, expansion=1, scope='block1_0')
            dsn1_2 = net
            dsn2 = net
            net = inverted_residual_block(net, 12, stride=2, scope='block2_0')  # size/4
            dsn2_1 = net
            net = inverted_residual_block(net, 12, stride=1, scope='block2_1')
            dsn2_2 = net
            dsn3 = net
            net = inverted_residual_block(net, 24, stride=2, scope='block3_0')  # size/8
            dsn3_1 = net
            net = inverted_residual_block(net, 24, stride=1, scope='block3_1')
            dsn3_2 = net
            net = inverted_residual_block(net, 24, stride=1, scope='block3_2')
            dsn3_3 = net
            dsn4 = net
            net = inverted_residual_block(net, 48, stride=2, scope='block4_0')  # size/16
            dsn4_1 = net
            net = inverted_residual_block(net, 48, stride=1, scope='block4_1')
            dsn4_2 = net
            net = inverted_residual_block(net, 48, stride=1, scope='block4_2')
            dsn4_3 = net
            net = inverted_residual_block(net, 48, stride=1, scope='block4_3')
            dsn4_4 = net
            net = inverted_residual_block(net, 64, stride=1, scope='block5_0')
            dsn5_1 = net
            net = inverted_residual_block(net, 64, stride=1, scope='block5_1')
            dsn5_2 = net
            net = inverted_residual_block(net, 64, stride=1, scope='block5_2')
            dsn5_3 = net
            dsn5 = net
        ## DSN (deeply supervised side output) layers
        with tf.variable_scope('dsn0'):
            dsn0 = _dsn_1x1_conv2d(dsn0, 1)
            print(dsn0)
        with tf.variable_scope('dsn1'):
            dsn1_1 = _dsn_1x1_conv2d(dsn1_1, 3)
            dsn1_2 = _dsn_1x1_conv2d(dsn1_2, 3)
            dsn1 = _dsn_1x1_conv2d_elementhwise([dsn1_1, dsn1_2])
            dsn1 = _dsn_1x1_conv2d(dsn1, 1)
            dsn1 = _dsn_deconv2d_with_upsample_factor(dsn1, 1, upsample_factor=2)
            print(dsn1)
        with tf.variable_scope('dsn2'):
            dsn2_1 = _dsn_1x1_conv2d(dsn2_1, 6)
            dsn2_2 = _dsn_1x1_conv2d(dsn2_2, 6)
            dsn2 = _dsn_1x1_conv2d_elementhwise([dsn2_1, dsn2_2])
            dsn2 = _dsn_1x1_conv2d(dsn2, 1)
            dsn2 = _dsn_deconv2d_with_upsample_factor(dsn2, 1, upsample_factor=4)
            print(dsn2)
        with tf.variable_scope('dsn3'):
            dsn3_1 = _dsn_1x1_conv2d(dsn3_1, 12)
            dsn3_2 = _dsn_1x1_conv2d(dsn3_2, 12)
            dsn3_3 = _dsn_1x1_conv2d(dsn3_3, 12)
            dsn3 = _dsn_1x1_conv2d_elementhwise([dsn3_1, dsn3_2, dsn3_3])
            dsn3 = _dsn_1x1_conv2d(dsn3, 1)
            dsn3 = _dsn_deconv2d_with_upsample_factor(dsn3, 1, upsample_factor=8)
            print(dsn3)
        with tf.variable_scope('dsn4'):
            dsn4_1 = _dsn_1x1_conv2d(dsn4_1, 24)
            dsn4_2 = _dsn_1x1_conv2d(dsn4_2, 24)
            dsn4_3 = _dsn_1x1_conv2d(dsn4_3, 24)
            dsn4_4 = _dsn_1x1_conv2d(dsn4_4, 24)
            dsn4 = _dsn_1x1_conv2d_elementhwise([dsn4_1, dsn4_2, dsn4_3, dsn4_4])
            dsn4 = _dsn_1x1_conv2d(dsn4, 1)
            dsn4 = _dsn_deconv2d_with_upsample_factor(dsn4, 1, upsample_factor=16)
            print(dsn4)
        with tf.variable_scope('dsn5'):
            dsn5_1 = _dsn_1x1_conv2d(dsn5_1, 32)
            dsn5_2 = _dsn_1x1_conv2d(dsn5_2, 32)
            dsn5_3 = _dsn_1x1_conv2d(dsn5_3, 32)
            dsn5 = _dsn_1x1_conv2d_elementhwise([dsn5_1, dsn5_2, dsn5_3])
            dsn5 = _dsn_1x1_conv2d(dsn5, 1)
            dsn5 = _dsn_deconv2d_with_upsample_factor(dsn5, 1, upsample_factor=16)
            print(dsn5)
        with tf.variable_scope('dsn_fuse'):
            dsn_fuse = tf.concat([dsn0, dsn1, dsn2, dsn3, dsn4, dsn5], 3)
            dsn_fuse = _output_1x1_conv2d(dsn_fuse, 1)
    return dsn_fuse
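Note that charsiu2_binary_net only returns the fused map dsn_fuse. In HED/RCF training each dsn side output is usually supervised as well; a hedged sketch of such a deep-supervision loss, assuming the function were modified to also return the six side maps:

def deep_supervision_loss(label, side_logits_list, fuse_logits):
    # Sum of per-pixel sigmoid cross-entropy over every side output plus the fused one.
    # label: float32 tensor in {0, 1} with the same shape as each logits tensor.
    losses = []
    for logits in list(side_logits_list) + [fuse_logits]:
        ce = tf.nn.sigmoid_cross_entropy_with_logits(labels=label, logits=logits)
        losses.append(tf.reduce_mean(ce))
    return tf.add_n(losses)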
Training code:
import re
import os
import time
import numpy as np
import cv2
import tensorflow as tf
from nets.unet import Unet
from nets.modelnet import model
from nets.graph_define import charsiu_binary_net
from nets.lossfunc import *
from nets.func import params_usage
from tools import img_mask_color_blending
from tfsummary_logger import TFSummaryLogger
import cfg
from generator import Generator

if __name__ == '__main__':
    os.environ['CUDA_VISIBLE_DEVICES'] = cfg.gpulist
    # Network graph
    with tf.name_scope('Input'):
        input_image = tf.placeholder(tf.uint8, shape=(None, cfg.input_h, cfg.input_w, 3))
        input_image_normal = tf.div(tf.to_float(input_image), 255.0)
        label_image = tf.placeholder(tf.uint8, shape=(None, cfg.input_h, cfg.input_w))
        label_image_normal = tf.cast(tf.expand_dims(label_image, -1), dtype=tf.float32)
    # Learning-rate placeholder
    learning_rate = tf.placeholder(tf.float32, shape=[])
    # dsn_fuse = charsiu_binary_net(input_image_normal, name='Modelnet')
    dsn_fuse = Unet(input_image_normal, name='Modelnet')
    output_sigmoid = tf.nn.sigmoid(dsn_fuse, name='output_sigmoid')
    binary_dsn_fuse = tf.where(tf.greater(output_sigmoid, cfg.eval_sigmoid_score),
                               tf.ones_like(dsn_fuse, tf.uint8) * 255, tf.zeros_like(dsn_fuse, tf.uint8))
    # Backpropagation
    with tf.variable_scope('BP'):
        with tf.variable_scope('focal_loss'):
            cost = confidence_loss(label_image_normal, output_sigmoid, alpha=0.99)
        with tf.variable_scope('adam_vars'):
            # Make sure the BN moving statistics are updated at every train step
            with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
                train_op = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
    num_params = params_usage()
    print('Number of parameters: %d' % num_params)
    # Saver for the trained network weights
    train_weights = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='Modelnet')
    saver = tf.train.Saver(train_weights, max_to_keep=10)
    # Summary logger
    tfsummary_logger = TFSummaryLogger(cfg.logspath, tf.get_default_graph())
    TDG = Generator(cfg.traintxt, (cfg.input_h, cfg.input_w), cfg.batch_size)
    TDG.shuffle()
    # NOTE: validation reuses cfg.traintxt here; point it at a held-out list if one is available.
    VDG = Generator(cfg.traintxt, (cfg.input_h, cfg.input_w), cfg.batch_size)
    # Start training
    global_init = tf.global_variables_initializer()
    tfconfig = tf.ConfigProto(allow_soft_placement=False, log_device_placement=False)
    tfconfig.gpu_options.allow_growth = True
    sess = tf.Session(config=tfconfig)
    sess.run(global_init)
    # Restore the most recent checkpoint, if any
    min_val_loss = 1000000.
    ckpt = tf.train.latest_checkpoint(cfg.checkpoint_path)
    if cfg.restore and ckpt:
        epoch = int(ckpt.split('-')[-1])
        # min_val_loss = float(re.findall(r'_val_loss_(.+?)_loss_', ckpt)[0])
        print('continue training from previous checkpoint')
        print(ckpt)
        # print('val loss {:.6f}'.format(min_val_loss))
        saver.restore(sess, ckpt)
    else:
        epoch = 0
    lr = max(cfg.learning_rate * (cfg.decay_rate ** (epoch // cfg.decay_epoch)), 1e-4)
    while epoch <= cfg.max_epoch:
        total_loss = 0
        for step in range(TDG.steps_per_epoch):
            t = time.time()
            data = TDG.generator()
            t1 = time.time()
            print('Load image time: %.4f' % (time.time() - t))
            _, ml = sess.run([train_op, cost], feed_dict={input_image: data[0], label_image: data[1], learning_rate: lr})
            print('Net time %.4f' % (time.time() - t1))
            global_step = TDG.steps_per_epoch * epoch + step
            tfsummary_logger.log_scalar(tag='step_loss', value=ml, step=global_step)
            total_loss += ml
            print('Epoch:%d / %d Step: %d / %d ' % (epoch, cfg.max_epoch, step + 1, TDG.steps_per_epoch))
            print('Total loss %.6f model loss %.6f Learning rate %.8f' % (total_loss / (step + 1), ml, lr))
            if np.isnan(ml):
                print('Loss diverged, stop training')
                break
        else:
            # for/else: only reached when the epoch finished without a NaN loss
            tfsummary_logger.log_scalar(tag='train_loss', value=total_loss, step=epoch)
            if True:  # set to False to skip validation and reuse the training loss
                print('Val...')
                epoch += 1
                global_step = TDG.steps_per_epoch * epoch
                total_loss = total_loss / TDG.steps_per_epoch
                val_loss = 0
                img_num = 0
                log_img_folder = os.path.join(cfg.logspath, 'Epoch_%d' % epoch)
                log_imgs = []
                for i in range(VDG.steps_per_epoch):
                    val_data = VDG.generator()
                    vml, sigmoid_img, binary_img = sess.run([cost, output_sigmoid, binary_dsn_fuse], feed_dict={input_image: val_data[0], label_image: val_data[1]})
                    val_loss += vml
                    # log_img = img_mask_color_blending(val_data[0][0], binary_img[0, :, :, 0], [255])
                    log_img = np.concatenate([val_data[1][0] * 255, binary_img[0][:, :, 0], sigmoid_img[0][:, :, 0] * 255], axis=1)
                    log_imgs.append(log_img)
                tfsummary_logger.log_images(tag='Result', images=log_imgs, step=epoch)
                val_loss = val_loss / VDG.steps_per_epoch
                print('Eval loss {:.4f}'.format(val_loss))
                tfsummary_logger.log_scalar(tag='val_loss', value=val_loss, step=epoch)
            else:
                val_loss = total_loss
            if val_loss < min_val_loss:
                min_val_loss = val_loss
                saver.save(sess, os.path.join(cfg.checkpoint_path, 'Val_loss_%.6f_loss_%.6f_epoch' % (val_loss, total_loss)),
                           global_step=epoch, write_meta_graph=False)
                print('Model saved!')
            else:
                print('val_loss did not improve; model not saved.')
            if epoch % cfg.decay_epoch == 0:
                lr = max(lr * cfg.decay_rate, 1e-8)
            TDG.shuffle()
            continue
        break  # reached only via the NaN break above: abort training
    sess.close()
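The post-processing below loads a frozen .pb, which this training script does not produce. A hedged export sketch using TF 1.x graph freezing (the output node name is an assumption and must match an op that actually exists in your graph):

def export_frozen_graph(sess, output_node_names, pb_path):
    # Bake the trained variables into constants so the graph file is self-contained.
    frozen = tf.graph_util.convert_variables_to_constants(
        sess, sess.graph.as_graph_def(), output_node_names)
    with tf.gfile.GFile(pb_path, 'wb') as f:
        f.write(frozen.SerializeToString())

# e.g. export_frozen_graph(sess, ['output_sigmoid'], './models/edge/edge.pb')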
Post-processing, Python (OpenCV) version:
import logging
import math
from pathlib import Path
import cv2
import numpy as np
import tensorflow as tf
import time

__SHOW_PROCESS = False
__model_input_height = 256
__model_input_width = __model_input_height
__model_input_layer_name = 'input_img_normal:0'
__model_input_layer2_name = 'input_sigmoid_score:0'
__model_output_layer_name = 'output_binary:0'
__HoughLinesPThreshold = 20
__HoughLinesPMinLinLength = __model_input_height / 6.0
__HoughLinesPMaxLineGap = __model_input_height / 25.0
__min_distance_to_merge = __model_input_height / 10
__min_angle_to_merge = 10

# Angle of a line segment, in degrees
def __line_degress(line):
    orientation = math.atan2((line[1] - line[3]), (line[0] - line[2]))
    degress = abs(math.degrees(orientation))
    return degress

# Angle between two vectors sharing the same origin
def __two_vector_angle(center, m, n):
    theta = math.atan2(m[0] - center[0], m[1] - center[1]) - math.atan2(n[0] - center[0], n[1] - center[1])
    if theta > math.pi:
        theta -= 2 * math.pi
    if theta < -math.pi:
        theta += 2 * math.pi
    return abs(theta * 180.0 / math.pi)

# Distance between two line segments
def __line_distance(line1, line2):
    def __line_magnitude(x1, y1, x2, y2):
        lineMagnitude = math.sqrt(math.pow((x2 - x1), 2) + math.pow((y2 - y1), 2))
        return lineMagnitude

    def __point_to_line_distance(point, line):
        px, py = point
        x1, y1, x2, y2 = line
        line_magnitude = __line_magnitude(x1, y1, x2, y2)
        if line_magnitude < 0.00000001:
            return 9999
        else:
            u1 = (((px - x1) * (x2 - x1)) + ((py - y1) * (y2 - y1)))
            u = u1 / (line_magnitude * line_magnitude)
            if (u < 0.00001) or (u > 1):
                # The closest point does not fall within the segment,
                # so take the shorter distance to an endpoint.
                ix = __line_magnitude(px, py, x1, y1)
                iy = __line_magnitude(px, py, x2, y2)
                if ix > iy:
                    distance = iy
                else:
                    distance = ix
            else:
                # The perpendicular foot lies on the segment; use the projection formula.
                ix = x1 + u * (x2 - x1)
                iy = y1 + u * (y2 - y1)
                distance = __line_magnitude(px, py, ix, iy)
            return distance

    dist1 = __point_to_line_distance((line1[0], line1[1]), line2)
    dist2 = __point_to_line_distance((line1[2], line1[3]), line2)
    dist3 = __point_to_line_distance((line2[0], line2[1]), line1)
    dist4 = __point_to_line_distance((line2[2], line2[3]), line1)
    return min(dist1, dist2, dist3, dist4)

# Merge the segments within one group
def __merge_lines_segments(lines_group):
    if len(lines_group) == 1:
        return lines_group[0]
    points = []
    for x1, y1, x2, y2 in lines_group:
        points.append((x1, y1))
        points.append((x2, y2))
    degress = __line_degress(lines_group[0])
    if 45 < degress < (90 + 45):
        # Vertical-ish lines: sort by y and take the topmost and bottommost points
        points = sorted(points, key=lambda point: point[1])
    else:
        # Horizontal-ish lines: sort by x and take the leftmost and rightmost points
        points = sorted(points, key=lambda point: point[0])
    return (*points[0], *points[-1])

# Merge the Hough line segments
def __merge_hough_lines(lines):
    # First grouping, by slope angle (horizontal vs. vertical)
    lines_x = []
    lines_y = []
    for l in lines:
        degress = __line_degress(l)
        if 45 < degress < (90 + 45):
            lines_x.append(l)
        else:
            lines_y.append(l)
    # Merge segments based on the angle and distance between them
    super_lines = []
    # Second grouping, by distance and angle
    for lines_group in [lines_x, lines_y]:
        for idx, line in enumerate(lines_group):
            group_updated = False
            # Can this segment be merged into an existing group?
            for group in super_lines:
                for line2 in group:
                    if __line_distance(line2, line) < __min_distance_to_merge:
                        degress_i = __line_degress(line)
                        degress_j = __line_degress(line2)
                        if int(abs(degress_i - degress_j)) < __min_angle_to_merge:
                            group.append(line)
                            group_updated = True
                            break
                if group_updated:
                    break
            # Segments that fit no existing group start a new one,
            # seeded with the remaining nearby segments.
            if not group_updated:
                new_group = []
                new_group.append(line)
                for line2 in lines_group[idx + 1:]:
                    if __line_distance(line2, line) < __min_distance_to_merge:
                        degress_i = __line_degress(line)
                        degress_j = __line_degress(line2)
                        if int(abs(degress_i - degress_j)) < __min_angle_to_merge:
                            new_group.append(line)
                super_lines.append(new_group)
    # Merge each group into a single segment
    final_lines = []
    for lines_group in super_lines:
        final_lines.append(__merge_lines_segments(lines_group))
    return final_lines

# Intersection point of two lines
def __line_intersection(line1, line2):
    xdiff = (line1[0][0] - line1[1][0], line2[0][0] - line2[1][0])
    ydiff = (line1[0][1] - line1[1][1], line2[0][1] - line2[1][1])

    def det(a, b):
        return a[0] * b[1] - a[1] * b[0]

    div = det(xdiff, ydiff)
    if div == 0:
        return None
    d = (det(*line1), det(*line2))
    x = det(d, xdiff) / div
    y = det(d, ydiff) / div
    return (int(x), int(y))

# Find the bounding quadrilateral
def __find_rect(lines):
    # Group the segments
    lines_x = []  # vertical lines
    lines_y = []  # horizontal lines
    for l in lines:
        degress = __line_degress(l)
        if 45 < degress < (90 + 45):
            lines_x.append(((l[0], l[1]), (l[2], l[3])))
        else:
            lines_y.append(((l[0], l[1]), (l[2], l[3])))
    lines_x = sorted(lines_x, key=lambda x: (x[0][0] + x[1][0]) / 2)  # left to right
    lines_y = sorted(lines_y, key=lambda x: (x[0][1] + x[1][1]) / 2)  # top to bottom
    if len(lines_x) < 2 or len(lines_y) < 2:
        return None
    top_left_point = __line_intersection(lines_x[0], lines_y[0])
    top_right_point = __line_intersection(lines_x[-1], lines_y[0])
    bottom_right_point = __line_intersection(lines_x[-1], lines_y[-1])
    bottom_left_point = __line_intersection(lines_x[0], lines_y[-1])
    return (top_left_point, top_right_point, bottom_right_point, bottom_left_point)

def __img_straighten(image_origin, src_vertices, output_width=720, output_height=456):
    rect_dist = [(0, 0), (0, output_height), (output_width, 0), (output_width, output_height)]
    src = [src_vertices[0], src_vertices[3], src_vertices[1], src_vertices[2]]
    m1 = cv2.getPerspectiveTransform(np.float32(src), np.float32(rect_dist))
    img_new = cv2.warpPerspective(image_origin, m1, (output_width, output_height))
    return img_new

# Load the frozen model
def __load_tf_session(pbfile):
    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(pbfile, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')
    tfconfig = tf.ConfigProto(allow_soft_placement=False, log_device_placement=False)
    tfconfig.gpu_options.allow_growth = True
    return tf.Session(config=tfconfig, graph=detection_graph)

# Edge detection
tf_session_edge = __load_tf_session(pbfile='./models/edge/xxxxxx-443.pb')

def card_edge_detect(origin_img, scores=0.9, imgpath='0.jpg'):
    try:
        x_ratio = float(origin_img.shape[1]) / __model_input_width
        y_ratio = float(origin_img.shape[0]) / __model_input_height
        img = cv2.resize(origin_img, (__model_input_width, __model_input_height), 0, 0, cv2.INTER_LINEAR)
        input_img = np.float32(img) / 255.0
        # Run the network
        output_dict = tf_session_edge.run([__model_output_layer_name], feed_dict={
            __model_input_layer_name: [input_img],
            __model_input_layer2_name: scores  # remove this feed only when running hkold-edge-256X256-922000.pb
        })
        edge_img = output_dict[0].squeeze().astype(np.uint8)
        if __SHOW_PROCESS:
            cv2.imwrite(imgpath.replace('.jpg', '_edge_img.jpg'), edge_img * 255)
        # Detect line segments with the Hough transform
        lines = cv2.HoughLinesP(edge_img,
                                rho=1,
                                theta=np.pi * 1 / 180,
                                threshold=__HoughLinesPThreshold,
                                minLineLength=__HoughLinesPMinLinLength,
                                maxLineGap=__HoughLinesPMaxLineGap)
        if lines is None:
            return None
        lines = lines.squeeze(axis=1)
        if __SHOW_PROCESS:
            show_img = origin_img.copy()
            for l in lines:
                x1, y1, x2, y2 = l
                x1, y1, x2, y2 = int(x1 * x_ratio), int(y1 * y_ratio), int(x2 * x_ratio), int(y2 * y_ratio)
                cv2.line(show_img, (x1, y1), (x2, y2), (0, 0, 255), 1)
            cv2.imwrite(imgpath.replace('.jpg', '_HoughLinesP.jpg'), show_img)
        # Merge the segments
        lines = __merge_hough_lines(lines)
        if __SHOW_PROCESS:
            show_img = origin_img.copy()
            for l in lines:
                x1, y1, x2, y2 = l
                x1, y1, x2, y2 = int(x1 * x_ratio), int(y1 * y_ratio), int(x2 * x_ratio), int(y2 * y_ratio)
                cv2.line(show_img, (x1, y1), (x2, y2), (0, 0, 255), 1)
            cv2.imwrite(imgpath.replace('.jpg', '__merge_hough_lines.jpg'), show_img)
        # Find the quadrilateral
        rect_points = __find_rect(lines)
        if rect_points is None:
            return None
        rect_points = np.array(rect_points)
        rect_points[:, 0] = rect_points[:, 0] * x_ratio
        rect_points[:, 1] = rect_points[:, 1] * y_ratio
        # astype returns a new array, so assign it; cv2.contourArea below needs int32 points
        rect_points = rect_points.astype(np.int32)
        if __SHOW_PROCESS:
            show_img = origin_img.copy()
            for (x1, y1), (x2, y2) in [(rect_points[0], rect_points[1]),
                                       (rect_points[1], rect_points[2]),
                                       (rect_points[2], rect_points[3]),
                                       (rect_points[3], rect_points[0]),
                                       ]:
                cv2.line(show_img, (x1, y1), (x2, y2), (0, 0, 255), 2)
            cv2.imwrite(imgpath.replace('.jpg', '__rect.jpg'), show_img)
        # Filter by area
        cnt_area_size = cv2.contourArea(rect_points)
        total_area_size = origin_img.shape[0] * origin_img.shape[1]
        if cnt_area_size / total_area_size < 0.2 or cnt_area_size / total_area_size > 0.99:
            return None
        return rect_points
    except Exception:
        logging.exception('edge_detect exception!!!')
        return None
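A hedged end-to-end usage sketch ('test.jpg' is an illustrative path; card_edge_detect returns the four corners already scaled back to the original image, ordered top-left, top-right, bottom-right, bottom-left):

img = cv2.imread('test.jpg')
rect = card_edge_detect(img, scores=0.9)
if rect is not None:
    warped = __img_straighten(img, [tuple(p) for p in rect])
    cv2.imwrite('test_card.jpg', warped)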
Post-processing, C++ (OpenCV) version:
#include "fm_ocr_scanner.hpp"
#include <algorithm>
#include <array>
#include <cassert>
#include <cmath>
#include <tuple>
#include <vector>

//See the call sites below for an explanation of these constants.
const int kHoughLinesPThreshold = 20;
const double kHoughLinesPMinLinLength = 20.0;
const double kHoughLinesPMaxLineGap = 3.0;
const int kMergeLinesMaxDistance = 5;
const int kIntersectionMinAngle = 45;
const int kIntersectionMaxAngle = 135;
const double kCloserPointMaxDistance = 6.0;
const double kRectOpposingSidesMinRatio = 0.5;
const int kPointOnLineMaxOffset = 8;
const int kSameSegmentsMaxAngle = 5;

struct Corner {
    cv::Point point;
    std::vector<cv::Vec4i> segments;
};

static bool IsPointOnLine(const cv::Point point, const cv::Vec4i line) {
    cv::Point p0 = cv::Point(line[0], line[1]);
    cv::Point p1 = cv::Point(line[2], line[3]);
    int min_x, max_x, min_y, max_y;
    //After HED and Hough detection, the two segments at a rectangle corner may be slightly disconnected, so extend both ends of the line a little.
    min_x = MIN(p0.x, p1.x) - kPointOnLineMaxOffset;
    max_x = MAX(p0.x, p1.x) + kPointOnLineMaxOffset;
    min_y = MIN(p0.y, p1.y) - kPointOnLineMaxOffset;
    max_y = MAX(p0.y, p1.y) + kPointOnLineMaxOffset;
    if (point.x >= min_x && point.x <= max_x && point.y >= min_y && point.y <= max_y) {
        return true;
    }
    return false;
}

//https://gist.github.com/ceykmc/18d3f82aaa174098f145
static std::array<int, 3> Cross(const std::array<int, 3> &a,
                                const std::array<int, 3> &b) {
    std::array<int, 3> result;
    result[0] = a[1] * b[2] - a[2] * b[1];
    result[1] = a[2] * b[0] - a[0] * b[2];
    result[2] = a[0] * b[1] - a[1] * b[0];
    return result;
}

//In this version, each line is treated as an infinitely extendable straight line.
static bool GetIntersection(const cv::Vec4i &line_a, const cv::Vec4i &line_b, cv::Point &intersection) {
    std::array<int, 3> pa{ { line_a[0], line_a[1], 1 } };
    std::array<int, 3> pb{ { line_a[2], line_a[3], 1 } };
    std::array<int, 3> la = Cross(pa, pb);
    pa[0] = line_b[0], pa[1] = line_b[1], pa[2] = 1;
    pb[0] = line_b[2], pb[1] = line_b[3], pb[2] = 1;
    std::array<int, 3> lb = Cross(pa, pb);
    std::array<int, 3> inter = Cross(la, lb);
    if (inter[2] == 0) return false; // two lines are parallel
    else {
        intersection.x = inter[0] / inter[2];
        intersection.y = inter[1] / inter[2];
        return true;
    }
}

//In this version, each line is a finite segment, so additionally check that the intersection point lies on both segments.
static bool GetSegmentIntersection(const cv::Vec4i &line_a, const cv::Vec4i &line_b, cv::Point &intersection) {
    std::array<int, 3> pa{ { line_a[0], line_a[1], 1 } };
    std::array<int, 3> pb{ { line_a[2], line_a[3], 1 } };
    std::array<int, 3> la = Cross(pa, pb);
    pa[0] = line_b[0];
    pa[1] = line_b[1];
    pa[2] = 1;
    pb[0] = line_b[2];
    pb[1] = line_b[3];
    pb[2] = 1;
    std::array<int, 3> lb = Cross(pa, pb);
    std::array<int, 3> inter = Cross(la, lb);
    if (inter[2] == 0) return false; // two lines are parallel
    else {
        intersection.x = inter[0] / inter[2];
        intersection.y = inter[1] / inter[2];
        if (IsPointOnLine(intersection, line_a) == true && IsPointOnLine(intersection, line_b) == true) {
            return true;
        }
        return false;
    }
}

//////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////
/**
 atan2 does not return 0~360 degrees, but 0~180 and/or 0~-180 degrees. We fell into exactly this pit before, so the value needs a fix-up.
 Some real data collected for reference. Note (-176.228, 173.9) in the second line: one positive, one negative, yet both values are in fact correct. Both lines are nearly horizontal, one tilting upward and the other downward, i.e. one angle is below 180 degrees and the other above; mathematically, using a positive angle for the one below 180 and a negative angle for the one above 180 is equivalent.
 ------- debug, (angle_top, angle_bottom) are: (178.652, 179.599), (angle_right, angle_left) are: (-101.136, -75.3236)
 ------- debug, (angle_top, angle_bottom) are: (-176.228, 173.9), (angle_right, angle_left) are: (-114.411, -97.219)
 ------- debug, (angle_top, angle_bottom) are: (-142.927, -157.126), (angle_right, angle_left) are: (-62.549, -54.9165)
 ------- debug, (angle_top, angle_bottom) are: (-176.576, -179.441), (angle_right, angle_left) are: (-107.324, -64.6538)
 */
/**
 The two functions below are the unfixed versions; this is where the bug came from.
static double GetAngleOfLine(const cv::Vec4i &line) {
    int x1 = line[0], y1 = line[1], x2 = line[2], y2 = line[3];
    double angle = atan2(y2 - y1, x2 - x1) * 180.0 / CV_PI;
    return angle;
}
static double GetAngleOfTwoPoints(const cv::Point &point_a, const cv::Point &point_b) {
    double angle = atan2(point_b.y - point_a.y, point_b.x - point_a.x) * 180.0 / CV_PI;
    return angle;
}
*/
//http://opencv-users.1802565.n2.nabble.com/Angle-between-2-lines-td6803229.html
//http://stackoverflow.com/questions/2339487/calculate-angle-of-2-points
static int GetAngleOfLine(const cv::Vec4i &line) {
    int x1 = line[0], y1 = line[1], x2 = line[2], y2 = line[3];
    //http://stackoverflow.com/questions/1311049/how-to-map-atan2-to-degrees-0-360
    //The fix degrees = (degrees + 360) % 360 yields an int angle; it loses a little precision, but is accurate enough for this algorithm.
    double angle = atan2(y2 - y1, x2 - x1) * 180.0 / CV_PI;
    int fix_angle = ((int)angle + 360) % 360;
    assert(fix_angle >= 0);
    assert(fix_angle <= 360);
    return fix_angle;
}

static int GetAngleOfTwoPoints(const cv::Point &point_a, const cv::Point &point_b) {
    double angle = atan2(point_b.y - point_a.y, point_b.x - point_a.x) * 180.0 / CV_PI;
    int fix_angle = ((int)angle + 360) % 360;
    assert(fix_angle >= 0);
    assert(fix_angle <= 360);
    return fix_angle;
}

//////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////
/**
 RefLineVec4i is a bit special: viewed as a vector, its direction follows fixed rules.
 The two points of a RefLineVec4i always run from left to right; if the RefLine is parallel to the Y axis (so left/right cannot be distinguished), they run from bottom to top.
 */
typedef cv::Vec4i RefLineVec4i;

static bool IsTwoRefLineCloseToEachOther(RefLineVec4i line_a, RefLineVec4i line_b) {
    if (std::abs(line_a[1] - line_b[1]) < kMergeLinesMaxDistance && std::abs(line_a[3] - line_b[3]) < kMergeLinesMaxDistance) {
        return true;
    }
    return false;
}

static RefLineVec4i GetRefLine(const cv::Vec4i line, int image_width, int image_height) {
    /**
     line format is (x_start, y_start, x_end, y_end),
     corresponding to line[0] line[1] line[2] line[3].
     The line equation is y = a*x + b. From the two equations
     line[1] = a*line[0] + b
     line[3] = a*line[2] + b
     we can derive
     line[1] - line[3] = a * (line[0] - line[2])
     which gives
     a = (line[1] - line[3]) / (line[0] - line[2])
     b = line[1] - a*line[0]
       = (line[0]*line[3] - line[2]*line[1]) / (line[0] - line[2])
     */
    RefLineVec4i ref_line;
    if (line[0] == line[2]) {
        //Parallel to the Y axis: order the points from bottom to top.
        ref_line[0] = line[0];
        ref_line[1] = 0; //bottom to top
        ref_line[2] = line[2];
        ref_line[3] = image_height;
    } else if (line[1] == line[3]) {
        //Parallel to the X axis: order the points from left to right.
        ref_line[0] = 0; //left to right
        ref_line[1] = line[1];
        ref_line[2] = image_width;
        ref_line[3] = line[3];
    } else {
        //Only slanted lines reach this branch, so the formulas are safe: the division below never sees (line[0] - line[2]) == 0.
        //a = (line[1] - line[3]) / (line[0] - line[2])
        float a, b;
        a = (float)(line[1] - line[3]) / (float)(line[0] - line[2]);
        b = (float)(line[0]*line[3] - line[2]*line[1]) / (float)(line[0] - line[2]);
        // y = a*x + b
        ref_line[0] = 0; //left to right
        ref_line[1] = int(b);
        ref_line[2] = int((image_height - b) / a);
        ref_line[3] = image_height; // ref_line[3] = a*ref_line[2] + b
        //std::cout << "__ ref_line are: (" << ref_line[0] << ", " << ref_line[1] << ", " << ref_line[2] << ", " << ref_line[3] << ")" << std::endl;
    }
    return ref_line;
}

static bool SortPointsByXaxis(const cv::Point &a, const cv::Point &b) {
    return a.x < b.x;
}

static bool SortPointsByYaxis(const cv::Point &a, const cv::Point &b) {
    return a.y < b.y;
}

static bool SortCornersByXaxis(const Corner &a, const Corner &b) {
    return a.point.x < b.point.x;
}

static bool SortCornersByYaxis(const Corner &a, const Corner &b) {
    return a.point.y < b.point.y;
}

static bool IsSegmentsHasSameSegment(const std::vector<cv::Vec4i> segments, const cv::Vec4i segment, int image_width) {
    for (int i = 0; i < segments.size(); i++) {
        cv::Vec4i seg = segments[i];
        int angle_a = GetAngleOfLine(seg);
        int angle_b = GetAngleOfLine(segment);
        int diff = std::abs(angle_a - angle_b);
        diff = diff % 90; //fold into the 0~90 degree range
        //std::cout << " ********************, angle_a, angle_b are: (" << angle_a << ", " << angle_b << "), diff is: " << diff << std::endl;
        if (diff < kSameSegmentsMaxAngle || diff > (90 - kSameSegmentsMaxAngle)) {
            return true;
        }
    }
    //TODO: consider whether a stricter matching policy is needed.
    return false;
}

/**
 HoughLinesP detects finite segments with a length.
 Each segment is extended into a uniformly formatted RefLineVec4i, forming a pair, and this function merges those pairs.
 Whether two pairs merge is decided on the RefLineVec4i; when they do merge, the corresponding HoughLinesP segments are recombined into one longer segment.
 */
static std::vector<std::tuple<RefLineVec4i, cv::Vec4i> > MergeRefLineAndSegmentPairs(std::vector<std::tuple<RefLineVec4i, cv::Vec4i> > ref_line_and_segment_pairs, int image_width, int image_height) {
    std::vector<std::tuple<RefLineVec4i, cv::Vec4i> > merged_ref_line_and_segment_pairs;
    for (int i = 0; i < ref_line_and_segment_pairs.size(); i++) {
        std::tuple<RefLineVec4i, cv::Vec4i> ref_line_and_segment = ref_line_and_segment_pairs[i];
        auto ref_line = std::get<0>(ref_line_and_segment);
        auto segment = std::get<1>(ref_line_and_segment);
        if (merged_ref_line_and_segment_pairs.size() == 0) {
            merged_ref_line_and_segment_pairs.push_back(std::make_tuple(ref_line, segment));
        } else {
            bool isCloser = false;
            for (int j = 0; j < merged_ref_line_and_segment_pairs.size(); j++) {
                auto merged_ref_line_and_segment = merged_ref_line_and_segment_pairs[j];
                auto merged_ref_line = std::get<0>(merged_ref_line_and_segment);
                auto merged_segment = std::get<1>(merged_ref_line_and_segment);
                //std::cout << "debug, std::abs(line[1] - merged_line[1]) " << std::abs(line[1] - merged_line[1]) << ", std::abs(line[3] - merged_line[3]) " << std::abs(line[3] - merged_line[3]) << std::endl;
                if (IsTwoRefLineCloseToEachOther(ref_line, merged_ref_line) == true) {
                    //The two ref lines are close: merge the two segments into one and regenerate the ref line.
                    //Collect the 4 endpoints first.
                    cv::Point p0 = cv::Point(segment[0], segment[1]);
                    cv::Point p1 = cv::Point(segment[2], segment[3]);
                    cv::Point p2 = cv::Point(merged_segment[0], merged_segment[1]);
                    cv::Point p3 = cv::Point(merged_segment[2], merged_segment[3]);
                    std::vector<cv::Point> point_vector;
                    point_vector.push_back(p0);
                    point_vector.push_back(p1);
                    point_vector.push_back(p2);
                    point_vector.push_back(p3);
                    //After sorting, the leftmost and rightmost points form the new, longer segment.
                    std::sort(point_vector.begin(), point_vector.end(), SortPointsByXaxis);
                    cv::Point left_most_point = point_vector[0];
                    cv::Point right_most_point = point_vector[3];
                    cv::Vec4i new_segment;
                    new_segment[0] = left_most_point.x;
                    new_segment[1] = left_most_point.y;
                    new_segment[2] = right_most_point.x;
                    new_segment[3] = right_most_point.y;
                    //TODO: consider other merge strategies, e.g. nudging the two endpoints so they sit exactly on the new line.
                    RefLineVec4i new_ref_line = GetRefLine(new_segment, image_width, image_height);
                    merged_ref_line_and_segment_pairs[j] = std::make_tuple(new_ref_line, new_segment);
                    isCloser = true;
                    break;
                }
            }
            if (isCloser == false) {
                merged_ref_line_and_segment_pairs.push_back(std::make_tuple(ref_line, segment));
            }
        }
    }
    return merged_ref_line_and_segment_pairs;
}

static double PointsDistance(const cv::Point &a, const cv::Point &b) {
    double x_distance = (double)a.x - (double)b.x;
    double y_distance = (double)a.y - (double)b.y;
    double distance = cv::sqrt(x_distance * x_distance + y_distance * y_distance);
    //std::cout << " -- pointsDistance, [x_distance, y_distance, distance] are: [" << x_distance << ", " << y_distance << ", " << distance << "]" << std::endl;
    return distance;
}

/**
 Sort the 4 corners clockwise: top-left, top-right, bottom-right, bottom-left, with indices 0, 1, 2, 3.
 */
static std::vector<Corner> ArrangeRectCorners(std::vector<Corner> rect_corners) {
    assert(rect_corners.size() == 4);
    std::sort(rect_corners.begin(), rect_corners.end(), SortCornersByXaxis);
    std::vector<Corner> left_two_corners;
    std::vector<Corner> right_two_corners;
    left_two_corners.push_back(rect_corners[0]);
    left_two_corners.push_back(rect_corners[1]);
    right_two_corners.push_back(rect_corners[2]);
    right_two_corners.push_back(rect_corners[3]);
    std::sort(left_two_corners.begin(), left_two_corners.end(), SortCornersByYaxis);
    std::sort(right_two_corners.begin(), right_two_corners.end(), SortCornersByYaxis);
    std::vector<Corner> sorted_corners; // top-left, top-right, bottom-right, bottom-left
    sorted_corners.push_back(left_two_corners[0]);
    sorted_corners.push_back(right_two_corners[0]);
    sorted_corners.push_back(right_two_corners[1]);
    sorted_corners.push_back(left_two_corners[1]);
    return sorted_corners;
}

/**
 A set of heuristics deciding whether 4 corners form a trustworthy rectangle. (Perspective means it is never an exact rectangle, but a trapezoid or parallelogram.)
 The 4 corners have already been sorted by ArrangeRectCorners:
 top-left, top-right, bottom-right, bottom-left, with indices 0, 1, 2, 3.
 */
static bool IsRectCornersReasonable(std::vector<Corner> rect_corners, int image_width) {
    assert(rect_corners.size() == 4);
    //First heuristic: each quadrilateral side must be similar to a segment recorded on its two corners.
    std::vector<cv::Point> rect_points;
    rect_points.push_back(rect_corners[0].point);
    rect_points.push_back(rect_corners[1].point);
    rect_points.push_back(rect_corners[2].point);
    rect_points.push_back(rect_corners[3].point);
    cv::Vec4i segment_0_to_1 = cv::Vec4i(rect_points[0].x, rect_points[0].y, rect_points[1].x, rect_points[1].y);
    cv::Vec4i segment_1_to_2 = cv::Vec4i(rect_points[1].x, rect_points[1].y, rect_points[2].x, rect_points[2].y);
    cv::Vec4i segment_2_to_3 = cv::Vec4i(rect_points[2].x, rect_points[2].y, rect_points[3].x, rect_points[3].y);
    cv::Vec4i segment_3_to_0 = cv::Vec4i(rect_points[3].x, rect_points[3].y, rect_points[0].x, rect_points[0].y);
    std::vector<cv::Vec4i> rect_segments;
    rect_segments.push_back(segment_0_to_1);
    rect_segments.push_back(segment_1_to_2);
    rect_segments.push_back(segment_2_to_3);
    rect_segments.push_back(segment_3_to_0);
    /**
     segment_0_to_1 should be similar to at least one of the segments recorded on rect_corners[0], and likewise
     segment_0_to_1 should be similar to at least one of the segments recorded on rect_corners[1].
     */
    if (IsSegmentsHasSameSegment(rect_corners[0].segments, segment_0_to_1, image_width) &&
        IsSegmentsHasSameSegment(rect_corners[1].segments, segment_0_to_1, image_width)) {
    } else {
        return false;
    }
    if (IsSegmentsHasSameSegment(rect_corners[1].segments, segment_1_to_2, image_width) &&
        IsSegmentsHasSameSegment(rect_corners[2].segments, segment_1_to_2, image_width)) {
    } else {
        return false;
    }
    if (IsSegmentsHasSameSegment(rect_corners[2].segments, segment_2_to_3, image_width) &&
        IsSegmentsHasSameSegment(rect_corners[3].segments, segment_2_to_3, image_width)) {
    } else {
        return false;
    }
    if (IsSegmentsHasSameSegment(rect_corners[3].segments, segment_3_to_0, image_width) &&
        IsSegmentsHasSameSegment(rect_corners[0].segments, segment_3_to_0, image_width)) {
    } else {
        return false;
    }
    //Second heuristic: the shape of the quadrilateral.
    double distance_of_0_to_1 = PointsDistance(rect_points[0], rect_points[1]);
    double distance_of_1_to_2 = PointsDistance(rect_points[1], rect_points[2]);
    double distance_of_2_to_3 = PointsDistance(rect_points[2], rect_points[3]);
    double distance_of_3_to_0 = PointsDistance(rect_points[3], rect_points[0]);
    //Compute the ratio (0.0 - 1.0) of each pair of opposing sides.
    //For each pair of opposing sides (parallel sides, in a true rectangle), minLength / maxLength must not drop below 0.5, otherwise the shape is rejected. (The threshold was originally 0.8, but the images are resized before processing, which distorts the aspect ratio a lot, so the filter was relaxed to 0.5.)
    //distance_of_0_to_1 and distance_of_2_to_3 are one pair of opposing sides.
    double ratio1 = MIN(distance_of_0_to_1, distance_of_2_to_3) / MAX(distance_of_0_to_1, distance_of_2_to_3);
    double ratio2 = MIN(distance_of_1_to_2, distance_of_3_to_0) / MAX(distance_of_1_to_2, distance_of_3_to_0);
    //std::cout << " ------- debug, distance_of_1_to_2 and distance_of_3_to_0 are: (" << distance_of_1_to_2 << ", " << distance_of_3_to_0 << ")" << std::endl;
    //std::cout << " ------- debug, ratio1 and ratio2 are: (" << ratio1 << ", " << ratio2 << ")" << std::endl;
    if ((ratio1 >= kRectOpposingSidesMinRatio) && (ratio2 >= kRectOpposingSidesMinRatio)) {
        //At least one pair of opposing sides should be nearly parallel (depending on the perspective, the quadrilateral is a trapezoid or a parallelogram).
        //Run one more round of checks based on that.
        int angle_top, angle_bottom, angle_left, angle_right; //angles of the 4 sides against the horizontal axis
        angle_top = GetAngleOfTwoPoints(rect_points[1], rect_points[0]);
        angle_bottom = GetAngleOfTwoPoints(rect_points[2], rect_points[3]);
        angle_right = GetAngleOfTwoPoints(rect_points[2], rect_points[1]);
        angle_left = GetAngleOfTwoPoints(rect_points[3], rect_points[0]);
        //std::cout << "\n\n ------- debug, (angle_top, angle_bottom) are: (" << angle_top << ", " << angle_bottom << "), (angle_right, angle_left) are: (" << angle_right << ", " << angle_left << ")" << std::endl;
        int diff1 = std::abs(angle_top - angle_bottom);
        int diff2 = std::abs(angle_right - angle_left);
        diff1 = diff1 % 90;
        diff2 = diff2 % 90; //fold into the 0~90 degree range
        //std::cout << " ---------------debug, diff1 and diff2 are: [" << diff1 << ", " << diff2 << "]" << std::endl;
        //The thresholds below are empirical values.
        if (diff1 <= 8 && diff2 <= 8) {
            //Shot from directly above: a parallelogram.
            return true;
        }
        if (diff1 <= 8 && diff2 <= 45) {
            //A trapezoid, with perspective.
            return true;
        }
        if (diff1 <= 45 && diff2 <= 8) {
            //A trapezoid, with perspective.
            return true;
        }
    }
    return false;
}

#define ENABLE_DEBUG_MODE
std::tuple<bool, std::vector<cv::Point>, std::vector<cv::Mat> > ProcessEdgeImage(cv::Mat edge_image, cv::Mat color_image, bool draw_debug_image) {
    assert(edge_image.rows == color_image.rows);
    assert(edge_image.cols == color_image.cols);
    int height = edge_image.rows;
    int width = edge_image.cols;
    std::vector<cv::Point> results;
    std::vector<cv::Mat> debug_images;
#ifdef ENABLE_DEBUG_MODE
    cv::Mat lines_image, corners_image, rect_image;
    if (draw_debug_image) {
        lines_image = color_image.clone();
        corners_image = color_image.clone();
        rect_image = color_image.clone();
    }
#endif
    /**
     find rectangles
     http://blog.ayoungprogrammer.com/2013/04/tutorial-detecting-multiple-rectangles.html/
     https://github.com/bsdnoobz/opencv-code/blob/master/quad-segmentation.cpp
     http://monkeycoding.com/?p=656
     */
    //<1> Convert the 0.0~1.0 float image into a 0~255 int image.
    cv::Mat gray_image;
    edge_image.convertTo(gray_image, CV_8UC1, 255.0); //http://stackoverflow.com/questions/22117267/how-to-convert-an-image-to-a-float-image-in-opencv http://stackoverflow.com/questions/6302171/convert-uchar-mat-to-float-mat-in-opencv
    //<2> Detect line segments.
    cv::Mat binary_image;
    cv::threshold(gray_image, binary_image, 128, 255, cv::THRESH_BINARY); //HoughLinesP expects a binarized input mat
    gray_image = binary_image;
    /**
     vector<Vec4i> lines;
     HoughLinesP(dst, lines, 1, CV_PI/180, 50, 50, 10 );
     with the arguments:
     dst: Output of the edge detector. It should be a grayscale image (although in fact it is a binary one)
     lines: A vector that will store the parameters (x_{start}, y_{start}, x_{end}, y_{end}) of the detected lines
     rho : The resolution of the parameter r in pixels. We use 1 pixel.
     theta: The resolution of the parameter \theta in radians. We use 1 degree (CV_PI/180)
     threshold: The minimum number of intersections to "detect" a line
     minLinLength: The minimum number of points that can form a line. Lines with less than this number of points are disregarded.
     maxLineGap: The maximum gap between two points to be considered in the same line.
     */
    std::vector<cv::Vec4i> linesP;
    cv::HoughLinesP(gray_image, linesP, 1, CV_PI * 1/180, kHoughLinesPThreshold, kHoughLinesPMinLinLength, kHoughLinesPMaxLineGap); //This parameter set can detect fairly small rectangles, but it is also more sensitive to clutter and easily picks up short segments outside the rectangle.
    //<3> Extend each segment into a reference line (really a segment clipped to the full image extent) and run a first filter.
    std::vector<std::tuple<RefLineVec4i, cv::Vec4i> > ref_line_and_segment_pairs;
    for (int i = 0; i < linesP.size(); i++) {
        cv::Vec4i segment = linesP[i];
        RefLineVec4i ref_line = GetRefLine(segment, edge_image.cols, edge_image.rows); //extend the segment into a reference line
        //Filter by segment length.
        double segment_length = cv::sqrt(((float)segment[1] - segment[3]) * ((float)segment[1] - segment[3]) + ((float)segment[0] - segment[2]) * ((float)segment[0] - segment[2]));
        if (segment_length > kHoughLinesPMinLinLength) {
            ref_line_and_segment_pairs.push_back(std::make_tuple(ref_line, segment));
        }
    }
    //<4> Merge nearby lines.
    std::vector<std::tuple<RefLineVec4i, cv::Vec4i> > merged_ref_line_and_segment_pairs = MergeRefLineAndSegmentPairs(ref_line_and_segment_pairs, edge_image.cols, edge_image.rows);
    std::vector<RefLineVec4i> ref_lines;
    std::vector<cv::Vec4i> segments;
    //Iterate the merged pairs here; iterating the unmerged ref_line_and_segment_pairs would silently discard the merge step.
    for (int i = 0; i < merged_ref_line_and_segment_pairs.size(); i++) {
        std::tuple<RefLineVec4i, cv::Vec4i> ref_line_and_segment = merged_ref_line_and_segment_pairs[i];
        auto ref_line = std::get<0>(ref_line_and_segment);
        auto segment = std::get<1>(ref_line_and_segment);
        ref_lines.push_back(ref_line);
        segments.push_back(segment);
    }
#ifdef ENABLE_DEBUG_MODE
    if (draw_debug_image) {
        for (int i = 0; i < segments.size(); i++) {
            cv::Vec4i v = segments[i];
            if (draw_debug_image) {
                cv::line(lines_image, cv::Point(v[0], v[1]), cv::Point(v[2], v[3]), CV_RGB(0,255,0));
            }
            //std::cout << " ^^^^^^ debug, cv::Point(v[0], v[1]) is: " << cv::Point(v[0], v[1]) << ", cv::Point(v[2], v[3]) is: " << cv::Point(v[2], v[3]) << std::endl;
        }
    }
#endif
    //<5> Find the intersections of the segments and filter them.
    std::vector<cv::Point> all_corners;
    std::vector<Corner> corners;
    for (int i = 0; i < segments.size(); i++) {
        for (int j = i + 1; j < segments.size(); j++) {
            cv::Vec4i segment_a = segments[i], segment_b = segments[j];
            //https://gist.github.com/ceykmc/18d3f82aaa174098f145 two lines intersection
            //http://stackoverflow.com/questions/20677795/how-do-i-compute-the-intersection-point-of-two-lines-in-python
            cv::Point intersection_point;
            if (GetSegmentIntersection(segment_a, segment_b, intersection_point) == true) {
                all_corners.push_back(intersection_point);
                //First round of intersection filtering.
                if (intersection_point.x <= 0 || intersection_point.y <= 0
                    || intersection_point.x >= width || intersection_point.y >= height) {
                    //std::cout << "^^^^^^^^^^^^^^ pointer <= 0, do not need " << std::endl;
                    //Intersections outside the image are discarded.
                } else {
                    int thetaA = GetAngleOfLine(segment_a);
                    int thetaB = GetAngleOfLine(segment_b);
                    int angle = std::abs(thetaA - thetaB);
                    angle = angle % 180; //fold into the 0~180 range
                    //std::cout << " ------- debug, (thetaA, thetaB) are: (" << thetaA << ", " << thetaB << "), two line angle is " << angle << std::endl;
                    if (angle >= kIntersectionMinAngle && angle <= kIntersectionMaxAngle) {
                        //Filter on the angle between the two segments.
                        Corner c = Corner();
                        c.point = intersection_point;
                        c.segments.push_back(segment_a);
                        c.segments.push_back(segment_b);
                        corners.push_back(c);
                    }
                }
            }
        }
    }
    //Second round of filtering: two points that are very close are merged into one, represented by their average.
    std::vector<Corner> average_corners;
    for (int i = 0; i < corners.size(); i++) {
        Corner corner = corners[i];
        if (average_corners.size() == 0) {
            average_corners.push_back(corner);
        } else {
            bool isCloser = false;
            for (int j = 0; j < average_corners.size(); j++) {
                Corner c = average_corners[j];
                cv::Point diff = corner.point - c.point;
                double distance = cv::sqrt(diff.x*diff.x + diff.y*diff.y);
                //std::cout << " _____ debug, distance is: " << distance << std::endl;
                if (distance < kCloserPointMaxDistance) {
                    //The two points are very close: merge them into one.
                    Corner newCorner = Corner();
                    newCorner.point = cv::Point((corner.point.x + c.point.x) / 2, (corner.point.y + c.point.y) / 2);
                    //Also merge the segment lists of the two corners.
                    std::vector<cv::Vec4i> segment_a = corner.segments;
                    std::vector<cv::Vec4i> segment_b = c.segments;
                    //This is an efficient way to append one vector to another:
                    //http://stackoverflow.com/questions/2551775/appending-a-vector-to-a-vector
                    newCorner.segments.insert(newCorner.segments.end(), segment_a.begin(), segment_a.end());
                    newCorner.segments.insert(newCorner.segments.end(), segment_b.begin(), segment_b.end());
                    average_corners[j] = newCorner;
                    isCloser = true;
                    break;
                }
            }
            if (isCloser == false) {
                average_corners.push_back(corner);
            }
        }
    }
    //std::cout << "debug, all_corners " << all_corners.size() << ", corners " << corners.size() << ", average_corners " << average_corners.size() << std::endl;
#ifdef ENABLE_DEBUG_MODE
    if (draw_debug_image) {
        for (int i = 0; i < average_corners.size(); i++) {
            Corner corner = average_corners[i];
            cv::circle(corners_image, corner.point, 3, CV_RGB(255,0,0), 2);
            //std::cout << "average_corners i = " << i << ", point is: " << corner.point << ", segment size is: " << corner.segments.size() << std::endl;
            for (int j = 0; j < corner.segments.size(); j++) {
                cv::Vec4i v = corner.segments[j];
                cv::line(corners_image, cv::Point(v[0], v[1]), cv::Point(v[2], v[3]), CV_RGB(0,0,255));
            }
        }
    }
#endif
    //<6> Search for the quadrilateral.
    if (average_corners.size() >= 4) {
        //At least 4 points are needed to form a rectangle. (TODO: too many points also hurts performance, so an upper bound may be needed, plus a strategy to thin the points out when it is reached.)
        double maxPerimeter = 0.0;
        std::vector<Corner> rect_corners;
        std::vector<Corner> rect_corners_with_max_perimeter;
        std::vector<cv::Point> rect_points_with_max_perimeter;
        //The quadruple loop is fairly expensive.
        for (int i = 0; i <= average_corners.size() - 4; i++) {
            for (int j = i + 1; j <= average_corners.size() - 3; j++) {
                for (int m = j + 1; m <= average_corners.size() - 2; m++) {
                    for (int n = m + 1; n <= average_corners.size() - 1; n++) {
                        rect_corners.clear();
                        rect_corners.push_back(average_corners[i]);
                        rect_corners.push_back(average_corners[j]);
                        rect_corners.push_back(average_corners[m]);
                        rect_corners.push_back(average_corners[n]);
                        //Sort the four corners clockwise.
                        rect_corners = ArrangeRectCorners(rect_corners);
                        //Discard combinations that do not form a reasonable quadrilateral.
                        if (IsRectCornersReasonable(rect_corners, edge_image.cols) == false) {
                            continue;
                        }
                        std::vector<cv::Point> rect_points;
                        rect_points.push_back(rect_corners[0].point);
                        rect_points.push_back(rect_corners[1].point);
                        rect_points.push_back(rect_corners[2].point);
                        rect_points.push_back(rect_corners[3].point);
                        double perimeter = cv::contourArea(rect_points); //actually the area: keep the quadrilateral with the largest area
                        //double perimeter = cv::arcLength(rect_points, true); //or the largest perimeter
                        //std::cout << "#############debug, perimeter is: " << perimeter << std::endl;
                        if (perimeter > maxPerimeter) {
                            maxPerimeter = perimeter;
                            rect_corners_with_max_perimeter = rect_corners;
                            rect_points_with_max_perimeter = rect_points;
                        }
                    }
                }
            }
        }
        if (rect_points_with_max_perimeter.size() == 4) {
#ifdef ENABLE_DEBUG_MODE
            if (draw_debug_image) {
                const cv::Point *pts = (const cv::Point*) cv::Mat(rect_points_with_max_perimeter).data;
                int npts = cv::Mat(rect_points_with_max_perimeter).rows;
                cv::polylines(rect_image, &pts, &npts, 1,
                              true,
                              cv::Scalar(0, 255, 255),
                              2,
                              CV_AA, 0);
            }
#endif
            results = rect_points_with_max_perimeter;
        }
    }
#ifdef ENABLE_DEBUG_MODE
    if (draw_debug_image) {
        debug_images.push_back(gray_image);
        debug_images.push_back(lines_image);
        debug_images.push_back(corners_image);
        debug_images.push_back(rect_image);
    }
#endif
    bool find_rect = (results.size() == 4);
    return std::make_tuple(find_rect, results, debug_images);
}
Related links (with thanks to the author of the C++ version):
1.http://fengjian0106.github.io/2017/05/08/Document-Scanning-With-TensorFlow-And-OpenCV/
2.http://fengjian0106.github.io/2018/06/02/Document-Scanning-With-TensorFlow-And-OpenCV-Part-Two/
3.https://github.com/fengjian0106/hed-tutorial-for-document-scanning