A General Image Defect Detection Framework Based on Semantic Segmentation and Edge Detection

Overall architecture:

[Figure 1: overall architecture of the detection pipeline]
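The figure is summarized below as a rough outline; each stage corresponds to a code section later in this post (all names refer to functions defined below):

# Outline of the full pipeline (names are defined in the sections that follow):
#   1. Unet / charsiu2_binary_net : image -> edge/defect logit map
#   2. tf.nn.sigmoid + threshold  : logit map -> binary edge image
#   3. cv2.HoughLinesP            : binary edge image -> raw line segments
#   4. __merge_hough_lines        : raw segments -> a few long boundary lines
#   5. __find_rect                : boundary lines -> four rectangle corners
#   6. __img_straighten           : corners -> perspective-corrected region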

U-Net network architecture:

import tensorflow as tf


def convolutional(input_data, filters_shape, trainable, name, downsample=False, activate=True, bn=True):

    with tf.variable_scope(name):
        if downsample:
            pad_h, pad_w = (filters_shape[0] - 2) // 2 + 1, (filters_shape[1] - 2) // 2 + 1
            paddings = tf.constant([[0, 0], [pad_h, pad_h], [pad_w, pad_w], [0, 0]])
            input_data = tf.pad(input_data, paddings, 'CONSTANT')
            strides = (1, 2, 2, 1)
            padding = 'VALID'
        else:
            strides = (1, 1, 1, 1)
            padding = "SAME"

        weight = tf.get_variable(name='weight', dtype=tf.float32, trainable=True,
                                 shape=filters_shape, initializer=tf.random_normal_initializer(stddev=0.01))
        conv = tf.nn.conv2d(input=input_data, filter=weight, strides=strides, padding=padding)

        if bn:
            conv = tf.layers.batch_normalization(conv, beta_initializer=tf.zeros_initializer(),
                                                 gamma_initializer=tf.ones_initializer(),
                                                 moving_mean_initializer=tf.zeros_initializer(),
                                                 moving_variance_initializer=tf.ones_initializer(), training=trainable)
        else:
            bias = tf.get_variable(name='bias', shape=filters_shape[-1], trainable=True,
                                   dtype=tf.float32, initializer=tf.constant_initializer(0.0))
            conv = tf.nn.bias_add(conv, bias)

        if activate:
            conv = tf.nn.leaky_relu(conv, alpha=0.1)

    return conv


def upsample(input_data, name, method="deconv"):
    assert method in ["resize", "deconv"]
    if method == "resize":
        with tf.variable_scope(name):
            input_shape = tf.shape(input_data)
            output = tf.image.resize_nearest_neighbor(input_data, (input_shape[1] * 2, input_shape[2] * 2))

    if method == "deconv":
        # Replace resize_nearest_neighbor with conv2d_transpose to support TensorRT optimization.
        num_filter = input_data.shape.as_list()[-1]
        output = tf.layers.conv2d_transpose(input_data, num_filter//2, kernel_size=4, padding='same',
                                            strides=(2, 2), kernel_initializer=tf.random_normal_initializer())

    return output


def Unet(images, filters=8, name='unet'):
    with tf.variable_scope(name):
        endpoints = {}
        conv = convolutional(images, [3, 3, 3, filters], trainable=True, name='conv1')
        conv = convolutional(conv, [3, 3, filters, filters], trainable=True, name='conv2')
        endpoints['C1'] = conv
        # downsample 1
        conv = convolutional(conv, [3, 3, filters, filters], trainable=True, name='conv3', downsample=True)
        conv = convolutional(conv, [3, 3, filters, filters * 2], trainable=True, name='conv4')
        conv = convolutional(conv, [3, 3, filters * 2, filters * 2], trainable=True, name='conv5')
        endpoints['C2'] = conv
        # downsample 2
        conv = convolutional(conv, [3, 3, filters * 2, filters * 2], trainable=True, name='conv6', downsample=True)
        conv = convolutional(conv, [3, 3, filters * 2, filters * 4], trainable=True, name='conv7')
        conv = convolutional(conv, [3, 3, filters * 4, filters * 4], trainable=True, name='conv8')
        endpoints['C3'] = conv
        # downsample 3
        conv = convolutional(conv, [3, 3, filters * 4, filters * 4], trainable=True, name='conv9', downsample=True)
        conv = convolutional(conv, [3, 3, filters * 4, filters * 8], trainable=True, name='conv10')
        conv = convolutional(conv, [3, 3, filters * 8, filters * 8], trainable=True, name='conv11')
        endpoints['C4'] = conv
        # downsample 4
        conv = convolutional(conv, [3, 3, filters * 8, filters * 8], trainable=True, name='conv12', downsample=True)
        conv = convolutional(conv, [3, 3, filters * 8, filters * 16], trainable=True, name='conv13')
        conv = convolutional(conv, [3, 3, filters * 16, filters * 16], trainable=True, name='conv14')
        endpoints['C5'] = conv

        conv = convolutional(conv, [3, 3, filters * 16, filters * 16], trainable=True, name='conv15', downsample=True)
        conv = convolutional(conv, [3, 3, filters * 16, filters * 32], trainable=True, name='conv16')
        conv = convolutional(conv, [3, 3, filters * 32, filters * 32], trainable=True, name='conv17')
        endpoints['C6'] = conv

        conv = convolutional(conv, [3, 3, filters * 32, filters * 32], trainable=True, name='conv18', downsample=True)
        conv = convolutional(conv, [3, 3, filters * 32, filters * 64], trainable=True, name='conv19')
        conv = convolutional(conv, [3, 3, filters * 64, filters * 64], trainable=True, name='conv20')
        endpoints['C7'] = conv


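        # Decoder: upsample six times; before each pair of 3x3 convolutions,
        # concatenate the matching encoder feature map (C6 down to C1).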
        for i in range(7, 1, -1):
            with tf.variable_scope('Ronghe%d' % i):  # 'Ronghe' means "fusion"
                uplayer = upsample(conv, 'deconv%d' % (8-i), method="deconv")
                concat = tf.concat([endpoints['C%d' % (i-1)], uplayer], axis=-1)
                dim = concat.get_shape()[-1].value
                conv = convolutional(concat, [3, 3, dim, dim//2], trainable=True, name='conv1')
                conv = convolutional(conv, [3, 3, dim//2, dim//2], trainable=True, name='conv2')
        out = convolutional(conv, [3, 3, dim//2, 1], trainable=True, name='out', activate=False, bn=False)

    return out
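A minimal smoke test for the network above (my own sketch, not from the original post): with filters=8 and a 256x256 RGB input, the encoder downsamples six times (256 -> 4) and the decoder restores full resolution, so the output is a single-channel 256x256 logit map.

import numpy as np

images = tf.placeholder(tf.float32, shape=(None, 256, 256, 3))
logits = Unet(images, filters=8, name='unet')   # -> (None, 256, 256, 1) logits
probs = tf.nn.sigmoid(logits)                   # per-pixel edge/defect probability

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    out = sess.run(probs, feed_dict={images: np.zeros((1, 256, 256, 3), np.float32)})
    print(out.shape)  # (1, 256, 256, 1)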

HED network architecture:

# RCF, an evolution of HED: every conv layer within a stage contributes to the side output
def charsiu2_binary_net(input_image, is_training):
    filter_initializer = tf.contrib.layers.xavier_initializer()
    activation_func = tf.nn.relu6

    def conv2d(inputs, filters, kernel_size, stride, scope=''):
        with tf.variable_scope(scope):
            with tf.variable_scope('conv2d'):
                outputs = tf.layers.conv2d(inputs,
                                        filters,
                                        kernel_size,
                                        strides=(stride, stride),
                                        padding='same',
                                        activation=None,
                                        use_bias=False,
                                        kernel_initializer=filter_initializer)

                outputs = tf.layers.batch_normalization(outputs, training=is_training)
                outputs = tf.nn.relu(outputs)
            return outputs

    def _1x1_conv2d(inputs, filters, stride):
        kernel_size = [1, 1]
        with tf.variable_scope('1x1_conv2d'):
            outputs = tf.layers.conv2d(inputs,
                                       filters,
                                       kernel_size,
                                       strides=(stride, stride),
                                       padding='same',
                                       activation=None,
                                       use_bias=False,
                                       kernel_initializer=filter_initializer)

            outputs = tf.layers.batch_normalization(outputs, training=is_training)
        return outputs

    def expansion_conv2d(inputs, expansion, stride):
        input_shape = inputs.get_shape().as_list()
        assert len(input_shape) == 4
        filters = input_shape[3] * expansion

        kernel_size = [1, 1]
        with tf.variable_scope('expansion_1x1_conv2d'):
            outputs = tf.layers.conv2d(inputs,
                                    filters,
                                    kernel_size,
                                    strides=(stride, stride),
                                    padding='same',
                                    activation=None,
                                    use_bias=False,
                                    kernel_initializer=filter_initializer)

            outputs = tf.layers.batch_normalization(outputs, training=is_training)
            outputs = activation_func(outputs)
        return outputs

    def projection_conv2d(inputs, filters, stride):
        kernel_size = [1, 1]
        with tf.variable_scope('projection_1x1_conv2d'):
            outputs = tf.layers.conv2d(inputs,
                                    filters,
                                    kernel_size,
                                    strides=(stride, stride),
                                    padding='same',
                                    activation=None,
                                    use_bias=False,
                                    kernel_initializer=filter_initializer)
            outputs = tf.layers.batch_normalization(outputs, training=is_training)
        return outputs

    def depthwise_conv2d(inputs,depthwise_conv_kernel_size, stride):
        with tf.variable_scope('depthwise_conv2d'):
            outputs = tf.contrib.layers.separable_conv2d(
                        inputs,
                        None,
                        depthwise_conv_kernel_size,
                        depth_multiplier=1,
                        stride=(stride, stride),
                        padding='SAME',
                        activation_fn=None,
                        weights_initializer=filter_initializer,
                        biases_initializer=None)
            outputs = tf.layers.batch_normalization(outputs, training=is_training)
            outputs = activation_func(outputs)
        return outputs

    def inverted_residual_block(inputs,filters,stride,expansion=6,scope=''):
        assert stride == 1 or stride == 2

        depthwise_conv_kernel_size = [3, 3]
        pointwise_conv_filters = filters

        with tf.variable_scope(scope):
            net = inputs
            net = expansion_conv2d(net, expansion, stride=1)
            net = depthwise_conv2d(net, depthwise_conv_kernel_size, stride=stride)
            net = projection_conv2d(net, pointwise_conv_filters, stride=1)

            if stride == 1:
                if net.get_shape().as_list()[3] != inputs.get_shape().as_list()[3]:
                    inputs = _1x1_conv2d(inputs, net.get_shape().as_list()[3], stride=1)

                net = net + inputs
                return net
            else:
                return net

    def _dsn_1x1_conv2d(inputs, filters):
        kernel_size = [1, 1]
        outputs = tf.layers.conv2d(inputs,
                                   filters,
                                   kernel_size,
                                   padding='same',
                                   activation=None,
                                   use_bias=False,
                                   kernel_initializer=filter_initializer,
                                   kernel_regularizer=None)

        outputs = tf.layers.batch_normalization(outputs, training=is_training)
        return outputs

    def _output_1x1_conv2d(inputs, filters):
        kernel_size = [1, 1]
        outputs = tf.layers.conv2d(inputs,
                                   filters,
                                   kernel_size,
                                   padding='same',
                                   activation=None,
                                   use_bias=True,
                                   kernel_initializer=filter_initializer,
                                   kernel_regularizer=None)
        return outputs

    def _dsn_deconv2d_with_upsample_factor(inputs, filters, upsample_factor):
        kernel_size = [2 * upsample_factor, 2 * upsample_factor]
        outputs = tf.layers.conv2d_transpose(inputs,
                                             filters,
                                             kernel_size,
                                             strides=(upsample_factor, upsample_factor),
                                             padding='same',
                                             activation=None,
                                             use_bias=True,
                                             kernel_initializer=filter_initializer,
                                             kernel_regularizer=None)
        return outputs

    def _dsn_1x1_conv2d_elementhwise(prev_layer_list, combine_fn=tf.add, scope='_dsn_1x1_conv2d_elementhwise'):
        # RCF-style element-wise fusion: 1x1-conv every input down to one channel,
        # then accumulate them with combine_fn
        outputs = _dsn_1x1_conv2d(prev_layer_list[0], 1)
        with tf.variable_scope(scope):
            for l in prev_layer_list[1:]:
                outputs = combine_fn(outputs, _dsn_1x1_conv2d(l, 1))

        return outputs

    # Network definition
    with tf.variable_scope('Charsiu2Binary', values=[input_image]):
        net = input_image

        # MobileNet-v2 backbone
        with tf.variable_scope('mobilenet_v2'):
            net = conv2d(net, 3, [3,3], stride=1, scope='block0_0')
            net = conv2d(net, 6, [3,3], stride=1, scope='block0_1')

            dsn0 = net
            net = conv2d(net, 12, [3, 3], stride=2, scope='block0_2') # size/2
            dsn1_1 = net
            net = inverted_residual_block(net, 6, stride=1, expansion=1, scope='block1_0')
            dsn1_2 = net

            dsn2 = net
            net = inverted_residual_block(net, 12, stride=2, scope='block2_0') # size/4
            dsn2_1 = net
            net = inverted_residual_block(net, 12, stride=1, scope='block2_1')
            dsn2_2 = net

            dsn3 = net
            net = inverted_residual_block(net, 24, stride=2, scope='block3_0') # size/8
            dsn3_1 = net
            net = inverted_residual_block(net, 24, stride=1, scope='block3_1')
            dsn3_2 = net
            net = inverted_residual_block(net, 24, stride=1, scope='block3_2')
            dsn3_3 = net

            dsn4 = net
            net = inverted_residual_block(net, 48, stride=2, scope='block4_0')
            dsn4_1 = net
            net = inverted_residual_block(net, 48, stride=1, scope='block4_1')
            dsn4_2 = net
            net = inverted_residual_block(net, 48, stride=1, scope='block4_2')
            dsn4_3 = net
            net = inverted_residual_block(net, 48, stride=1, scope='block4_3')
            dsn4_4 = net

            net = inverted_residual_block(net, 64, stride=1, scope='block5_0')
            dsn5_1 = net
            net = inverted_residual_block(net, 64, stride=1, scope='block5_1')
            dsn5_2 = net
            net = inverted_residual_block(net, 64, stride=1, scope='block5_2')
            dsn5_3 = net
            dsn5 = net

        ## DSN (deeply-supervised side-output) layers
        with tf.variable_scope('dsn0'):
            dsn0 = _dsn_1x1_conv2d(dsn0, 1)
            print(dsn0)

        with tf.variable_scope('dsn1'):
            dsn1_1 = _dsn_1x1_conv2d(dsn1_1, 3)
            dsn1_2 = _dsn_1x1_conv2d(dsn1_2, 3)
            dsn1 = _dsn_1x1_conv2d_elementhwise([dsn1_1, dsn1_2])
            dsn1 = _dsn_1x1_conv2d(dsn1, 1)
            dsn1 = _dsn_deconv2d_with_upsample_factor(dsn1, 1, upsample_factor = 2)
            print(dsn1)

        with tf.variable_scope('dsn2'):
            dsn2_1 = _dsn_1x1_conv2d(dsn2_1, 6)
            dsn2_2 = _dsn_1x1_conv2d(dsn2_2, 6)
            dsn2 = _dsn_1x1_conv2d_elementhwise([dsn2_1, dsn2_2])
            dsn2 = _dsn_1x1_conv2d(dsn2, 1)
            dsn2 = _dsn_deconv2d_with_upsample_factor(dsn2, 1, upsample_factor = 4)
            print(dsn2)

        with tf.variable_scope('dsn3'):
            dsn3_1 = _dsn_1x1_conv2d(dsn3_1, 12)
            dsn3_2 = _dsn_1x1_conv2d(dsn3_2, 12)
            dsn3_3 = _dsn_1x1_conv2d(dsn3_3, 12)
            dsn3 = _dsn_1x1_conv2d_elementhwise([dsn3_1, dsn3_2, dsn3_3])
            dsn3 = _dsn_1x1_conv2d(dsn3, 1)
            dsn3 = _dsn_deconv2d_with_upsample_factor(dsn3, 1, upsample_factor = 8)
            print(dsn3)

        with tf.variable_scope('dsn4'):
            dsn4_1 = _dsn_1x1_conv2d(dsn4_1, 24)
            dsn4_2 = _dsn_1x1_conv2d(dsn4_2, 24)
            dsn4_3 = _dsn_1x1_conv2d(dsn4_3, 24)
            dsn4_4 = _dsn_1x1_conv2d(dsn4_4, 24)
            dsn4 = _dsn_1x1_conv2d_elementhwise([dsn4_1, dsn4_2, dsn4_3, dsn4_4])
            dsn4 = _dsn_1x1_conv2d(dsn4, 1)
            dsn4 = _dsn_deconv2d_with_upsample_factor(dsn4, 1, upsample_factor = 16)
            print(dsn4)

        with tf.variable_scope('dsn5'):
            dsn5_1 = _dsn_1x1_conv2d(dsn5_1, 32)
            dsn5_2 = _dsn_1x1_conv2d(dsn5_2, 32)
            dsn5_3 = _dsn_1x1_conv2d(dsn5_3, 32)
            dsn5 = _dsn_1x1_conv2d_elementhwise([dsn5_1, dsn5_2, dsn5_3])
            dsn5 = _dsn_1x1_conv2d(dsn5, 1)
            dsn5 = _dsn_deconv2d_with_upsample_factor(dsn5, 1, upsample_factor = 16)
            print(dsn5)

        with tf.variable_scope('dsn_fuse'):
            dsn_fuse = tf.concat([dsn0, dsn1, dsn2, dsn3, dsn4, dsn5], 3)
            dsn_fuse = _output_1x1_conv2d(dsn_fuse, 1)

    return dsn_fuse
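Note that the original HED/RCF formulation supervises every side output (dsn1 ... dsn5) as well as the fused map with a class-balanced sigmoid cross-entropy, while the training code below supervises only the fused output. A sketch of such a side-output loss, assuming {0,1} float label maps (this helper is mine, not part of this repo):

def class_balanced_sigmoid_ce(labels, logits):
    # labels: float32 map in {0, 1}; logits: raw side output (pre-sigmoid)
    count_pos = tf.reduce_sum(labels)
    count_neg = tf.reduce_sum(1.0 - labels)
    beta = count_neg / (count_neg + count_pos + 1e-8)   # fraction of background pixels
    pos_weight = beta / (1.0 - beta + 1e-8)             # up-weight the rare edge pixels
    cost = tf.nn.weighted_cross_entropy_with_logits(targets=labels, logits=logits,
                                                    pos_weight=pos_weight)
    return tf.reduce_mean((1.0 - beta) * cost)

# total = sum(class_balanced_sigmoid_ce(y, d) for d in [dsn1, ..., dsn5, dsn_fuse])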

Training code:

import re
import numpy as np
import cv2
import os
import tensorflow as tf
from nets.unet import Unet
from nets.modelnet import model
from nets.graph_define import charsiu_binary_net
from nets.lossfunc import *
from nets.func import params_usage
from tools import img_mask_color_blending
from tfsummary_logger import TFSummaryLogger
import cfg
from generator import Generator
import time


if __name__ == '__main__':
    os.environ['CUDA_VISIBLE_DEVICES'] = cfg.gpulist
    # Network graph
    with tf.name_scope('Input'):
        input_image = tf.placeholder(tf.uint8, shape=(None, cfg.input_h, cfg.input_w, 3))
        input_image_normal = tf.div(tf.to_float(input_image), 255.0)
        label_image = tf.placeholder(tf.uint8, shape=(None, cfg.input_h, cfg.input_w))
        label_image_normal = tf.cast(tf.expand_dims(label_image, -1), dtype=tf.float32)
        # Learning-rate placeholder
        learning_rate = tf.placeholder(tf.float32, shape=[])

    # dsn_fuse = charsiu_binary_net(input_image_normal, name='Modelnet')
    dsn_fuse = Unet(input_image_normal, name='Modelnet')
    output_sigmoid = tf.nn.sigmoid(dsn_fuse, name='output_sigmoid')
    binary_dsn_fuse = tf.where(tf.greater(output_sigmoid, cfg.eval_sigmoid_score),
                               tf.ones_like(dsn_fuse, tf.uint8) * 255, tf.zeros_like(dsn_fuse, tf.uint8))
    # Backpropagation
    with tf.variable_scope('BP'):
        with tf.variable_scope('focal_loss'):
            cost = confidence_loss(label_image_normal, output_sigmoid, alpha=0.99)
        with tf.variable_scope('adam_vars'):
            # Run the BN moving-statistics update ops together with the train op
            with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
                train_op = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
    num_params = params_usage()
    print('Number of parameters: %d' % num_params)
    # Collect the trained network weights for saving
    train_weights = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='Modelnet')
    saver = tf.train.Saver(train_weights, max_to_keep=10)

    # Summary logging
    tfsummary_logger = TFSummaryLogger(cfg.logspath, tf.get_default_graph())
    TDG = Generator(cfg.traintxt, (cfg.input_h, cfg.input_w), cfg.batch_size)
    TDG.shuffle()
    # NOTE: the validation generator reads the same list file as training here
    VDG = Generator(cfg.traintxt, (cfg.input_h, cfg.input_w), cfg.batch_size)
    # Start training
    global_init = tf.global_variables_initializer()
    tfconfig = tf.ConfigProto(allow_soft_placement=False, log_device_placement=False)
    tfconfig.gpu_options.allow_growth = True
    sess = tf.Session(config=tfconfig)
    sess.run(global_init)
    # Restore from the most recent checkpoint, if any
    ckpt = tf.train.latest_checkpoint(cfg.checkpoint_path)
    if cfg.restore and ckpt:
        epoch = int(ckpt.split('-')[-1])
        # min_val_loss = float(re.findall(r'_val_loss_(.+?)_loss_', ckpt)[0])
        print('continue training from previous checkpoint')
        print(ckpt)
        # print('val loss {:.6f}'.format(min_val_loss))
        saver.restore(sess, ckpt)
    else:
        epoch = 0
    min_val_loss = 1000000.
    lr = max(cfg.learning_rate * (cfg.decay_rate ** (epoch // cfg.decay_epoch)), 1e-4)

    while epoch <= cfg.max_epoch:
        total_loss = 0
        for step in range(TDG.steps_per_epoch):
            t = time.time()
            data = TDG.generator()
            t1 = time.time()
            print('Load image time: %.4f' % (time.time() - t))
            _, ml = sess.run([train_op, cost], feed_dict={input_image: data[0], label_image: data[1], learning_rate: lr})
            print('Net time %.4f' % (time.time() - t1))
            global_step = TDG.steps_per_epoch * epoch + step
            tfsummary_logger.log_scalar(tag='step_loss', value=ml, step=global_step)
            total_loss += ml
            print('Epoch:%d / %d Step: %d / %d ' % (epoch, cfg.max_epoch, step + 1, TDG.steps_per_epoch))
            print('Total loss %.6f model loss %.6f Learning rate %.8f' % (total_loss / (step + 1), ml, lr))
            if np.isnan(ml):
                print('Loss diverged, stop training')
                break
        else:
            tfsummary_logger.log_scalar(tag='train_loss', value=total_loss, step=epoch)
            if True:  # validation toggle; the else branch reuses the training loss
                print('Val...')
                epoch += 1
                global_step = TDG.steps_per_epoch * epoch
                total_loss = total_loss / TDG.steps_per_epoch
                val_loss = 0
                img_num = 0
                log_img_folder = os.path.join(cfg.logspath, 'Epoch_%d'%epoch)
                log_imgs = []
                for i in range(VDG.steps_per_epoch):
                    val_data = VDG.generator()
                    vml, sgmoid_img, binary_img = sess.run([cost, output_sigmoid, binary_dsn_fuse], feed_dict={input_image: val_data[0], label_image: val_data[1]})
                    val_loss += vml
                    # log_img = img_mask_color_blending(val_data[0][0], binary_img[0, :, :, 0], [255])
                    log_img = np.concatenate([val_data[1][0]*255, binary_img[0][:, :, 0], sgmoid_img[0][:, :, 0]*255], axis=1)
                    log_imgs.append(log_img)
                tfsummary_logger.log_images(tag='Result', images=log_imgs, step=epoch)
                val_loss = val_loss / VDG.steps_per_epoch
                print('Eval loss {:.4f}'.format(val_loss))
                tfsummary_logger.log_scalar(tag='val_loss', value=val_loss, step=epoch)
            else:
                val_loss = total_loss
            if val_loss < min_val_loss:
                min_val_loss = val_loss
                saver.save(sess, os.path.join(cfg.checkpoint_path, 'Val_loss_%.6f_loss_%.6f_epoch' % (val_loss, total_loss)),
                           global_step=epoch, write_meta_graph=False)
                print("Model save !")
            else:
                print('val_loss did not decrease; model not saved.')
            if epoch % cfg.decay_epoch == 0:
                lr = max(lr * cfg.decay_rate, 1e-8)
            TDG.shuffle()
            continue
        break
    sess.close()
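nets.lossfunc is not listed in this post. Judging from the call site, confidence_loss(label_image_normal, output_sigmoid, alpha=0.99) takes probabilities rather than logits and weights the two classes by alpha, so one plausible implementation is an alpha-balanced binary cross-entropy. The sketch below is that assumption, not the author's actual code:

def confidence_loss(y_true, y_pred, alpha=0.99, eps=1e-7):
    # Assumed form. y_true: (N, H, W, 1) float map in {0, 1};
    # y_pred: sigmoid probabilities of the same shape.
    y_pred = tf.clip_by_value(y_pred, eps, 1.0 - eps)
    pos = -alpha * y_true * tf.log(y_pred)                         # rare defect/edge pixels
    neg = -(1.0 - alpha) * (1.0 - y_true) * tf.log(1.0 - y_pred)   # abundant background
    return tf.reduce_mean(pos + neg)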

Post-processing, Python (OpenCV) version:

import logging
import math
from pathlib import Path
import cv2
import numpy as np
import tensorflow as tf
import time

__SHOW_PROCESS = False

__model_input_height = 256
__model_input_width = __model_input_height
__model_input_layer_name = 'input_img_normal:0'
__model_input_layer2_name = 'input_sigmoid_score:0'
__model_output_layer_name = 'output_binary:0'

__HoughLinesPThreshold = 20
__HoughLinesPMinLinLength = __model_input_height / 6.0
__HoughLinesPMaxLineGap = __model_input_height / 25.0

__min_distance_to_merge = __model_input_height / 10
__min_angle_to_merge = 10

# Angle of a line segment, in degrees (0-180)
def __line_degress(line):
    orientation = math.atan2((line[1] - line[3]), (line[0] - line[2]))
    degress = abs(math.degrees(orientation))
    return degress


# Angle between the two vectors from `center` to m and n
def __two_vector_angle(center, m, n):
    theta = math.atan2(m[0] - center[0], m[1] - center[1]) - math.atan2(n[0] - center[0], n[1] - center[1])

    if theta > math.pi:
        theta -= 2 * math.pi

    if theta < -math.pi:
        theta += 2 * math.pi

    return abs(theta * 180.0 / math.pi)


# Minimum distance between two line segments
def __line_distance(line1, line2):
    def __line_magnitude(x1, y1, x2, y2):
        lineMagnitude = math.sqrt(math.pow((x2 - x1), 2) + math.pow((y2 - y1), 2))
        return lineMagnitude

    def __point_to_line_distance(point, line):
        px, py = point
        x1, y1, x2, y2 = line
        line_magnitude = __line_magnitude(x1, y1, x2, y2)
        if line_magnitude < 0.00000001:
            return 9999
        else:
            u1 = (((px - x1) * (x2 - x1)) + ((py - y1) * (y2 - y1)))
            u = u1 / (line_magnitude * line_magnitude)

            if (u < 0.00001) or (u > 1):
                # closest point does not fall within the line segment,
                # take the shorter distance to an endpoint
                ix = __line_magnitude(px, py, x1, y1)
                iy = __line_magnitude(px, py, x2, y2)
                if ix > iy:
                    distance = iy
                else:
                    distance = ix
            else:
                # Intersecting point is on the line, use the formula
                ix = x1 + u * (x2 - x1)
                iy = y1 + u * (y2 - y1)
                distance = __line_magnitude(px, py, ix, iy)
            return distance

    dist1 = __point_to_line_distance((line1[0], line1[1]), line2)
    dist2 = __point_to_line_distance((line1[2], line1[3]), line2)
    dist3 = __point_to_line_distance((line2[0], line2[1]), line1)
    dist4 = __point_to_line_distance((line2[2], line2[3]), line1)

    return min(dist1, dist2, dist3, dist4)


# Merge the segments of one group into a single segment
def __merge_lines_segments(lines_group):
    if (len(lines_group) == 1):
        return lines_group[0]

    points = []
    for x1, y1, x2, y2 in lines_group:
        points.append((x1, y1))
        points.append((x2, y2))

    degress = __line_degress(lines_group[0])
    if 45 < degress < 135:
        # vertical-ish: sort by y, keep the topmost and bottommost points
        points = sorted(points, key=lambda point: point[1])
    else:
        # horizontal-ish: sort by x, keep the leftmost and rightmost points
        points = sorted(points, key=lambda point: point[0])

    return (*points[0], *points[-1])


# Merge the Hough line segments
def __merge_hough_lines(lines):
    # First grouping, by inclination angle (vertical-ish vs. horizontal-ish)
    lines_x = []
    lines_y = []
    for l in lines:
        degress = __line_degress(l)
        if 45 < degress < 135:
            lines_x.append(l)
        else:
            lines_y.append(l)

    # Merge segments based on the angle and distance between them
    super_lines = []

    # Second grouping, by distance and angle
    for lines_group in [lines_x, lines_y]:
        for idx, line in enumerate(lines_group):
            group_updated = False

            # Check whether the current segment can join an existing group
            for group in super_lines:
                for line2 in group:
                    if __line_distance(line2, line) < __min_distance_to_merge:
                        degress_i = __line_degress(line)
                        degress_j = __line_degress(line2)

                        if int(abs(degress_i - degress_j)) < __min_angle_to_merge:
                            group.append(line)

                            group_updated = True
                            break

                if group_updated:
                    break

            # Segments that fit no existing group start a new one; also scan the
            # remaining segments so they can join it
            if not group_updated:
                new_group = []
                new_group.append(line)

                for line2 in lines_group[idx + 1:]:
                    if __line_distance(line2, line) < __min_distance_to_merge:
                        degress_i = __line_degress(line)
                        degress_j = __line_degress(line2)

                        if int(abs(degress_i - degress_j)) < __min_angle_to_merge:
                            new_group.append(line2)

                super_lines.append(new_group)

    # Merge each group into one segment
    final_lines = []
    for lines_group in super_lines:
        final_lines.append(__merge_lines_segments(lines_group))

    return final_lines


# Intersection point of two lines
def __line_intersection(line1, line2):
    xdiff = (line1[0][0] - line1[1][0], line2[0][0] - line2[1][0])
    ydiff = (line1[0][1] - line1[1][1], line2[0][1] - line2[1][1])

    def det(a, b):
        return a[0] * b[1] - a[1] * b[0]

    div = det(xdiff, ydiff)
    if div == 0:
        return None

    d = (det(*line1), det(*line2))
    x = det(d, xdiff) / div
    y = det(d, ydiff) / div
    return (int(x), int(y))


# Find the rectangle
def __find_rect(lines):
    # Group the segments
    lines_x = []  # vertical-ish lines
    lines_y = []  # horizontal-ish lines
    for l in lines:
        degress = __line_degress(l)
        if 45 < degress < 135:
            lines_x.append(((l[0], l[1]), (l[2], l[3])))
        else:
            lines_y.append(((l[0], l[1]), (l[2], l[3])))

    lines_x = sorted(lines_x, key=lambda x: (x[0][0] + x[1][0]) / 2)  # sort left to right
    lines_y = sorted(lines_y, key=lambda x: (x[0][1] + x[1][1]) / 2)  # sort top to bottom

    if len(lines_x) < 2 or len(lines_y) < 2:
        return None

    top_left_point = __line_intersection(lines_x[0], lines_y[0])
    top_right_point = __line_intersection(lines_x[-1], lines_y[0])
    bottom_right_point = __line_intersection(lines_x[-1], lines_y[-1])
    bottom_left_point = __line_intersection(lines_x[0], lines_y[-1])

    return (top_left_point, top_right_point, bottom_right_point, bottom_left_point)


def __img_straighten(image_origin, src_vertices, output_width = 720, output_height = 456):
    rect_dist = [(0, 0), (0, output_height), (output_width, 0), (output_width, output_height)]
    src = [src_vertices[0], src_vertices[3], src_vertices[1], src_vertices[2]]

    m1 = cv2.getPerspectiveTransform(np.float32(src), np.float32(rect_dist))
    img_new = cv2.warpPerspective(image_origin, m1, (output_width, output_height))
    return img_new


# Load the frozen TensorFlow model
def __load_tf_session(pbfile):
    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(pbfile, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')

            tfconfig = tf.ConfigProto(allow_soft_placement=False, log_device_placement=False)
            tfconfig.gpu_options.allow_growth = True
            return tf.Session(config=tfconfig, graph=detection_graph)


# Edge-detection session
tf_session_edge = __load_tf_session(pbfile='./models/edge/xxxxxx-443.pb')


def card_edge_detect(origin_img, scores=0.9, imgpath='0.jpg'):
    try:
        x_ratio = float(origin_img.shape[1]) / __model_input_width
        y_ratio = float(origin_img.shape[0]) / __model_input_height

        img = cv2.resize(origin_img, (__model_input_width, __model_input_height), 0, 0, cv2.INTER_LINEAR)
        input_img = np.float32(img) / 255.0
        # Run the network
        output_dict = tf_session_edge.run([__model_output_layer_name], feed_dict={
            __model_input_layer_name: [input_img],
            __model_input_layer2_name: scores  # comment this line out when running hkold-edge-256X256-922000.pb
        })
        edge_img = output_dict[0].squeeze().astype(np.uint8)
        if __SHOW_PROCESS:
            cv2.imwrite(imgpath.replace('.jpg', '_edge_img.jpg'), edge_img*255)

        # Find line segments with the probabilistic Hough transform
        lines = cv2.HoughLinesP(edge_img,
                                rho=1,
                                theta=np.pi * 1 / 180,
                                threshold=__HoughLinesPThreshold,
                                minLineLength=__HoughLinesPMinLinLength,
                                maxLineGap=__HoughLinesPMaxLineGap)

        if lines is None:
            return None

        lines = lines.squeeze(axis=1)
        if __SHOW_PROCESS:
            show_img = origin_img.copy()
            for l in lines:
                x1, y1, x2, y2 = l
                x1, y1, x2, y2 = int(x1 * x_ratio), int(y1 * y_ratio), int(x2 * x_ratio), int(y2 * y_ratio)
                cv2.line(show_img, (x1, y1), (x2, y2), (0, 0, 255), 1)
            cv2.imwrite(imgpath.replace('.jpg', '_HoughLinesP.jpg'), show_img)
        # Merge the segments
        lines = __merge_hough_lines(lines)
        if __SHOW_PROCESS:
            show_img = origin_img.copy()
            for l in lines:
                x1, y1, x2, y2 = l
                x1, y1, x2, y2 = int(x1 * x_ratio), int(y1 * y_ratio), int(x2 * x_ratio), int(y2 * y_ratio)
                cv2.line(show_img, (x1, y1), (x2, y2), (0, 0, 255), 1)
            cv2.imwrite(imgpath.replace('.jpg', '__merge_hough_lines.jpg'), show_img)

        # Find the rectangle
        rect_points = __find_rect(lines)
        if rect_points is None:
            return None

        rect_points = np.array(rect_points)
        rect_points[:, 0] = rect_points[:, 0] * x_ratio
        rect_points[:, 1] = rect_points[:, 1] * y_ratio
        rect_points = rect_points.astype(np.int32)  # astype returns a copy, so reassign

        if __SHOW_PROCESS:
            show_img = origin_img.copy()
            for (x1, y1), (x2, y2) in [(rect_points[0], rect_points[1]),
                                       (rect_points[1], rect_points[2]),
                                       (rect_points[2], rect_points[3]),
                                       (rect_points[3], rect_points[0]),
                                       ]:
                cv2.line(show_img, (x1, y1), (x2, y2), (0, 0, 255), 2)
            cv2.imwrite(imgpath.replace('.jpg', '__rect.jpg'), show_img)

        # Filter by area
        cnt_area_size = cv2.contourArea(rect_points)
        total_area_size = origin_img.shape[0] * origin_img.shape[1]

        if cnt_area_size / total_area_size < 0.2 or cnt_area_size / total_area_size > 0.99:
            return None
        return rect_points
    except Exception:
        logging.exception('edge_detect exception!!!')
    return None
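A minimal driver for card_edge_detect (file names here are placeholders): detect the four corners on the original image, then rectify the region with the __img_straighten helper defined above.

if __name__ == '__main__':
    origin_img = cv2.imread('test.jpg')    # placeholder input
    rect = card_edge_detect(origin_img, scores=0.9, imgpath='test.jpg')
    if rect is not None:
        # rect: top-left, top-right, bottom-right, bottom-left, in original coordinates
        warped = __img_straighten(origin_img, [tuple(p) for p in rect])
        cv2.imwrite('test_rectified.jpg', warped)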

Post-processing, C++ (OpenCV) version:

#include "fm_ocr_scanner.hpp"
#include <tuple>
#include <vector>

//See the call sites for an explanation of these constants
const int kHoughLinesPThreshold = 20;
const double kHoughLinesPMinLinLength = 20.0;
const double kHoughLinesPMaxLineGap = 3.0;

const int kMergeLinesMaxDistance = 5;
const int kIntersectionMinAngle = 45;
const int kIntersectionMaxAngle = 135;
const double kCloserPointMaxDistance = 6.0;
const double kRectOpposingSidesMinRatio = 0.5;
const int kPointOnLineMaxOffset = 8;
const int kSameSegmentsMaxAngle = 5;


struct Corner {
    cv::Point point;
    std::vector<cv::Vec4i> segments;
};


static bool IsPointOnLine(const cv::Point point, const cv::Vec4i line) {
    cv::Point p0 = cv::Point(line[0], line[1]);
    cv::Point p1 = cv::Point(line[2], line[3]);
    
    int min_x, max_x, min_y, max_y;
    //The two segments at a rectangle corner may be slightly broken apart by the HED and
    //Hough detection steps, so extend both ends of the line by a small offset
    min_x = MIN(p0.x, p1.x) - kPointOnLineMaxOffset;
    max_x = MAX(p0.x, p1.x) + kPointOnLineMaxOffset;
    min_y = MIN(p0.y, p1.y) - kPointOnLineMaxOffset;
    max_y = MAX(p0.y, p1.y) + kPointOnLineMaxOffset;
    
    if (point.x >= min_x && point.x <= max_x && point.y >= min_y && point.y <= max_y) {
        return true;
    }
    
    return false;
}

//https://gist.github.com/ceykmc/18d3f82aaa174098f145
static std::array<int, 3> Cross(const std::array<int, 3> &a,
                                const std::array<int, 3> &b) {
    std::array<int, 3> result;
    result[0] = a[1] * b[2] - a[2] * b[1];
    result[1] = a[2] * b[0] - a[0] * b[2];
    result[2] = a[0] * b[1] - a[1] * b[0];
    return result;
}

//In this version, `line` is treated as an infinitely long straight line
static bool GetIntersection(const cv::Vec4i &line_a, const cv::Vec4i &line_b, cv::Point &intersection) {
    std::array<int, 3> pa{ { line_a[0], line_a[1], 1 } };
    std::array<int, 3> pb{ { line_a[2], line_a[3], 1 } };
    std::array<int, 3> la = Cross(pa, pb);
    pa[0] = line_b[0], pa[1] = line_b[1], pa[2] = 1;
    pb[0] = line_b[2], pb[1] = line_b[3], pb[2] = 1;
    std::array<int, 3> lb = Cross(pa, pb);
    std::array<int, 3> inter = Cross(la, lb);
    if (inter[2] == 0) return false; // two lines are parallel
    else {
        intersection.x = inter[0] / inter[2];
        intersection.y = inter[1] / inter[2];
        return true;
    }
}

//In this version, `line` is a finite segment, so additionally check that the intersection lies on both segments
static bool GetSegmentIntersection(const cv::Vec4i &line_a, const cv::Vec4i &line_b, cv::Point &intersection) {
    std::array<int, 3> pa{ { line_a[0], line_a[1], 1 } };
    std::array<int, 3> pb{ { line_a[2], line_a[3], 1 } };
    std::array<int, 3> la = Cross(pa, pb);
    
    pa[0] = line_b[0];
    pa[1] = line_b[1];
    pa[2] = 1;
    
    pb[0] = line_b[2];
    pb[1] = line_b[3];
    pb[2] = 1;
    
    std::array<int, 3> lb = Cross(pa, pb);
    std::array<int, 3> inter = Cross(la, lb);
    if (inter[2] == 0) return false; // two lines are parallel
    else {
        intersection.x = inter[0] / inter[2];
        intersection.y = inter[1] / inter[2];
        
        if (IsPointOnLine(intersection, line_a) == true && IsPointOnLine(intersection, line_b) == true) {
            return true;
        }
        
        return false;
    }
}


//////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////
/**
 atan2 does not return 0~360 degrees, but 0~180 and/or 0~-180 degrees; that was a
 pitfall here earlier, so the result needs to be corrected.
 
 Some real data collected for reference. Note (-176.228, 173.9) in the second row: one
 value positive, one negative, yet both are in fact correct. Both lines are close to
 horizontal, one tilted slightly up and the other slightly down, i.e. one angle below
 180 degrees and one above; mathematically, representing the angle below 180 with a
 positive number and the one above 180 with a negative number is equivalent.
 ------- debug, (angle_top, angle_bottom) are: (178.652, 179.599), (angle_right, angle_left) are: (-101.136, -75.3236)
 ------- debug, (angle_top, angle_bottom) are: (-176.228, 173.9), (angle_right, angle_left) are: (-114.411, -97.219)
 ------- debug, (angle_top, angle_bottom) are: (-142.927, -157.126), (angle_right, angle_left) are: (-62.549, -54.9165)
 ------- debug, (angle_top, angle_bottom) are: (-176.576, -179.441), (angle_right, angle_left) are: (-107.324, -64.6538)
 */
/**
 The two functions below are the uncorrected versions that introduced the bug:
 static double GetAngleOfLine(const cv::Vec4i &line) {
 int x1 = line[0], y1 = line[1], x2 = line[2], y2 = line[3];
 
 double angle = atan2(y2 - y1, x2 - x1) * 180.0 / CV_PI;
 
 return angle;
 }
 
 static double GetAngleOfTwoPoints(const cv::Point &point_a, const cv::Point &point_b) {
 double angle = atan2(point_b.y - point_a.y, point_b.x - point_a.x) * 180.0 / CV_PI;
 return angle;
 }
 */

//http://opencv-users.1802565.n2.nabble.com/Angle-between-2-lines-td6803229.html
//http://stackoverflow.com/questions/2339487/calculate-angle-of-2-points
static int GetAngleOfLine(const cv::Vec4i &line) {
    int x1 = line[0], y1 = line[1], x2 = line[2], y2 = line[3];
    
    //http://stackoverflow.com/questions/1311049/how-to-map-atan2-to-degrees-0-360
    //The fix degrees = (degrees + 360) % 360 yields an int angle; a little precision is lost, but it is enough for this algorithm
    double angle = atan2(y2 - y1, x2 - x1) * 180.0 / CV_PI;
    int fix_angle = ((int)angle + 360) % 360;
    
    assert(fix_angle >= 0);
    assert(fix_angle <= 360);
    return fix_angle;
}

static int GetAngleOfTwoPoints(const cv::Point &point_a, const cv::Point &point_b) {
    double angle = atan2(point_b.y - point_a.y, point_b.x - point_a.x) * 180.0 / CV_PI;
    int fix_angle = ((int)angle + 360) % 360;
    
    assert(fix_angle >= 0);
    assert(fix_angle <= 360);
    return fix_angle;
}
//////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////

/**
 RefLineVec4i is special: if viewed as a vector, its direction follows a fixed rule.
 Its two points always run from left to right; if the RefLine is parallel to the Y axis
 (so left/right cannot be distinguished), they run from bottom to top.
 */
typedef cv::Vec4i RefLineVec4i;

static bool IsTwoRefLineCloseToEachOther(RefLineVec4i line_a, RefLineVec4i line_b) {
    if (std::abs(line_a[1] - line_b[1]) < kMergeLinesMaxDistance && std::abs(line_a[3] - line_b[3]) < kMergeLinesMaxDistance) {
        return true;
    }
    
    return false;
}


static RefLineVec4i GetRefLine(const cv::Vec4i line, int image_width, int image_height) {
    /**
     line format is (x_{start}, y_{start}, x_{end}, y_{end})
     corresponding to   line[0]    line[1]   line[2]  line[3]
     
     The line equation is y = a*x + b
     
     From the two equations
     line[1] = a*line[0] + b
     line[3] = a*line[2] + b
     
     we can derive
     line[1] - line[3] = a * (line[0] - line[2])
     
     which gives
     a = (line[1] - line[3]) / (line[0] - line[2])
     b = line[1] - a*line[0]
       = (line[0]*line[3] - line[2]*line[1]) / (line[0] - line[2])
     */
    
    RefLineVec4i ref_line;
    
    if (line[0] == line[2]) {
        //parallel to the Y axis: order the points bottom to top
        ref_line[0] = line[0];
        ref_line[1] = 0; //bottom to top
        ref_line[2] = line[2];
        ref_line[3] = image_height;
    } else if (line[1] == line[3]) {
        //parallel to the X axis: order the points left to right
        ref_line[0] = 0; //left to right
        ref_line[1] = line[1];
        ref_line[2] = image_width;
        ref_line[3] = line[3];
    } else {
        //only the sloped lines in this branch are computed with the formula; here
        //(line[0] - line[2]) != 0, so the division below cannot fail:
        //a = (line[1] - line[3]) / (line[0] - line[2])
        
        float a, b;
        a = (float)(line[1] - line[3]) / (float)(line[0] - line[2]);
        b = (float)(line[0]*line[3] - line[2]*line[1]) / (float)(line[0] - line[2]);
        
        // y = a*x + b
        ref_line[0] = 0; //left to right
        ref_line[1] = int(b);
        ref_line[2] = int((image_height - b) / a);
        ref_line[3] = image_height;// ref_line[3] = a*ref_line[2] + b
        
        //std::cout << "__ ref_line are: (" << ref_line[0] << ", " << ref_line[1] << ", " << ref_line[2] << ", " << ref_line[3] << ")" << std::endl;
    }
 
    return ref_line;
}

static bool SortPointsByXaxis(const cv::Point &a, const cv::Point &b) {
    return a.x < b.x;
}

static bool SortPointsByYaxis(const cv::Point &a, const cv::Point &b) {
    return a.y < b.y;
}

static bool SortCornersByXaxis(const Corner &a, const Corner &b) {
    return a.point.x < b.point.x;
}

static bool SortCornersByYaxis(const Corner &a, const Corner &b) {
    return a.point.y < b.point.y;
}

static bool IsSegmentsHasSameSegment(const std::vector<cv::Vec4i> segments, const cv::Vec4i segment, int image_width) {
    for (int i = 0; i < segments.size(); i++) {
        cv::Vec4i seg = segments[i];
        
        int angle_a = GetAngleOfLine(seg);
        int angle_b = GetAngleOfLine(segment);
        
        int diff = std::abs(angle_a - angle_b);
        diff = diff % 90;//normalize into the 0~90 range
        
        //std::cout << " ********************, angle_a, angle_b are: (" << angle_a << ", " << angle_b << "), diff is: " << diff << std::endl;
        if (diff < kSameSegmentsMaxAngle || diff > (90 - kSameSegmentsMaxAngle)) {
            return true;
        }
    }
    
    //TODO: consider whether a stricter criterion is needed here
    return false;
}

/**
 HoughLinesP detects segments, which have finite length.
 Each segment is extended into a uniform RefLineVec4i to form a pair, and this function
 merges those pairs. The RefLineVec4i decides whether a merge is needed; when it is, the
 corresponding HoughLinesP segments are recombined into one longer segment.
 */
static std::vector<std::tuple<RefLineVec4i, cv::Vec4i> > MergeRefLineAndSegmentPairs(std::vector<std::tuple<RefLineVec4i, cv::Vec4i> > ref_line_and_segment_pairs, int image_width, int image_height) {
    std::vector<std::tuple<RefLineVec4i, cv::Vec4i> > merged_ref_line_and_segment_pairs;
    
    for (int i = 0; i < ref_line_and_segment_pairs.size(); i++) {
        std::tuple<RefLineVec4i, cv::Vec4i> ref_line_and_segment = ref_line_and_segment_pairs[i];
        
        auto ref_line = std::get<0>(ref_line_and_segment);
        auto segment = std::get<1>(ref_line_and_segment);
        
        if (merged_ref_line_and_segment_pairs.size() == 0) {
            merged_ref_line_and_segment_pairs.push_back(std::make_tuple(ref_line, segment));
        } else {
            bool isCloser = false;
            for (int j = 0; j < merged_ref_line_and_segment_pairs.size(); j++) {
                auto merged_ref_line_and_segment = merged_ref_line_and_segment_pairs[j];
                auto merged_ref_line = std::get<0>(merged_ref_line_and_segment);
                auto merged_segment = std::get<1>(merged_ref_line_and_segment);
                
                //std::cout << "debug, std::abs(line[1] - merged_line[1]) " << std::abs(line[1] - merged_line[1]) << ", std::abs(line[3] - merged_line[3]) " << std::abs(line[3] - merged_line[3]) << std::endl;
                
                if (IsTwoRefLineCloseToEachOther(ref_line, merged_ref_line) == true) {
                    //If the two ref lines are close to each other, merge their segments into one and regenerate the ref line
                    
                    //Collect the 4 endpoints
                    cv::Point p0 = cv::Point(segment[0], segment[1]);
                    cv::Point p1 = cv::Point(segment[2], segment[3]);
                    cv::Point p2 = cv::Point(merged_segment[0], merged_segment[1]);
                    cv::Point p3 = cv::Point(merged_segment[2], merged_segment[3]);
                    
                    std::vector<cv::Point> point_vector;
                    point_vector.push_back(p0);
                    point_vector.push_back(p1);
                    point_vector.push_back(p2);
                    point_vector.push_back(p3);
                    
                    //After sorting, the leftmost and rightmost points form the new segment
                    std::sort(point_vector.begin(), point_vector.end(), SortPointsByXaxis);
                    cv::Point left_most_point = point_vector[0];
                    cv::Point right_most_point = point_vector[3];
                    
                    cv::Vec4i new_segment;
                    new_segment[0] = left_most_point.x;
                    new_segment[1] = left_most_point.y;
                    new_segment[2] = right_most_point.x;
                    new_segment[3] = right_most_point.y;
                    //TODO: consider other merge strategies here, e.g. nudging the two endpoints so they lie exactly on the new line
                    
                    RefLineVec4i new_ref_line = GetRefLine(new_segment, image_width, image_height);
                    merged_ref_line_and_segment_pairs[j] = std::make_tuple(new_ref_line, new_segment);
                    isCloser = true;
                    break;
                }
            }
            
            if (isCloser == false) {
                merged_ref_line_and_segment_pairs.push_back(std::make_tuple(ref_line, segment));
            }
        }
    }
    
    return merged_ref_line_and_segment_pairs;
}

static double PointsDistance(const cv::Point &a, const cv::Point &b) {
    double x_distance = (double)a.x - (double)b.x;
    double y_distance = (double)a.y - (double)b.y;
    
    double distance = cv::sqrt(x_distance * x_distance + y_distance * y_distance);
    
    //std::cout << " -- pointsDistance, [x_distance, y_distance, distance] are: [" << x_distance << ", " << y_distance << ", " << distance << "]" << std::endl;
    return distance;
}

/**
 Sort the 4 corners clockwise: top-left, top-right, bottom-right, bottom-left, with indexes 0, 1, 2, 3
 */
static std::vector<Corner> ArrangeRectCorners(std::vector<Corner> rect_corners) {
    assert(rect_corners.size() == 4);
    
    std::sort(rect_corners.begin(), rect_corners.end(), SortCornersByXaxis);
    
    std::vector<Corner> left_two_corners;
    std::vector<Corner> right_two_corners;
    left_two_corners.push_back(rect_corners[0]);
    left_two_corners.push_back(rect_corners[1]);
    right_two_corners.push_back(rect_corners[2]);
    right_two_corners.push_back(rect_corners[3]);
    
    std::sort(left_two_corners.begin(), left_two_corners.end(), SortCornersByYaxis);
    std::sort(right_two_corners.begin(), right_two_corners.end(), SortCornersByYaxis);
    
    std::vector<Corner> sorted_corners;// top-left, top-right, bottom-right, bottom-left
    sorted_corners.push_back(left_two_corners[0]);
    sorted_corners.push_back(right_two_corners[0]);
    sorted_corners.push_back(right_two_corners[1]);
    sorted_corners.push_back(left_two_corners[1]);
    
    return sorted_corners;
}

/**
 A set of heuristics that decide whether 4 corners form a trustworthy rectangle.
 Because of perspective it will not be an exact rectangle but a trapezoid or a parallelogram.
 The 4 points have already been sorted by ArrangeRectCorners:
 top-left, top-right, bottom-right, bottom-left, with indexes 0, 1, 2, 3
 */
static bool IsRectCornersReasonable(std::vector<Corner> rect_corners, int image_width) {
    assert(rect_corners.size() == 4);
    
    //First heuristic: compare each side of the quadrilateral against the segments recorded at its corners
    std::vector<cv::Point> rect_points;
    rect_points.push_back(rect_corners[0].point);
    rect_points.push_back(rect_corners[1].point);
    rect_points.push_back(rect_corners[2].point);
    rect_points.push_back(rect_corners[3].point);
    
    cv::Vec4i segment_0_to_1 = cv::Vec4i(rect_points[0].x, rect_points[0].y, rect_points[1].x, rect_points[1].y);
    cv::Vec4i segment_1_to_2 = cv::Vec4i(rect_points[1].x, rect_points[1].y, rect_points[2].x, rect_points[2].y);
    cv::Vec4i segment_2_to_3 = cv::Vec4i(rect_points[2].x, rect_points[2].y, rect_points[3].x, rect_points[3].y);
    cv::Vec4i segment_3_to_0 = cv::Vec4i(rect_points[3].x, rect_points[3].y, rect_points[0].x, rect_points[0].y);
    
    std::vector<cv::Vec4i> rect_segments;
    rect_segments.push_back(segment_0_to_1);
    rect_segments.push_back(segment_1_to_2);
    rect_segments.push_back(segment_2_to_3);
    rect_segments.push_back(segment_3_to_0);
    
    
    /**
     segment_0_to_1 should be similar to at least one of the segments recorded at rect_corners[0],
     and likewise to at least one of the segments recorded at rect_corners[1]
     */
    if (IsSegmentsHasSameSegment(rect_corners[0].segments, segment_0_to_1, image_width) &&
        IsSegmentsHasSameSegment(rect_corners[1].segments, segment_0_to_1, image_width)) {
        
    } else {
        return false;
    }
    
    if (IsSegmentsHasSameSegment(rect_corners[1].segments, segment_1_to_2, image_width) &&
        IsSegmentsHasSameSegment(rect_corners[2].segments, segment_1_to_2, image_width)) {
        
    } else {
        return false;
    }
    
    if (IsSegmentsHasSameSegment(rect_corners[2].segments, segment_2_to_3, image_width) &&
        IsSegmentsHasSameSegment(rect_corners[3].segments, segment_2_to_3, image_width)) {
        
    } else {
        return false;
    }
    
    if (IsSegmentsHasSameSegment(rect_corners[3].segments, segment_3_to_0, image_width) &&
        IsSegmentsHasSameSegment(rect_corners[0].segments, segment_3_to_0, image_width)) {
        
    } else {
        return false;
    }
    
    
    //Second heuristic: the shape of the quadrilateral
    double distance_of_0_to_1 = PointsDistance(rect_points[0], rect_points[1]);
    double distance_of_1_to_2 = PointsDistance(rect_points[1], rect_points[2]);
    double distance_of_2_to_3 = PointsDistance(rect_points[2], rect_points[3]);
    double distance_of_3_to_0 = PointsDistance(rect_points[3], rect_points[0]);
    
    
    //Compute the ratio (0.0 -- 1.0) for each pair of opposing sides
    //For a pair of opposing sides (the parallel sides of a perfect rectangle), minLength / maxLength must not
    //fall below 0.5, otherwise the shape is rejected. The threshold was originally 0.8, but because the images
    //are processed after rescaling, the aspect ratio varies a lot, so the filter is relaxed to 0.5.
    //distance_of_0_to_1 and distance_of_2_to_3 are one pair of opposing sides
    double ratio1 = MIN(distance_of_0_to_1, distance_of_2_to_3) / MAX(distance_of_0_to_1, distance_of_2_to_3);
    double ratio2 = MIN(distance_of_1_to_2, distance_of_3_to_0) / MAX(distance_of_1_to_2, distance_of_3_to_0);
    
    //std::cout << " ------- debug, distance_of_1_to_2 and distance_of_3_to_0 are: (" << distance_of_1_to_2 << ", " << distance_of_3_to_0 << ")" << std::endl;
    //std::cout << " ------- debug, ratio1 and ratio2 are: (" << ratio1 << ", " << ratio2 << ")" << std::endl;
    if ((ratio1 >= kRectOpposingSidesMinRatio) && (ratio2 >= kRectOpposingSidesMinRatio)) {
        //At least one pair of opposing sides should be close to parallel (depending on the perspective,
        //the quadrilateral is a trapezoid or a parallelogram); use that for one more round of checks
        
        int angle_top, angle_bottom, angle_left, angle_right;//angles between the 4 sides and the horizontal axis
        angle_top = GetAngleOfTwoPoints(rect_points[1], rect_points[0]);
        angle_bottom = GetAngleOfTwoPoints(rect_points[2], rect_points[3]);
        
        angle_right = GetAngleOfTwoPoints(rect_points[2], rect_points[1]);
        angle_left = GetAngleOfTwoPoints(rect_points[3], rect_points[0]);
        
        //std::cout << "\n\n ------- debug, (angle_top, angle_bottom) are: (" << angle_top << ", " << angle_bottom << "), (angle_right, angle_left) are: (" << angle_right << ", " << angle_left << ")" << std::endl;
        
        int diff1 = std::abs(angle_top - angle_bottom);
        int diff2 = std::abs(angle_right - angle_left);
        diff1 = diff1 % 90;
        diff2 = diff2 % 90;//normalize into the 0~90 range
        //std::cout << " ---------------debug, diff1 and diff2 are: [" << diff1 << ", " << diff2 << "]" << std::endl;
        
        //These thresholds are all empirical values
        if (diff1 <= 8 && diff2 <= 8) {
            //shot from straight above: a parallelogram
            return true;
        }
        
        if (diff1 <= 8 && diff2 <= 45) {
            //a trapezoid, due to perspective
            return true;
        }
        if (diff1 <= 45 && diff2 <= 8) {
            //a trapezoid, due to perspective
            return true;
        }
    }
    
    return false;
}


#define ENABLE_DEBUG_MODE
std::tuple<bool, std::vector<cv::Point>, std::vector<cv::Mat> > ProcessEdgeImage(cv::Mat edge_image, cv::Mat color_image, bool draw_debug_image) {
    assert(edge_image.rows == color_image.rows);
    assert(edge_image.cols == color_image.cols);
    
    int height = edge_image.rows;
    int width = edge_image.cols;
    
    std::vector<cv::Point> results;
    std::vector<cv::Mat> debug_images;
    
#ifdef ENABLE_DEBUG_MODE
    cv::Mat lines_image, corners_image, rect_image;
    if (draw_debug_image) {
        lines_image = color_image.clone();
        corners_image = color_image.clone();
        rect_image = color_image.clone();
    }
#endif
    
    /**
     find rectangles
     http://blog.ayoungprogrammer.com/2013/04/tutorial-detecting-multiple-rectangles.html/
     https://github.com/bsdnoobz/opencv-code/blob/master/quad-segmentation.cpp
     http://monkeycoding.com/?p=656
     */
    //<1>Convert the 0.0~1.0 float image into a 0~255 int image
    cv::Mat gray_image;
    edge_image.convertTo(gray_image, CV_8UC1, 255.0);//http://stackoverflow.com/questions/22117267/how-to-convert-an-image-to-a-float-image-in-opencv   http://stackoverflow.com/questions/6302171/convert-uchar-mat-to-float-mat-in-opencv
    
    //<2>Find line segments
    cv::Mat binary_image;
    threshold(gray_image, binary_image, 128, 255, cv::THRESH_BINARY); //HoughLinesP expects a binary input mat
    gray_image = binary_image;
    /**
     vector<Vec4i> lines;
     HoughLinesP(dst, lines, 1, CV_PI/180, 50, 50, 10 );
     
     with the arguments:
     
     dst: Output of the edge detector. It should be a grayscale image (although in fact it is a binary one)
     lines: A vector that will store the parameters (x_{start}, y_{start}, x_{end}, y_{end}) of the detected lines
     rho : The resolution of the parameter r in pixels. We use 1 pixel.
     theta: The resolution of the parameter \theta in radians. We use 1 degree (CV_PI/180)
     threshold: The minimum number of intersections to “detect” a line
     minLinLength: The minimum number of points that can form a line. Lines with less than this number of points are disregarded.
     maxLineGap: The maximum gap between two points to be considered in the same line.
     */
    std::vector<cv::Vec4i> linesP;
    cv::HoughLinesP(gray_image, linesP, 1, CV_PI * 1/180, kHoughLinesPThreshold, kHoughLinesPMinLinLength, kHoughLinesPMaxLineGap);//these parameters can detect fairly small rectangles, but they are also more sensitive to clutter and will easily pick up short segments outside the rectangle
    
    //<3>Extend the segments into reference lines (segments clipped to the full image size) and run a first round of filtering
    std::vector<std::tuple<RefLineVec4i, cv::Vec4i> > ref_line_and_segment_pairs;
    for (int i = 0; i < linesP.size(); i++) {
        cv::Vec4i segment = linesP[i];
        RefLineVec4i ref_line = GetRefLine(segment, edge_image.cols, edge_image.rows);//extend the segment into a reference line
        
        //filter by segment length
        double segment_length = cv::sqrt(((float)segment[1] - segment[3]) * ((float)segment[1] - segment[3]) + ((float)segment[0] - segment[2]) * ((float)segment[0] - segment[2]));
        if (segment_length > kHoughLinesPMinLinLength) {
            ref_line_and_segment_pairs.push_back(std::make_tuple(ref_line, segment));
        }
    }
    
    //<4>Merge nearby lines
    std::vector<std::tuple<RefLineVec4i, cv::Vec4i> > merged_ref_line_and_segment_pairs = MergeRefLineAndSegmentPairs(ref_line_and_segment_pairs, edge_image.cols, edge_image.rows);
    std::vector<RefLineVec4i> ref_lines;
    std::vector<cv::Vec4i> segments;
    for (int i = 0; i < merged_ref_line_and_segment_pairs.size(); i++) {
        std::tuple<RefLineVec4i, cv::Vec4i> ref_line_and_segment = merged_ref_line_and_segment_pairs[i];
        
        auto ref_line = std::get<0>(ref_line_and_segment);
        auto segment = std::get<1>(ref_line_and_segment);
        
        ref_lines.push_back(ref_line);
        segments.push_back(segment);
    }
 
#ifdef ENABLE_DEBUG_MODE
    if (draw_debug_image) {
        for (int i = 0; i < segments.size(); i++) {
            cv::Vec4i v = segments[i];
            if (draw_debug_image) {
                cv::line(lines_image, cv::Point(v[0], v[1]), cv::Point(v[2], v[3]), CV_RGB(0,255,0));
            }
            //std::cout << " ^^^^^^ debug, cv::Point(v[0], v[1]) is: " << cv::Point(v[0], v[1]) << ",  cv::Point(v[2], v[3]) is: " << cv::Point(v[2], v[3]) << std::endl;
        }
    }
#endif
    
    //<5>Find the intersections of the segments and filter them
    std::vector<cv::Point> all_corners;
    std::vector<Corner> corners;
    for (int i = 0; i < segments.size(); i++) {
        for (int j = i + 1; j < segments.size(); j++) {
            cv::Vec4i segment_a = segments[i], segment_b = segments[j];
            
            //https://gist.github.com/ceykmc/18d3f82aaa174098f145 two lines intersection
            //http://stackoverflow.com/questions/20677795/how-do-i-compute-the-intersection-point-of-two-lines-in-python
            cv::Point intersection_point;
            if (GetSegmentIntersection(segment_a, segment_b, intersection_point) == true) {
                all_corners.push_back(intersection_point);
                
                
                //First round of intersection filtering
                if (intersection_point.x <= 0 || intersection_point.y <= 0
                    || intersection_point.x >= width || intersection_point.y >= height) {
                    //std::cout << "^^^^^^^^^^^^^^ pointer <= 0, do not need " << std::endl;
                    //intersections that fall outside the image are filtered out
                } else {
                    int thetaA = GetAngleOfLine(segment_a);
                    int thetaB = GetAngleOfLine(segment_b);
                    
                    int angle = std::abs(thetaA - thetaB);
                    angle = angle % 180;//再修正到180度范围内
                    //std::cout << " ------- debug, (thetaA, thetaB) are: (" << thetaA << ", " << thetaB <<  "), two line angle is " << angle << std::endl;
                    
                    if (angle >= kIntersectionMinAngle && angle <= kIntersectionMaxAngle) {
                        //基于两条线的角度进行过滤
                        Corner c = Corner();
                        c.point = intersection_point;
                        c.segments.push_back(segment_a);
                        c.segments.push_back(segment_b);
                        corners.push_back(c);
                    }
                }
            }
        }
    }
    
    // Second round of filtering on the intersection points: if two points are very close, merge them into a single point marked by their average
    std::vector<Corner> average_corners;
    for(int i = 0; i < corners.size(); i++) {
        Corner corner = corners[i];
        
        if (average_corners.size() == 0) {
            average_corners.push_back(corner);
        } else {
            bool isCloser = false;
            for (int j = 0; j < average_corners.size(); j++) {
                Corner c = average_corners[j];
                
                cv::Point diff = corner.point - c.point;
                double distance = std::sqrt((double)(diff.x*diff.x + diff.y*diff.y));
                //std::cout << " _____ debug, distance is: " << distance << std::endl;
                if (distance < kCloserPointMaxDistance) {
                    // the two points are very close; merge them into one
                    Corner newCorner = Corner();
                    newCorner.point = cv::Point((corner.point.x + c.point.x) / 2, (corner.point.y + c.point.y) / 2);
                    
                    // the segment arrays of the two corners also need to be merged
                    std::vector<cv::Vec4i> segment_a = corner.segments;
                    std::vector<cv::Vec4i> segment_b = c.segments;
                    
                    // this is an efficient way to concatenate two vectors
                    //http://stackoverflow.com/questions/2551775/appending-a-vector-to-a-vector
                    newCorner.segments.insert(newCorner.segments.end(), segment_a.begin(), segment_a.end());
                    newCorner.segments.insert(newCorner.segments.end(), segment_b.begin(), segment_b.end());
                    
                    average_corners[j] = newCorner;
                    isCloser = true;
                    break;
                }
            }
            
            if (!isCloser) {
                average_corners.push_back(corner);
            }
        }
    }
    //std::cout << "debug, all_corners " << all_corners.size() << ",  corners " << corners.size() << ", average_corners " << average_corners.size() << std::endl;

#ifdef ENABLE_DEBUG_MODE
    if (draw_debug_image) {
        for (int i = 0; i < average_corners.size(); i++) {
            Corner corner = average_corners[i];
            cv::circle(corners_image, corner.point, 3, CV_RGB(255,0,0), 2);
            //std::cout << "average_corners i = " << i << ",  point is: " << corner.point << ", segment size is: " << corner.segments.size() << std::endl;
            
            for (int j = 0; j < corner.segments.size(); j++) {
                cv::Vec4i v = corner.segments[j];
                cv::line(corners_image, cv::Point(v[0], v[1]), cv::Point(v[2], v[3]), CV_RGB(0,0,255));
            }
        }
    }
#endif
    
    //<6> Find the quadrilateral
    if (average_corners.size() >= 4) {
        // at least 4 corner points are needed to form a rectangle (TODO: too many points also hurts performance, so an upper bound is probably needed as well, together with a strategy for reducing the number of points once it is reached)
        double maxPerimeter = 0.0;
        
        std::vector<Corner> rect_corners;
        std::vector<Corner> rect_corners_with_max_perimeter;
        std::vector<cv::Point> rect_points_with_max_perimeter;
        
        // the quadruple loop is still fairly expensive: C(n, 4) candidate combinations
        for(int i = 0; i <= average_corners.size() - 4; i++) {
            for(int j = i + 1; j <= average_corners.size() - 3; j++) {
                for(int m = j + 1; m <= average_corners.size() - 2; m++) {
                    for(int n = m + 1; n <= average_corners.size() - 1; n++) {
                        
                        rect_corners.clear();
                        rect_corners.push_back(average_corners[i]);
                        rect_corners.push_back(average_corners[j]);
                        rect_corners.push_back(average_corners[m]);
                        rect_corners.push_back(average_corners[n]);
                        
                        // sort the four points in clockwise order
                        rect_corners = ArrangeRectCorners(rect_corners);
                        
                        // discard the candidate immediately if it is not a reasonable quadrilateral
                        if (!IsRectCornersReasonable(rect_corners, edge_image.cols)) {
                            continue;
                        }
                        
                        std::vector<cv::Point> rect_points;
                        rect_points.push_back(rect_corners[0].point);
                        rect_points.push_back(rect_corners[1].point);
                        rect_points.push_back(rect_corners[2].point);
                        rect_points.push_back(rect_corners[3].point);
                        
                        double perimeter = contourArea(rect_points); // despite the name, this selects the candidate with the maximum area
                        //double perimeter = arcLength(rect_points, true); // alternative: maximize the perimeter
                        //std::cout << "#############debug, perimeter is: " << perimeter << std::endl;
                        
                        if (perimeter > maxPerimeter) {
                            maxPerimeter = perimeter;
                            rect_corners_with_max_perimeter = rect_corners;
                            rect_points_with_max_perimeter = rect_points;
                        }
                    }
                }
            }
        }
        
        if (rect_points_with_max_perimeter.size() == 4) {
#ifdef ENABLE_DEBUG_MODE
            if (draw_debug_image) {
                const cv::Point *pts = (const cv::Point*) cv::Mat(rect_points_with_max_perimeter).data;
                int npts = cv::Mat(rect_points_with_max_perimeter).rows;
                
                polylines(rect_image, &pts, &npts, 1,
                          true,
                          cv::Scalar(0, 255, 255),
                          2,
                          CV_AA, 0);
            }
#endif
            
            results = rect_points_with_max_perimeter;
        }
    }
    
#ifdef ENABLE_DEBUG_MODE
    if (draw_debug_image) {
        debug_images.push_back(gray_image);
        debug_images.push_back(lines_image);
        debug_images.push_back(corners_image);
        debug_images.push_back(rect_image);
    }
#endif
    
    bool find_rect = (results.size() == 4);
    return std::make_tuple(find_rect, results, debug_images);
}
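
Several helper functions are called in the listing above but not shown here. The sketches below are illustrative only, not the original implementations (those are in the repository linked under the related links): they assume `RefLineVec4i` is an alias for `cv::Vec4i` and that `Corner` is roughly `struct Corner { cv::Point point; std::vector<cv::Vec4i> segments; };`. The first extends a Hough segment into a reference line truncated at the image borders, the second returns a segment's angle in degrees for the intersection-angle filter in step <5>, and the third orders four candidate corners clockwise for step <6>:

#include <algorithm>
#include <cmath>
#include <vector>
#include <opencv2/opencv.hpp>

// Sketch of GetRefLine: push both endpoints far beyond the image along the
// segment direction, then clip the result back to the image rectangle.
static cv::Vec4i GetRefLineSketch(const cv::Vec4i &segment, int width, int height) {
    cv::Point p1(segment[0], segment[1]), p2(segment[2], segment[3]);
    float dx = (float)(p2.x - p1.x), dy = (float)(p2.y - p1.y);
    float len = std::max(1.0f, std::hypot(dx, dy));
    float far = (float)(width + height);                 // longer than any chord of the image
    cv::Point a(p1.x - (int)(dx / len * far), p1.y - (int)(dy / len * far));
    cv::Point b(p2.x + (int)(dx / len * far), p2.y + (int)(dy / len * far));
    cv::clipLine(cv::Size(width, height), a, b);         // truncate to the image rectangle
    return cv::Vec4i(a.x, a.y, b.x, b.y);
}

// Sketch of GetAngleOfLine: segment angle in degrees; the caller takes the
// absolute difference of two such angles and reduces it modulo 180.
static int GetAngleOfLineSketch(const cv::Vec4i &segment) {
    float theta = std::atan2((float)(segment[3] - segment[1]), (float)(segment[2] - segment[0]));
    return (int)(theta * 180.0f / (float)CV_PI);         // range (-180, 180]
}

// Sketch of ArrangeRectCorners: sort the four corners by angle around the
// centroid. Image y grows downwards, so ascending atan2 is clockwise on screen.
static std::vector<Corner> ArrangeRectCornersSketch(std::vector<Corner> corners) {
    cv::Point2f center(0.0f, 0.0f);
    for (const Corner &c : corners) center += cv::Point2f((float)c.point.x, (float)c.point.y);
    center *= 1.0f / (float)corners.size();
    std::sort(corners.begin(), corners.end(), [&center](const Corner &a, const Corner &b) {
        return std::atan2((float)a.point.y - center.y, (float)a.point.x - center.x)
             < std::atan2((float)b.point.y - center.y, (float)b.point.x - center.x);
    });
    return corners;
}

Note that sorting by angle around the centroid only guarantees a clockwise traversal for convex quadrilaterals, which is presumably the configuration the IsRectCornersReasonable check is meant to keep.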

Related links (thanks to the author of the C++ version):

1.http://fengjian0106.github.io/2017/05/08/Document-Scanning-With-TensorFlow-And-OpenCV/

2.http://fengjian0106.github.io/2018/06/02/Document-Scanning-With-TensorFlow-And-OpenCV-Part-Two/

3.https://github.com/fengjian0106/hed-tutorial-for-document-scanning
