1. Photo document scanning and recognition is one of the most commonly used features in office apps, and there are many such apps on the market. The main technical building blocks all involve image processing: edge detection and correction, document filters, and OCR.
2. For document edge detection there are both traditional algorithms and deep-learning approaches. Traditional algorithms do not generalize well to new scenes. On the deep-learning side, I previously wrote an ID-card edge-detection project based on HED, but its speed on mobile devices was not ideal.
3. Mobile LSD (M-LSD) is a real-time, lightweight line-segment detection network for resource-constrained environments. It runs at 56.8 FPS and 48.6 FPS on Android and iPhone devices respectively, and the authors state it is the first real-time deep LSD method that can run on mobile devices. Paper: https://arxiv.org/abs/2106.00186 , source code: https://github.com/navervision/mlsd .
4. Test results provided by the authors:
Results of the C++ implementation:
5. My development environment is Windows 10, VS2019, OpenCV 4.5 and ncnn, implemented in C++; the VulkanSDK is also required if GPU acceleration is enabled.
1. The authors provide models in several sizes; they first have to be converted to ncnn models. The conversion steps are described in the ncnn official documentation. The class skeleton and model-loading code assumed by the rest of this post are sketched below.
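The class members and helpers referenced by the code in this post (w_scale, h_scale, target_size, lsdnet, norm_vals, the Line struct) are not shown in the original; the following is a minimal sketch, assuming an input size of 512, made-up model file names, and placeholder normalization values and default thresholds, of what the header and model loading could look like. use_vulkan_compute only takes effect when ncnn was built with the VulkanSDK:
#include <algorithm>
#include <cmath>
#include <string>
#include <vector>
#include <opencv2/opencv.hpp>
#include "net.h"   // ncnn

// A detected line segment with its two endpoints in original-image coordinates
struct Line
{
    cv::Point _p1;
    cv::Point _p2;
};

class MLSD
{
public:
    int loadModel(const std::string& param_path, const std::string& bin_path, bool use_gpu);
    // The default thresholds here are assumptions, not values from the original post
    int detectLines(const cv::Mat& cv_src, std::vector<Line>& lines, int topk = 200,
        float score_threshold = 0.1f, float dist_threshold = 20.0f);
    int detectEdge(const cv::Mat& cv_src, std::vector<cv::Point>& out_points);
    int reviseImage(const cv::Mat& cv_src, cv::Mat& cv_dst, std::vector<cv::Point>& in_points);

private:
    ncnn::Net lsdnet;
    int target_size = 512;      // network input size (assumed)
    float w_scale = 1.f;
    float h_scale = 1.f;
    float norm_vals[4] = { 1 / 127.5f, 1 / 127.5f, 1 / 127.5f, 1 / 127.5f };   // assumed normalization
};

int MLSD::loadModel(const std::string& param_path, const std::string& bin_path, bool use_gpu)
{
    // GPU acceleration through Vulkan; requires ncnn built with the VulkanSDK
    lsdnet.opt.use_vulkan_compute = use_gpu;
    if (lsdnet.load_param(param_path.c_str()) != 0 || lsdnet.load_model(bin_path.c_str()) != 0)
    {
        return -1;
    }
    return 0;
}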
2. After conversion, the model can be used for line-segment detection. Inference code:
int MLSD::detectLines(const cv::Mat& cv_src, std::vector<Line>& lines, int topk, float score_threshold, float dist_threshold)
{
    // Scale factors from the network input size back to the original image
    w_scale = cv_src.cols / float(target_size);
    h_scale = cv_src.rows / float(target_size);
    // The output maps are half the input resolution
    int out_size = target_size / 2;

    ncnn::Extractor ex = lsdnet.create_extractor();
    // Resize to the network input size and convert to the 4-channel layout the model expects
    ncnn::Mat ncnn_in = ncnn::Mat::from_pixels_resize(cv_src.data, ncnn::Mat::PIXEL_RGB2BGRA,
        cv_src.cols, cv_src.rows, target_size, target_size);
    ncnn_in.substract_mean_normalize(0, norm_vals);
    ex.input("input", ncnn_in);

    // Network outputs: displacement map, center score map and its max-pooled version
    ncnn::Mat org_disp_map, max_map, center_map;
    ex.extract("out1", org_disp_map);
    ex.extract("Decoder/Sigmoid_4:0", center_map);
    ex.extract("out2", max_map);

    float* max_map_data = (float*)max_map.data;
    float* center_map_data = (float*)center_map.data;

    // Non-maximum suppression: a pixel is a line-center candidate only where the
    // max-pooled score equals the original score; keep (score, index) pairs for sorting
    std::vector<std::pair<float, int>> sort_result(max_map.total());
    for (int i = 0; i < max_map.total(); i++)
    {
        if (max_map_data[i] == center_map_data[i])
        {
            sort_result[i] = std::pair<float, int>(max_map_data[i], i);
        }
    }
    // Keep the topk highest-scoring centers
    std::partial_sort(sort_result.begin(), sort_result.begin() + topk, sort_result.end(),
        std::greater<std::pair<float, int>>());

    std::vector<std::pair<int, int>> topk_pts;
    for (int i = 0; i < topk; i++)
    {
        int x = sort_result[i].second % out_size;
        int y = sort_result[i].second / out_size;
        topk_pts.push_back(std::pair<int, int>(x, y));
    }

    // Channels 0-1 of the displacement map point to the start point, channels 2-3 to the end point
    ncnn::Mat start_map = org_disp_map.channel_range(0, 2).clone();
    ncnn::Mat end_map = org_disp_map.channel_range(2, 2).clone();
    ncnn::Mat dist_map = ncnn::Mat(out_size, out_size, 1);

    float* start_map_data = (float*)start_map.data;
    float* end_map_data = (float*)end_map.data;
    // Squared difference between the start and end displacement, per channel
    for (int i = 0; i < start_map.total(); i++)
    {
        start_map_data[i] = (start_map_data[i] - end_map_data[i]) * (start_map_data[i] - end_map_data[i]);
    }
    float* dist_map_data = (float*)dist_map.data;
    // Euclidean length of each candidate segment, in output-map pixels
    for (int i = 0; i < start_map.total() / 2; i++)
    {
        dist_map_data[i] = std::sqrt(start_map_data[i] + start_map_data[i + start_map.channel(0).total()]);
    }

    for (int i = 0; i < topk_pts.size(); ++i)
    {
        int x = topk_pts[i].first;
        int y = topk_pts[i].second;
        float distance = dist_map_data[y * out_size + x];
        // Keep a segment only if its score and length pass the thresholds
        if (sort_result[i].first > score_threshold && distance > dist_threshold)
        {
            int disp_x_start = org_disp_map.channel(0)[y * out_size + x];
            int disp_y_start = org_disp_map.channel(1)[y * out_size + x];
            int disp_x_end = org_disp_map.channel(2)[y * out_size + x];
            int disp_y_end = org_disp_map.channel(3)[y * out_size + x];
            // Map the endpoints from the half-resolution output back to the network input size
            int x_start = std::max(std::min((int)((x + disp_x_start) * 2), target_size), 0);
            int y_start = std::max(std::min((int)((y + disp_y_start) * 2), target_size), 0);
            int x_end = std::max(std::min((int)((x + disp_x_end) * 2), target_size), 0);
            int y_end = std::max(std::min((int)((y + disp_y_end) * 2), target_size), 0);
            // Scale back to the original image coordinates
            lines.push_back(Line{ cv::Point(x_start * w_scale, y_start * h_scale), cv::Point(x_end * w_scale, y_end * h_scale) });
        }
    }
    return 0;
}
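A quick usage sketch that draws the detected segments (the image path, the model file names and the loadModel helper are the placeholders introduced in the sketch above):
MLSD mlsd;
mlsd.loadModel("mlsd.param", "mlsd.bin", false);   // hypothetical helper and model file names

cv::Mat cv_src = cv::imread("doc.jpg");            // placeholder test image
std::vector<Line> lines;
mlsd.detectLines(cv_src, lines, 200, 0.1f, 20.0f);

cv::Mat cv_show = cv_src.clone();
for (const auto& l : lines)
{
    cv::line(cv_show, l._p1, l._p2, cv::Scalar(0, 255, 0), 2);   // draw each segment in green
}
cv::imshow("lines", cv_show);
cv::waitKey();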
Detection result:
After obtaining the edge segments of the object, sort and fit the segments to find the four correction points of the document.
Code:
int MLSD::detectEdge(const cv::Mat& cv_src, std::vector<cv::Point>& out_points)
{
    std::vector<Line> lines;
    detectLines(cv_src, lines);

    // Split the segments into roughly horizontal and roughly vertical ones
    std::vector<Line> h_lines, v_lines;
    for (auto v : lines)
    {
        double delta_x = v._p1.x - v._p2.x, delta_y = v._p1.y - v._p2.y;
        if (fabs(delta_x) > fabs(delta_y))
        {
            h_lines.push_back(v);
        }
        else
        {
            v_lines.push_back(v);
        }
    }

    if (h_lines.size() >= 2 && v_lines.size() >= 2)
    {
        // Sort the horizontal lines from top to bottom and the vertical lines from left
        // to right, then intersect the outermost ones to get the four document corners:
        // top-left, top-right, bottom-left, bottom-right
        std::sort(h_lines.begin(), h_lines.end(), cmpLineY);
        std::sort(v_lines.begin(), v_lines.end(), cmpLineX);
        out_points.push_back(computeIntersect(h_lines[0], v_lines[0]));
        out_points.push_back(computeIntersect(h_lines[0], v_lines[v_lines.size() - 1]));
        out_points.push_back(computeIntersect(h_lines[h_lines.size() - 1], v_lines[0]));
        out_points.push_back(computeIntersect(h_lines[h_lines.size() - 1], v_lines[v_lines.size() - 1]));
    }
    else
    {
        // Not enough lines were found: fall back to the (almost) full image,
        // using the same corner order as above (top-left, top-right, bottom-left, bottom-right)
        out_points.push_back(cv::Point2f(2, 2));
        out_points.push_back(cv::Point2f(cv_src.cols - 2, 2));
        out_points.push_back(cv::Point2f(2, cv_src.rows - 2));
        out_points.push_back(cv::Point2f(cv_src.cols - 2, cv_src.rows - 2));
    }
    // The corners are already in original-image coordinates (detectLines scales its
    // output back), so they only need to be clamped to the image bounds here
    for (int i = 0; i < out_points.size(); i++)
    {
        out_points.at(i).x = out_points.at(i).x < 0 ? 0 : out_points.at(i).x;
        out_points.at(i).y = out_points.at(i).y < 0 ? 0 : out_points.at(i).y;
        out_points.at(i).x = out_points.at(i).x > cv_src.cols ? cv_src.cols : out_points.at(i).x;
        out_points.at(i).y = out_points.at(i).y > cv_src.rows ? cv_src.rows : out_points.at(i).y;
    }
    return 0;
}
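The comparators cmpLineX / cmpLineY and the computeIntersect helper used above are not shown in the post. A minimal sketch of one possible implementation, sorting by the first endpoint and intersecting the two segments as infinite lines, is:
// Sort roughly vertical lines from left to right by the x of the first endpoint (assumed implementation)
static bool cmpLineX(const Line& a, const Line& b)
{
    return a._p1.x < b._p1.x;
}

// Sort roughly horizontal lines from top to bottom by the y of the first endpoint (assumed implementation)
static bool cmpLineY(const Line& a, const Line& b)
{
    return a._p1.y < b._p1.y;
}

// Intersection of the two segments extended to infinite lines (assumed implementation)
static cv::Point2f computeIntersect(const Line& a, const Line& b)
{
    float x1 = a._p1.x, y1 = a._p1.y, x2 = a._p2.x, y2 = a._p2.y;
    float x3 = b._p1.x, y3 = b._p1.y, x4 = b._p2.x, y4 = b._p2.y;
    float d = (x1 - x2) * (y3 - y4) - (y1 - y2) * (x3 - x4);
    if (std::fabs(d) < 1e-6f)
    {
        return cv::Point2f(-1, -1);   // parallel lines: no usable intersection
    }
    float pre = x1 * y2 - y1 * x2;
    float post = x3 * y4 - y3 * x4;
    return cv::Point2f((pre * (x3 - x4) - (x1 - x2) * post) / d,
                       (pre * (y3 - y4) - (y1 - y2) * post) / d);
}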
Detection result:
Use the four edge corner points to correct the perspective distortion of the document:
int MLSD::reviseImage(const cv::Mat& cv_src, cv::Mat& cv_dst, std::vector<cv::Point>& in_points)
{
    // Bounding box of the four corner points (top-left and bottom-right extremes)
    cv::Point point_f, point_b;
    point_f.x = (in_points.at(0).x < in_points.at(2).x) ? in_points.at(0).x : in_points.at(2).x;
    point_f.y = (in_points.at(0).y < in_points.at(1).y) ? in_points.at(0).y : in_points.at(1).y;
    point_b.x = (in_points.at(3).x > in_points.at(1).x) ? in_points.at(3).x : in_points.at(1).x;
    point_b.y = (in_points.at(3).y > in_points.at(2).y) ? in_points.at(3).y : in_points.at(2).y;
    // The output size is taken from the minimum bounding rectangle of the document,
    // so the aspect ratio gets distorted when the document is tilted by about 45 degrees
    cv::Rect rect(point_f, point_b);
    cv_dst = cv::Mat::zeros(rect.height, rect.width, CV_8UC3);

    // Destination corners: top-left, top-right, bottom-left, bottom-right
    std::vector<cv::Point2f> dst_pts;
    dst_pts.push_back(cv::Point2f(0, 0));
    dst_pts.push_back(cv::Point2f(rect.width - 1, 0));
    dst_pts.push_back(cv::Point2f(0, rect.height - 1));
    dst_pts.push_back(cv::Point2f(rect.width - 1, rect.height - 1));

    // Source corners in the same order as the destination corners
    std::vector<cv::Point2f> tr_points;
    tr_points.push_back(in_points.at(0));
    tr_points.push_back(in_points.at(1));
    tr_points.push_back(in_points.at(2));
    tr_points.push_back(in_points.at(3));

    cv::Mat transmtx = cv::getPerspectiveTransform(tr_points, dst_pts);
    cv::warpPerspective(cv_src, cv_dst, transmtx, cv_dst.size());
    return 0;
}
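Putting the three steps together, a minimal end-to-end sketch (model and image paths are placeholders, loadModel is the hypothetical helper sketched earlier):
int main()
{
    MLSD mlsd;
    if (mlsd.loadModel("mlsd.param", "mlsd.bin", true) != 0)   // hypothetical helper, GPU via Vulkan
    {
        return -1;
    }

    cv::Mat cv_src = cv::imread("doc.jpg");   // placeholder test image
    if (cv_src.empty())
    {
        return -1;
    }

    std::vector<cv::Point> corners;
    mlsd.detectEdge(cv_src, corners);          // four corners: top-left, top-right, bottom-left, bottom-right

    cv::Mat cv_dst;
    mlsd.reviseImage(cv_src, cv_dst, corners); // perspective correction

    cv::imshow("src", cv_src);
    cv::imshow("dst", cv_dst);
    cv::waitKey();
    return 0;
}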