3.本文的对比环境为 Win 10、VS2019、OpenCV 4.5;M-LSD 会用到 NCNN 推理加速库,使用的语言是 C++。
(4)对于任意一条直线上的所有点,将其变换到极坐标空间,在 theta 取 [0°, 360°] 的范围内可以求得对应的 r;属于同一条直线的点在极坐标空间 (r, theta) 中必然会在某一个点上出现最强的信号,据此反算回平面坐标即可得到直线上各点的像素坐标,从而得到直线。
// Runs Canny edge detection on `gray`, writing the edge map to `canny`.
// The upper hysteresis threshold is the value returned by an Otsu
// thresholding pass over `gray`; the lower threshold is half of it.
void getCanny(cv::Mat &gray, cv::Mat& canny)
{
    // Only the return value of the Otsu pass is used; the binarised image is discarded.
    cv::Mat otsu_mask;
    const double upper = cv::threshold(gray, otsu_mask, 0, 255, cv::THRESH_BINARY | cv::THRESH_OTSU);
    const double lower = upper * 0.5;

    cv::Canny(gray, canny, lower, upper);
}
// Detects line segments in `cv_src` with Canny + probabilistic Hough and
// writes a copy of the image with the segments drawn in red to `cv_dst`.
//
// The image is processed at a fixed working resolution (960x640 when the
// width is >= the height, 640x960 otherwise) and the annotated result is
// resized back to the size of `cv_src`.
//
// cv_src: input image; passed to cv::COLOR_BGR2GRAY, so a 3-channel BGR
//         image is expected.
// cv_dst: output image, same size as `cv_src`; released (left empty) when
//         `cv_src` is empty.
void lineDetection(cv::Mat &cv_src, cv::Mat &cv_dst)
{
    // Nothing to detect on an empty image; avoid feeding it to cv::resize.
    if (cv_src.empty())
    {
        cv_dst.release();
        return;
    }

    const int w = cv_src.cols >= cv_src.rows ? 960 : 640;
    const int h = cv_src.rows > cv_src.cols ? 960 : 640;

    cv::Mat cv_resize;
    cv::resize(cv_src, cv_resize, cv::Size(w, h));

    cv::Mat cv_gray, cv_canny;
    cv::cvtColor(cv_resize, cv_gray, cv::COLOR_BGR2GRAY);
    getCanny(cv_gray, cv_canny);

    // extract lines from the edge image
    // (rho = 1 px, theta = 1 degree, votes >= 80, min length 60, max gap 8)
    std::vector<cv::Vec4i> lines;
    cv::HoughLinesP(cv_canny, lines, 1, CV_PI / 180, 80, 60, 8);

    for (const cv::Vec4i& v : lines)
    {
        cv::line(cv_resize, cv::Point(v[0], v[1]), cv::Point(v[2], v[3]), cv::Scalar(0, 0, 255), 1, cv::LINE_AA);
    }

    // Bring the annotated working image back to the caller's resolution.
    cv::resize(cv_resize, cv_dst, cv_src.size());
}
1.M-LSD(Mobile LSD)是一种用于资源受限环境的实时轻量线段检测器。它利用了极其高效的 LSD 架构和新颖的训练方案,包括 SoL 增强和几何学习方案。模型可以在GPU、CPU甚至移动设备上实时运行。算法已开源:https://github.com/navervision/mlsd。如果想更详细地理解算法原理,可以直接看论文。
3.M-LSD ncnn C++ 模型推理代码:
#ifndef MLSD_H
#define MLSD_H
// A line segment described by its two end points and their midpoint.
struct Line
{
    cv::Point _p1;      // first end point
    cv::Point _p2;      // second end point
    cv::Point _center;  // midpoint of _p1/_p2 (integer division per coordinate)

    // Stores both end points and derives the midpoint from them.
    Line(cv::Point p1, cv::Point p2)
        : _p1(p1),
          _p2(p2),
          _center((p1.x + p2.x) / 2, (p1.y + p2.y) / 2)
    {
    }
};
class MLSD
int loadModel(std::string models_path, int _target_size, bool use_gpu = false);
int detect(const cv::Mat& rgb, std::vector<Line>& line, int topk = 200, float score_threshold = 0.1f, float dist_threshold = 20.0f);
int draw(cv::Mat& rgb, const std::vector<Line>& line);
ncnn::Net lsdnet;
int target_size;
const float mean_mlsd[3] = { 127.5f, 127.5f, 127.5f };
const float norm_mlsd[3] = { 1 / 127.5f, 1 / 127.5f, 1 / 127.5f };
ncnn::UnlockedPoolAllocator blob_pool_allocator;
ncnn::PoolAllocator workspace_pool_allocator;
#endif // MLSD_H
#include "MLSD.h"
int MLSD::loadModel(std::string models_path, int _target_size, bool use_gpu)
lsdnet.opt = ncnn::Option();
lsdnet.opt.use_vulkan_compute = use_gpu;
lsdnet.opt.num_threads = ncnn::get_big_cpu_count();
lsdnet.opt.blob_allocator = &blob_pool_allocator;
lsdnet.opt.workspace_allocator = &workspace_pool_allocator;
lsdnet.load_param((models_path + "large.param").c_str());
lsdnet.load_model((models_path + "large.bin").c_str());
target_size = _target_size;
return 0;
// Runs M-LSD inference on `rgb` and appends the detected segments to `lines`.
//
// rgb             : input image; its raw 3-byte pixels are resized to
//                   target_size x target_size and expanded to 4 channels.
// lines           : output; detected segments are appended (not cleared),
//                   expressed in the pixel coordinates of `rgb`.
// topk            : number of highest-scoring candidate centres examined;
//                   clamped to the number of available candidates.
// score_threshold : a candidate is kept only if its score exceeds this.
// dist_threshold  : a candidate is kept only if the distance between its
//                   start and end displacement exceeds this.
//
// Returns 0 on success, -1 when the input is empty, the output size is not
// positive, or a network output could not be extracted / has an unexpected shape.
int MLSD::detect(const cv::Mat& rgb, std::vector<Line>& lines, int topk, float score_threshold, float dist_threshold)
{
    // The output maps are addressed as out_size x out_size grids.
    const int out_size = target_size / 2;
    if (rgb.empty() || out_size <= 0)
        return -1;

    ncnn::Extractor ex = lsdnet.create_extractor();
    ncnn::Mat ncnn_in = ncnn::Mat::from_pixels_resize(rgb.data, ncnn::Mat::PIXEL_RGB2BGRA, rgb.cols, rgb.rows, target_size, target_size);
    // NOTE(review): PIXEL_RGB2BGRA produces a 4-channel blob while norm_mlsd is
    // declared with 3 entries - confirm no 4th scale value is read here.
    ncnn_in.substract_mean_normalize(0, norm_mlsd);
    ex.input("input", ncnn_in);

    // extract() returns 0 on success; bail out instead of dereferencing empty maps.
    ncnn::Mat org_disp_map, max_map, center_map;
    if (ex.extract("out1", org_disp_map) != 0
        || ex.extract("Decoder/Sigmoid_4:0", center_map) != 0
        || ex.extract("out2", max_map) != 0)
        return -1;

    const size_t cell_count = max_map.total();
    if (max_map.empty() || center_map.total() < cell_count || org_disp_map.c < 4)
        return -1;

    const float* max_map_data = static_cast<const float*>(max_map.data);
    const float* center_map_data = static_cast<const float*>(center_map.data);

    // A cell is a candidate when the two maps agree at that cell; every other
    // cell keeps the default (0, 0) entry and therefore sorts last.
    std::vector<std::pair<float, int>> sort_result(cell_count);
    for (size_t i = 0; i < cell_count; i++)
    {
        if (max_map_data[i] == center_map_data[i])
            sort_result[i] = std::pair<float, int>(max_map_data[i], static_cast<int>(i));
    }

    // Never ask for more candidates than exist (a larger topk used to run past the end).
    const size_t keep = std::min(cell_count, static_cast<size_t>(std::max(topk, 0)));
    std::partial_sort(sort_result.begin(), sort_result.begin() + keep, sort_result.end(), std::greater<std::pair<float, int> >());

    // Displacement channels: 0/1 = start x/y, 2/3 = end x/y. Per-channel
    // pointers are used so the lookup does not depend on channel padding.
    const float* disp_x_start_map = org_disp_map.channel(0);
    const float* disp_y_start_map = org_disp_map.channel(1);
    const float* disp_x_end_map = org_disp_map.channel(2);
    const float* disp_y_end_map = org_disp_map.channel(3);
    const size_t disp_plane = static_cast<size_t>(org_disp_map.w) * static_cast<size_t>(org_disp_map.h);

    const float h_ratio = (float)rgb.rows / target_size;
    const float w_ratio = (float)rgb.cols / target_size;

    for (size_t i = 0; i < keep; i++)
    {
        const float score = sort_result[i].first;
        const int idx = sort_result[i].second;
        if (idx < 0 || static_cast<size_t>(idx) >= disp_plane)
            continue;

        const int x = idx % out_size;
        const int y = idx / out_size;

        // Kept as float: truncating before the x2 scaling below loses up to two pixels.
        const float disp_x_start = disp_x_start_map[idx];
        const float disp_y_start = disp_y_start_map[idx];
        const float disp_x_end = disp_x_end_map[idx];
        const float disp_y_end = disp_y_end_map[idx];

        // Distance between the start and end displacement vectors.
        const float diff_x = disp_x_start - disp_x_end;
        const float diff_y = disp_y_start - disp_y_end;
        const float distance = std::sqrt(diff_x * diff_x + diff_y * diff_y);

        if (score > score_threshold && distance > dist_threshold)
        {
            // Map grid -> network input (x2), clamp to [0, target_size],
            // then scale to the original image.
            const int x_start = std::max(std::min((int)((x + disp_x_start) * 2), target_size), 0);
            const int y_start = std::max(std::min((int)((y + disp_y_start) * 2), target_size), 0);
            const int x_end = std::max(std::min((int)((x + disp_x_end) * 2), target_size), 0);
            const int y_end = std::max(std::min((int)((y + disp_y_end) * 2), target_size), 0);

            lines.push_back(Line{ cv::Point((int)(x_start * w_ratio), (int)(y_start * h_ratio)),
                                  cv::Point((int)(x_end * w_ratio), (int)(y_end * h_ratio)) });
        }
    }

    return 0;
}
// Renders each segment of `line` onto `rgb` as a 4-pixel-wide stroke
// (colour 255,0,255, 8-connected). Always returns 0.
int MLSD::draw(cv::Mat& rgb, const std::vector<Line>& line)
{
    const cv::Scalar stroke_color(255, 0, 255);

    for (size_t idx = 0; idx < line.size(); ++idx)
    {
        const Line& segment = line[idx];
        cv::line(rgb, segment._p1, segment._p2, stroke_color, 4, 8);
    }

    return 0;
}
#include "MLSD.h"
void lineDetection(cv::Mat& cv_src, cv::Mat& cv_dst);
// Concatenates the images in `src_vor` horizontally into `cv_dst`.
//
// src_vor : tiles to place left to right. The canvas is sized from the first
//           tile (cols * count, rows), so all tiles are expected to share its
//           size, and their type must match the canvas type selected by
//           `channel` for the copy to land in the canvas.
// cv_dst  : receives the merged image; released (left empty) when `src_vor`
//           is empty.
// channel : 1 -> CV_8UC1 canvas, 3 -> CV_8UC3 canvas.
void mergeImage(std::vector<cv::Mat>& src_vor, cv::Mat& cv_dst, int channel)
{
    // Nothing to merge; avoid at(0) on an empty vector.
    if (src_vor.empty())
    {
        cv_dst.release();
        return;
    }

    const int tile_count = static_cast<int>(src_vor.size());
    const cv::Size size(src_vor.at(0).cols * tile_count, src_vor.at(0).rows);

    cv::Mat img_merge;
    if (channel == 1)
    {
        img_merge.create(size, CV_8UC1);
    }
    else if (channel == 3)
    {
        img_merge.create(size, CV_8UC3);
    }

    for (int i = 0; i < tile_count; i++)
    {
        const cv::Mat& tile = src_vor.at(i);
        cv::Mat cv_temp = img_merge(cv::Rect(tile.cols * i, 0, tile.cols, tile.rows));
        // The ROI shares memory with img_merge, so this writes the tile into
        // the canvas (this copy was missing, leaving the canvas uninitialised).
        tile.copyTo(cv_temp);
    }

    cv_dst = img_merge.clone();
}
int main(void)
MLSD lsd;
lsd.loadModel("models/", 320, true);
std::string path = "images";
std::vector<std::string> filenames;
cv::glob(path, filenames, false);
int i = 0;
for (auto name : filenames)
cv::Mat cv_src = cv::imread(name);
std::vector<Line> lines;
double s_start = static_cast<double>(cv::getTickCount());
lsd.detect(cv_src, lines);
double s_time = ((double)cv::getTickCount() - s_start) / cv::getTickFrequency();
cv::Mat cv_mlsd = cv_src.clone();
lsd.draw(cv_mlsd, lines);
cv::putText(cv_mlsd, "time:" + std::to_string(s_time), cv::Point(10, 60), 2, 1, cv::Scalar(0, 0, 255));
cv::Mat cv_lines;
lineDetection(cv_src, cv_lines);
std::vector<cv::Mat> cv_dsts{ cv_src,cv_mlsd,cv_lines};
cv::Mat cv_dst;
mergeImage(cv_dsts, cv_dst, 3);
cv::imwrite(std::to_string(i)+".jpg", cv_dst);
return 0;
