一点就分享系列(实践篇)“大概率是全网首发”——文字检测(OCR系列)之后处理:文本构造线算法(OpenCv )版本 “为部署而生”!!!

  • 前言
  • 一、文本线构造什么鬼?为什么需要?
  • 二、文本线构造方法的“身世”
    • 1.来源和体会
    • 2.代码(CPP版本部署神器)
  • 总结



1)移植性:只需要Opencv DNN即可使用
2)易用性: 封装的接口,直接调用,输入为BOX信息,该结构我换成了Opencv的Rect,为了可读性和移植性,输出依旧为Rect结构!
3)可塑性:这份代码改自我之前的一个版本。原版本的数据结构和库大量依赖第三方,耦合性太强;当前代码仍有不少不足和需要优化的地方,欢迎读者拿去改进,比如可以把NMS改成fast、softer版本试试!


// Text-line construction, OpenCV version (demo).
// Post-processing for an object detector: the input is the detector's boxes,
// the output is the boxes merged into text lines.
// The input uses OpenCV's built-in Rect; any user-defined structure that carries
// the box information would work as well (author's note).
// Only the common OpenCV DNN libraries are used, nothing special.
// NOTE(review): no #include lines are visible in this listing — presumably they
// were lost; confirm against the original file.

// Minimum vertical-overlap ratio tested in meet_v_iou().
#define MIN_V_OVERLAPS 0.6
// Minimum height-similarity ratio tested in meet_v_iou().
#define MIN_SIZE_SIM 0.7 
using namespace std;
using namespace cv;
using namespace dnn;

// Linked-list cell used by every graph helper in this file.
// A cell whose index is -1 is treated as an empty list head.
typedef struct node {
	int index;           // position of a box in the caller's box vector, or -1 for "empty"
	struct node *next;   // link to the head of the following chain (list of chains)
	struct node *right;  // link to the following cell inside the same chain
} node;

bool sortFun(Rect& p1, Rect& p2)
	return p1.x < p2.x;//升序排列  

// Returns the larger of two floats (returns b when the two are equal).
float max1(float a, float b)
{
	return a > b ? a : b;
}

// Returns the smaller of two floats (returns a when the two are equal).
float min1(float a, float b)
{
	return a > b ? b : a;
}

int meet_v_iou(int index1, int index2, std::vector<Rect> binfo)//计算两个BOX的重合度
{   //竖直方向重合程度
	float h = binfo[index1].height + 1;
	float h1 = binfo[index2].height + 1;

	float y = max1(binfo[index1].y, binfo[index2].y);
	float y1 = min1(binfo[index1].y + binfo[index1].height, binfo[index2].y + binfo[index2].height);

	float overlap = max1(0, y1 - y + 1) / min1(h, h1);

	float similarity = min1(h, h1) / max1(h, h1);

	if (overlap >= MIN_V_OVERLAPS && similarity >= MIN_SIZE_SIM)//判断
		return 1;
	return 0;

// Collects the candidate right-hand neighbours (successors) of box `index`.
//
// boxes_table: one list head per image column; each list holds the indices of
//              the boxes whose left edge lies on that column.
// binfo:       all candidate boxes.
// len:         unused, kept for interface compatibility.
// img_w:       number of columns in boxes_table.
//
// Columns x+1 .. x+29 to the right of the box's left edge are scanned in order;
// the scan stops at the first column that contains at least one vertically
// compatible box (see meet_v_iou) and returns all compatible boxes of that column.
//
// Returns a list allocated with `new`; the caller must release every cell with
// `delete`. A head whose index is -1 means "no successor found".
node *get_successions(int index, node* boxes_table, const std::vector<Rect>& binfo, int len, int img_w)
{
	const int kMaxGap = 30; // horizontal search distance, in pixels

	node *ret = new node;
	ret->index = -1;
	ret->next = NULL;
	ret->right = NULL;

	const int x = binfo[index].x;
	// Start one column to the right so a box never matches itself; clamp to the
	// table so boxes with a negative x cannot index before its start.
	const int first = std::max(x + 1, 0);
	const int stop = std::min(x + kMaxGap, img_w);

	for (int column = first; column < stop; column++) {
		for (const node *cur = &boxes_table[column]; cur && cur->index != -1; cur = cur->right) {
			if (!meet_v_iou(cur->index, index, binfo))
				continue;
			if (ret->index == -1) {
				// first hit is stored in the head cell itself
				ret->index = cur->index;
			} else {
				// further hits are linked right behind the head
				node *n = new node;
				n->index = cur->index;
				n->next = NULL;
				n->right = ret->right;
				ret->right = n;
			}
		}
		if (ret->index != -1)
			return ret;
	}
	return ret;
}

// Collects the candidate left-hand neighbours (precursors) of box `index`.
//
// boxes_table: one list head per image column; each list holds the indices of
//              the boxes whose left edge lies on that column.
// binfo:       all candidate boxes.
// len:         unused, kept for interface compatibility.
// img_w:       number of columns in boxes_table.
//
// Columns x-1 down to x-50 are scanned from right to left; the scan stops at the
// first column that contains at least one vertically compatible box (see
// meet_v_iou) and returns all compatible boxes of that column.
//
// Returns a list allocated with `new`; the caller must release every cell with
// `delete`. A head whose index is -1 means "no precursor found".
node *get_precursors(int index, node* boxes_table, const std::vector<Rect>& binfo, int len, int img_w)
{
	const int kMaxGap = 50; // horizontal search distance, in pixels

	node *ret = new node;
	ret->index = -1;
	ret->next = NULL;
	ret->right = NULL;

	const int x = binfo[index].x;
	// Clamp both ends to the table so boxes lying outside the image cannot
	// index past either end of boxes_table.
	const int first = std::min(x - 1, img_w - 1);
	const int last = std::max(x - kMaxGap, 0);

	for (int column = first; column >= last; column--) {
		for (const node *cur = &boxes_table[column]; cur && cur->index != -1; cur = cur->right) {
			if (!meet_v_iou(cur->index, index, binfo))
				continue;
			if (ret->index == -1) {
				// first hit is stored in the head cell itself
				ret->index = cur->index;
			} else {
				// further hits are linked right behind the head
				node *n = new node;
				n->index = cur->index;
				n->next = NULL;
				n->right = ret->right;
				ret->right = n;
			}
		}
		if (ret->index != -1)
			return ret;
	}
	return ret;
}

int is_successions(int index, int succession_index, node *boxes_table, std::vector<Rect> binfo, std::vector<float> confidences, int len, int img_w)
	node *ret = get_precursors(succession_index, boxes_table, binfo, len, img_w);
	node *precursors = ret;
	//get the max index 
	float max = 0;
	int max_index = -1;
	while (precursors  && precursors->index != -1) {
		if (confidences[precursors->index]>max)
			max = confidences[precursors->index];
			max_index = precursors->index;
		precursors = precursors->right;

	while (ret)
		node *tmp = ret;
		ret = ret->right;
		delete tmp;

	if (confidences[index] >= confidences[max_index])
		return 1;
	return 0;

void gen_graph(std::vector<Rect> binfo, vector<float>confidences,
	int img_w, int img_h, unsigned char **graph)
	//gen_graph(float text_proposals[][4], float score[], int len,int img_h, int img_w, unsigned char **graph)
	int len = binfo.size();
	//创建 boxes table 和 初始化:对图像中水平方向的每个像素点 建立列表
	node * boxes_table = new node[img_w*(sizeof(node))];
	for (int i = 0; i < img_w; i++)
		boxes_table[i].index = -1;
		boxes_table[i].right = NULL;

	for (int i = 0; i < len; i++)
		int x = binfo[i].x;
		//first time to insert
		if (boxes_table[x].index == -1)
			boxes_table[x].index = i;
			// node *n = (node*)malloc(sizeof(node));
			node *n = new node[sizeof(node)];
			n->index = i;
			n->right = boxes_table[x].right;
			boxes_table[x].right = n;
	for (int i = 0; i < len; i++)
		node *ret = get_successions(i, boxes_table, binfo, len, img_w);//检测右边配对算法
		node *successions = ret;//index:对应的Box和右侧30个像素的所有BOX 竖直放重合度高的BOX的INDEX
								//得到 max index 
		float max = 0;
		int max_index = -1;
		while (successions  && successions->index != -1) {
			if (confidences[successions->index]>max)
				max = confidences[successions->index];
				max_index = successions->index;
			successions = successions->right;
		if (max_index == -1) continue;
		//      留下该index的box对应successions的box相应scores最高的index

		if (is_successions(i, max_index, boxes_table, binfo, confidences, len, img_w))
			graph[i][max_index] = 1;
		//FREE 内存
		while (ret) {
			node* tmp = ret;
			ret = ret->right;
			delete tmp;

	for (int i = 0; i < img_w; i++)
		node *n = boxes_table[i].right;
		while (n) {
			node *tmp = n;
			n = n->right;
			delete tmp;


void sub_graphs_connected(unsigned char **graph, int len, node *sub_graphs)
	node *cur = sub_graphs;
	for (int i = 0; i < len; i++)
		int j = 0;
		while (j<len && graph[j][i] == 0) j++;
		if (j < len)
		//判断当前 node 有没有 next node
		j = 0;
		while (j<len && graph[i][j] == 0) j++;
		if (j == len)

		//add the first node to sub_graphs
		if (cur->index == -1) {
			cur->index = i;
			node *n = (node *)malloc(sizeof(node));
			n->index = i;
			n->next = NULL;
			n->right = NULL;
			cur->next = n;
			cur = n;

		//judge the node whether it includes next node
		int k = i;
		node *rcur = cur;
		while (rcur->right) rcur = rcur->right;
		while (1)
			j = 0;
			while (j<len && graph[k][j] == 0) j++;
			if (j == len)
			k = j;
			// node *n = (node*)malloc(sizeof(node));
			node *n = new node[sizeof(node)];
			n->index = j;
			n->right = NULL;
			rcur->right = n;
			rcur = n;


std::vector<Rect>get_text_lines(std::vector<Rect> binfo, std::vector<float>confidences, std::vector<int>classIds, int img_w, int img_h)
	int len = binfo.size();
	// unsigned char **graph = (unsigned char**)malloc(len*sizeof(unsigned char*));
	unsigned char **graph = new unsigned char*[len];//分配二维数组内存:
	std::vector<Rect> result_objects;
	for (int i = 0; i < len; i++)
		// graph[i] = (unsigned char*)malloc(len*sizeof(unsigned char));
		graph[i] = new unsigned char[len];
		// memset(graph[i], 0, len);
	for (int i = 0; i<len; ++i)
		for (int j = 0; j<len; ++j)
			graph[i][j] = 0;
	gen_graph(binfo, confidences, img_w, img_h, graph);
	node *sub_graphs = new node[sizeof(node)];
	sub_graphs->index = -1;
	sub_graphs->right = NULL;
	sub_graphs->next = NULL;
	sub_graphs_connected(graph, len, sub_graphs);
	node *cur = sub_graphs;//text proposal
	while (cur && cur->index != -1)
		node *lcur = cur;

		float y1 = binfo[lcur->index].y;
		float y2 = binfo[lcur->index].y + binfo[lcur->index].height;
		float x1 = binfo[lcur->index].x;
		float x2 = binfo[lcur->index].x + binfo[lcur->index].width;
		int num = 0;
		float sco = 0;
		while (lcur) {
			sco += confidences[lcur->index];

			if (binfo[lcur->index].x < x1)
				x1 = binfo[lcur->index].x;

			if (binfo[lcur->index].x + binfo[lcur->index].width >x2)
				x2 = binfo[lcur->index].x + binfo[lcur->index].width;

			if (binfo[lcur->index].y< y1)
				y1 = binfo[lcur->index].y;

			if (binfo[lcur->index].y + binfo[lcur->index].height >y2)
				y2 = binfo[lcur->index].y + binfo[lcur->index].height;
			lcur = lcur->right;
		// # the score of a text line is the average score of the scores
		// # of all text proposals contained in the text line
		sco = sco / num;
		int m = (x2 - x1) / num * 0.5;//offset
		x1 = x1 - m;
		x2 = x2 + m;
		Rect p;
		p.x = x1;
		p.y = y1;
		//p.width = (x2+x1)/2.0;
		p.width = x2 - x1;
		//p.height = (y1+y2)/2.0;
		p.height = y2 - y1;
		//detectionConfidence = sco;
		//p.classId = 1;
		cur = cur->next;//继续下个
	while (sub_graphs) {
		node *cur = sub_graphs;
		sub_graphs = sub_graphs->next;
		while (cur) {
			node *tmp = cur;
			cur = cur->right;

	return result_objects;

nonMaximumSuppression(float nmsThresh, std::vector<Rect> binfo, vector<float> confidences)

	auto overlap1D = [](float x1min, float x1max, float x2min, float x2max) -> float {
		if (x1min > x2min)
			std::swap(x1min, x2min);
			std::swap(x1max, x2max);
		return x1max < x2min ? 0 : std::min(x1max, x2max) - x2min;
	auto computeIoU= [&overlap1D](Rect& bbox1, Rect& bbox2) -> float {
		float overlapX= overlap1D(bbox1.x, bbox1.x + bbox1.width, bbox2.x, bbox2.x + bbox2.width);
		float overlapY= overlap1D(bbox1.y, bbox1.y + bbox1.height, bbox2.y, bbox2.y + bbox2.height);
		float area1 = (bbox1.width) * (bbox1.height);
		float area2 = (bbox2.width) * (bbox2.height);
		float overlap2D = overlapX * overlapY;
		float u = area1 + area2 - overlap2D;
		return u == 0 ? 0 : overlap2D / u;
	std::stable_sort(confidences.begin(), confidences.end(),
		[](float b1, float b2)
		return b1 > b2;
	std::vector<Rect> out;
	for (auto i : binfo)
		bool keep = true;
		for (auto j : out)
			if (keep)
				float overlap = computeIoU(i, j);
				keep = overlap <= nmsThresh;
		if (keep) out.push_back(i);
	return out;
text_line_drop(std::vector<Rect>  boxes, vector<float>  confidences, vector<int>classIds, int width, int height)
	std::vector<Rect> result;
	//std::vector> splitBoxes(numClasses);
	std::vector<int> boxIndex;
	std::vector<Rect> imp_objects;
	std::vector<Rect> tmp_imp_objects;
	std::vector<Rect> result_objects;
	int i = 0;
	float nmsThresh = 0.2; //0.3
	for (auto& box_conf : confidences)
		if (box_conf > 0.3)  //0.5
	for (auto& index : boxIndex)
	sort(imp_objects.begin(), imp_objects.end(), sortFun);

	tmp_imp_objects = nonMaximumSuppression(nmsThresh, imp_objects, confidences);
	result_objects = get_text_lines(tmp_imp_objects, confidences, classIds, width, height);/* 在此将碎片框合并成一行,存储在*/																						   //std::cout<<"66666::: "<<"result"<
	return result_objects;
// Runs the Darknet text detector on an image, merges the detections into text
// lines and displays both the raw boxes and the merged lines.
//
// img:        input image; the raw boxes are drawn onto it.
//             NOTE(review): assumed to be 8-bit, 3-channel (the imread default) — confirm.
// scale:      side length of the square network input; the image is resized to
//             fit inside scale x scale and padded with gray at the right/bottom.
//             NOTE(review): must be a size the loaded model accepts; the only
//             call in this file uses 608.
// maxScale:   unused, kept for interface compatibility.
// prob:       minimum class score for a detection to be kept.
// objectList: receives the merged text-line boxes in source-image coordinates.
void text_detect(Mat img, int scale, int maxScale, float prob, std::vector<Rect>& objectList)
{
	objectList.clear();
	if (img.empty() || scale <= 0)
		return;

	Mat orgimg = img.clone();
	const int image_w = img.cols;
	const int image_h = img.rows;

	// Aspect-preserving resize so the image fits inside scale x scale.
	const double ratio = std::min(scale * 1.0 / image_w, scale * 1.0 / image_h);
	const int new_w = int(image_w * ratio);
	const int new_h = int(image_h * ratio);
	if (new_w <= 0 || new_h <= 0)
		return;
	cv::Mat img_size;
	// fx and fy are passed explicitly so INTER_LINEAR lands in the interpolation slot.
	cv::resize(img, img_size, cv::Size(new_w, new_h), 0, 0, INTER_LINEAR);

	// Gray canvas with the resized image in the top-left corner.
	Mat new_image(scale, scale, CV_8UC3, Scalar(128, 128, 128));
	img_size.copyTo(new_image(Rect(0, 0, new_w, new_h)));

	// Factor that maps canvas pixels back to source-image pixels.
	const float f = (float)new_w / (float)image_w;

	Net net = cv::dnn::readNetFromDarknet("./model/text.cfg", "./model/text.weights"); // model path
	cv::Mat inputBlob = cv::dnn::blobFromImage(new_image, 1.0 / 255, Size(scale, scale), Scalar(), false, false);
	net.setInput(inputBlob);
	std::vector<Mat> outs;
	net.forward(outs, net.getUnconnectedOutLayersNames());

	// Decode the detections: each row is [cx, cy, w, h, objectness, class scores...].
	std::vector<Rect> boxes;
	std::vector<float> confidences;
	std::vector<int> classIds;
	for (size_t i = 0; i < outs.size(); i++) {
		float* data = (float*)outs[i].data;
		for (int j = 0; j < outs[i].rows; j++, data += outs[i].cols) {
			Mat scores = outs[i].row(j).colRange(5, outs[i].cols);
			Point classIdPoint;
			double confidence;
			minMaxLoc(scores, 0, &confidence, 0, &classIdPoint);
			if (confidence > prob) {
				// normalized coordinates -> canvas pixels -> source-image pixels
				int center_x = (int)(data[0] * scale / f);
				int center_y = (int)(data[1] * scale / f);
				int width = (int)(data[2] * scale / f);
				int height = (int)(data[3] * scale / f);
				int left = int(center_x - width / 2);
				int top = int(center_y - height / 2);
				boxes.push_back(Rect(left, top, width, height));
				confidences.push_back((float)confidence);
				classIds.push_back(classIdPoint.x);
			}
		}
	}

	// Raw detections.
	for (size_t i = 0; i < boxes.size(); i++) {
		Rect box = boxes[i];
		cout << box << endl;
		int left = box.x;
		int top = box.y;
		int right = box.x + box.width;
		int bottom = box.y + box.height;
		rectangle(img, Point(left, top), Point(right, bottom), Scalar(0, 0, 255));
	}
	imshow("img1", img);

	// The boxes are in source-image coordinates, so the text-line graph must be
	// sized by the source image, not by the network input.
	objectList = text_line_drop(boxes, confidences, classIds, image_w, image_h); // text-line construction

	// Merged text lines.
	for (size_t i = 0; i < objectList.size(); ++i) {
		Rect box = objectList[i];
		cout << box << endl;
		int left = box.x;
		int top = box.y;
		int right = box.x + box.width;
		int bottom = box.y + box.height;
		rectangle(orgimg, Point(left, top), Point(right, bottom), Scalar(0, 0, 255));
	}
	imshow("img2", orgimg);
}

int main()
	Mat img = imread("3.jpg"); //图片路径
	if (img.empty())
		std::cout<< "图片读取失败" <<std::endl;
	std::vector<Rect> objectList;
	text_detect(img, 608, 608, 0.05, objectList);//封装好的文本线构造接口

	return 0;


