Yolov3模型框架darknet研究(四)cpu+opencv4.0.1上直接运行yolov3模型

    自从opencv3.4.2开始可以直接运行yolov3模型进行图像识别。 至少我在 windows+opencv4.0.1以及Ubuntu18.04+opencv3.4.3上运行是没有问题的。 

背景

在GPU上训练数据并在GPU上进行推理已经是很平常的事情。但是现实中有很多场景要求在GPU上训练好的算法模型能够在不同的cpu平台上进行推理,比如ARM、intel cpu等。这里以intel cpu为例介绍两种方式:

1)下载darknet c代码并make成lib, 然后调用其lib接口进行图像识别。

2)安装opencv3.4.2及以上的版本,然后调用其API来进行图像识别

方法2优势很明显:

a)官方darknet只提供makefile在linux上编译,windows平台的编译得自己想办法搞定。

b) 更重要的是, opencv针对intel cpu做了很多指令集优化,所以对同一个yolov3算法模型的推理,方法1要6s左右,而opencv方式只需要600ms左右。

这里还得再说明一点: 网上宣称用opencv API的推理时间只有220ms,但是我的实验结果却要600ms,大概原因有两个:

(1)网上的神经网络输入size为416x416,而我的size为608x608,计算量大约为前者的1.5x1.5=2.25倍

 (2)CPU不一样。网上的是i7 6核,我的只有i5 4核,缓存也要差不少。

实现

1。 准备好输入参数,包括网络配置文件、权值、待识别的图片以及识别类型名字

// Remember to point these paths at your own files.
String model_config = "xxx/yolov3_2019_01.cfg";                     // network definition
String model_weights = "xxx/yolov3-2019_01_final.weights";          // trained weights
String image_file = "xxx/img20190108_000544.jpg";                   // image to run detection on

// Class-name table, later passed to postprocess() for labelling.
// NOTE(review): get_class_names is not shown here; it is assumed to return
// one name per class as vector<string> — confirm against its definition.
vector<string> class_names = get_class_names("xxx/class_2019_01.names");

2。使用opencv API 加载网络模型

// Load the network
	Net net = readNetFromDarknet(model_config, model_weights);
	if (net.empty())
	{
		printf("loading network fails \n");
		return 1;
	}

	net.setPreferableBackend(DNN_BACKEND_OPENCV);
	net.setPreferableTarget(DNN_TARGET_CPU);

3。读取图片,并转换成blob四维张量,同时resize成网络模型的输入size

// Read the image from disk and pack it into a 4D blob for the network.
	Mat frame = imread(image_file);
	if (frame.empty())
	{
		// imread produced an empty Mat: report and abort.
		printf("reading image fails \n");
		return 1;
	}

	// Size the image is resized to before being fed to the network.
	// NOTE(review): presumably has to match the width/height in the .cfg — confirm.
	const int input_width = 608;
	const int input_height = 608;

	// Arguments: scale pixels by 1/255, subtract a zero mean,
	// swapRB = true, crop = false.
	Mat blob;
	blobFromImage(frame, blob, 1 / 255.0, Size(input_width, input_height), Scalar(0, 0, 0), true, false);

4。将blob灌进模型,并进行前向预测(最耗时的部分)

	// Set the blob as the network input.
	net.setInput(blob);

	// Run the forward pass and collect one Mat per output layer;
	// getOutputsNames(net) supplies the names of those layers.
	vector<Mat> outs;
	net.forward(outs, getOutputsNames(net));

识别出来的outs是3个Mat类型数据,为什么是3? 因为darknet有3个yolo检测输出层啊! 这3个Mat的shape分别是(4+1+5)x19x19x3、(4+1+5)x38x38x3、(4+1+5)x76x76x3。   注:我的模型里面class数量是5

5。从outs里面挑选置信度大于阈值(缺省为0.5)的box,然后再进行NMS做进一步过滤

	// Keep only the detections whose best class score exceeds the confidence
	// threshold, run non-maximum suppression on them, and draw the surviving
	// boxes with their labels onto frame.
	postprocess(class_names, frame, outs);



//其调用的子函数如下

// Tunable thresholds used by postprocess().
float confThreshold = 0.5f; // a box is kept only if its best class score is above this
float nmsThreshold = 0.4f;  // threshold handed to NMSBoxes for non-maximum suppression

// Draw the predicted bounding box 绘出框
void drawPred(vectorclasses, int classId, float conf, int left, int top, int right, int bottom, Mat& frame)
{
	//Draw a rectangle displaying the bounding box 绘制矩形
	rectangle(frame, Point(left, top), Point(right, bottom), Scalar(0, 0, 255));

	//Get the label for the class name and its confidence
	string label = format("%.2f", conf);//分类标签及其置信度

	//若存在类别标签,读取对应的标签
	if (!classes.empty())
	{
		CV_Assert(classId < (int)classes.size());
		label = classes[classId] + ":" + label;
	}

	//Display the label at the top of the bounding box
	int baseLine;
	Size labelSize = getTextSize(label, FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
	top = max(top, labelSize.height);

	//绘制框上文字
	putText(frame, label, Point(left, top), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(255, 255, 255));
}

// Remove the bounding boxes with low confidence using non-maxima suppression
void postprocess(vectorclasses, Mat& frame, const vector& outs)
{
	vector classIds;
	vector confidences;
	vector boxes;

	for (size_t i = 0; i < outs.size(); ++i)
	{
		// Scan through all the bounding boxes output from the network and keep only the
		// ones with high confidence scores. Assign the box's class label as the class
		// with the highest score for the box.
		float* data = (float*)outs[i].data;

		for (int j = 0; j < outs[i].rows; ++j, data += outs[i].cols)
		{
			int a = outs[i].cols;//中心坐标+框的宽高+置信度+分为各个类别分数=2+2+1+80
			int b = outs[i].rows;//框的个数507

			Mat scores = outs[i].row(j).colRange(5, outs[i].cols);//取当前框的第六列到最后一列,即该框被分为80个类别,各个类别的评分
			Point classIdPoint;
			double confidence;

			// Get the value and location of the maximum score
			minMaxLoc(scores, 0, &confidence, 0, &classIdPoint);//找出最大评分的类别
			if (confidence > confThreshold)//置信度阈值
			{
				int centerX = (int)(data[0] * frame.cols);
				int centerY = (int)(data[1] * frame.rows);
				int width = (int)(data[2] * frame.cols);
				int height = (int)(data[3] * frame.rows);

				int left = centerX - width / 2;
				int top = centerY - height / 2;

				classIds.push_back(classIdPoint.x);
				confidences.push_back((float)confidence);
				boxes.push_back(Rect(left, top, width, height));
			}
		}
	}
	// Perform non maximum suppression to eliminate redundant overlapping boxes with
	// lower confidences
	vector indices;
	NMSBoxes(boxes, confidences, confThreshold, nmsThreshold, indices);//框、置信度、置信度阈值、非极大值抑制阈值、指标(输出)
	for (size_t i = 0; i < indices.size(); ++i)
	{
		int idx = indices[i];//框序号
		Rect box = boxes[idx];//框的坐标(矩形区域)

		drawPred(classes, classIds[idx], confidences[idx], box.x, box.y,
			box.x + box.width, box.y + box.height, frame);
	}
}

// Return the names of the network's output layers, i.e. the layers whose
// outputs are not connected to any other layer.
//
// The result is computed on the first call and cached in a function-local
// static; later calls return the cached names without consulting `net` again.
// NOTE(review): calling this with a different network after the first call
// therefore returns the first network's names — fine for the single-network
// flow shown here, but worth revisiting if more than one Net is used.
vector<String> getOutputsNames(const Net& net)
{
	static vector<String> names;

	if (names.empty())
	{
		// Get the indices of the output layers, i.e. the layers with unconnected outputs
		vector<int> outLayers = net.getUnconnectedOutLayers();

		// Get the names of all the layers in the network
		vector<String> layersNames = net.getLayerNames();

		// Collect the names of the output layers
		names.resize(outLayers.size());

		// The layer index is shifted down by one to index into layersNames.
		for (size_t i = 0; i < outLayers.size(); ++i)
			names[i] = layersNames[outLayers[i] - 1];
	}

	return names;
}

 

你可能感兴趣的:(深度学习)