YOLOv5 C++ inference (libtorch and onnxruntime)

libtorch inference
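
The model here is a TorchScript export of yolov5n (presumably produced with the yolov5 repository's export script, e.g. python export.py --weights yolov5n.pt --include torchscript). The program loads the module, runs a forward pass on a 640x640 input, and applies a hand-written non-maximum suppression.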

#include <algorithm>
#include <fstream>
#include <iostream>
#include <opencv2/opencv.hpp>
#include <torch/script.h>
#include <torch/torch.h>


std::vector<torch::Tensor> non_max_suppression(torch::Tensor preds, float score_thresh = 0.01, float iou_thresh = 0.35)
{
	std::vector<torch::Tensor> output;
	for (size_t i = 0; i < preds.sizes()[0]; ++i)
	{
		torch::Tensor pred = preds.select(0, i);

		pred = pred.to(at::kCPU); // GPU inference yields CUDA tensors; move to CPU before NMS, otherwise the post-processing below will fail

		// Filter by scores
		torch::Tensor scores = pred.select(1, 4) * std::get<0>(torch::max(pred.slice(1, 5, pred.sizes()[1]), 1));
		pred = torch::index_select(pred, 0, torch::nonzero(scores > score_thresh).select(1, 0));
		if (pred.sizes()[0] == 0) continue;

		// (center_x, center_y, w, h) to (left, top, right, bottom)
		pred.select(1, 0) = pred.select(1, 0) - pred.select(1, 2) / 2;
		pred.select(1, 1) = pred.select(1, 1) - pred.select(1, 3) / 2;
		pred.select(1, 2) = pred.select(1, 0) + pred.select(1, 2);
		pred.select(1, 3) = pred.select(1, 1) + pred.select(1, 3);

		// Computing scores and classes
		std::tuple<torch::Tensor, torch::Tensor> max_tuple = torch::max(pred.slice(1, 5, pred.sizes()[1]), 1);
		pred.select(1, 4) = pred.select(1, 4) * std::get<0>(max_tuple);
		pred.select(1, 5) = std::get<1>(max_tuple);

		torch::Tensor  dets = pred.slice(1, 0, 6);

		torch::Tensor keep = torch::empty({ dets.sizes()[0] });
		torch::Tensor areas = (dets.select(1, 3) - dets.select(1, 1)) * (dets.select(1, 2) - dets.select(1, 0));
		std::tuple<torch::Tensor, torch::Tensor> indexes_tuple = torch::sort(dets.select(1, 4), 0, 1); // sort scores in descending order
		torch::Tensor v = std::get<0>(indexes_tuple);
		torch::Tensor indexes = std::get<1>(indexes_tuple);
		int count = 0;
		while (indexes.sizes()[0] > 0)
		{
			keep[count] = (indexes[0].item().toInt());
			count += 1;

			// Computing overlaps
			torch::Tensor lefts = torch::empty(indexes.sizes()[0] - 1);
			torch::Tensor tops = torch::empty(indexes.sizes()[0] - 1);
			torch::Tensor rights = torch::empty(indexes.sizes()[0] - 1);
			torch::Tensor bottoms = torch::empty(indexes.sizes()[0] - 1);
			torch::Tensor widths = torch::empty(indexes.sizes()[0] - 1);
			torch::Tensor heights = torch::empty(indexes.sizes()[0] - 1);
			for (size_t j = 0; j < indexes.sizes()[0] - 1; ++j) // j indexes the remaining boxes (avoids shadowing the outer loop's i)
			{
				lefts[j] = std::max(dets[indexes[0]][0].item().toFloat(), dets[indexes[j + 1]][0].item().toFloat());
				tops[j] = std::max(dets[indexes[0]][1].item().toFloat(), dets[indexes[j + 1]][1].item().toFloat());
				rights[j] = std::min(dets[indexes[0]][2].item().toFloat(), dets[indexes[j + 1]][2].item().toFloat());
				bottoms[j] = std::min(dets[indexes[0]][3].item().toFloat(), dets[indexes[j + 1]][3].item().toFloat());
				widths[j] = std::max(float(0), rights[j].item().toFloat() - lefts[j].item().toFloat());
				heights[j] = std::max(float(0), bottoms[j].item().toFloat() - tops[j].item().toFloat());
			}
			}
			torch::Tensor overlaps = widths * heights;

			// Filter by IoU
			torch::Tensor ious = overlaps / (areas.select(0, indexes[0].item().toInt()) + torch::index_select(areas, 0, indexes.slice(0, 1, indexes.sizes()[0])) - overlaps);
			indexes = torch::index_select(indexes, 0, torch::nonzero(ious <= iou_thresh).select(1, 0) + 1);
		}
		keep = keep.toType(torch::kInt64);
		output.push_back(torch::index_select(dets, 0, keep.slice(0, 0, count)));
	}
	return output;
}
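
The suppression loop above applies the standard IoU test, inter / (areaA + areaB - inter), one pair at a time. A minimal standalone sketch of that formula, handy as a sanity check (the sample values in the trailing comments are hypothetical, not part of the original code):

#include <algorithm>

// IoU of two (left, top, right, bottom) boxes, mirroring the loop above
static float iou_xyxy(float ax1, float ay1, float ax2, float ay2,
                      float bx1, float by1, float bx2, float by2)
{
	float w = std::max(0.0f, std::min(ax2, bx2) - std::max(ax1, bx1));
	float h = std::max(0.0f, std::min(ay2, by2) - std::max(ay1, by1));
	float inter = w * h;
	float areaA = (ax2 - ax1) * (ay2 - ay1);
	float areaB = (bx2 - bx1) * (by2 - by1);
	return inter / (areaA + areaB - inter);
}
// iou_xyxy(0, 0, 10, 10, 0, 0, 10, 10) == 1.0f        (identical boxes)
// iou_xyxy(0, 0, 10, 10, 5, 0, 15, 10) == 1.0f / 3    (overlap 50, union 150)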


int main(int argc, char* argv[])
{
	torch::DeviceType device_type = at::kCPU; // default to CPU; switch to CUDA below if available
	if (torch::cuda::is_available())
	{
		device_type = at::kCUDA;
		std::cout << "cuda::is_available():" << torch::cuda::is_available() << std::endl;
	}
		
	// Load the TorchScript module
	torch::jit::script::Module module = torch::jit::load("yolov5n-gpu.torchscript");
	module.to(device_type); 

	std::vector<std::string> classnames;
	std::ifstream f("class_det.txt");
	std::string name;
	while (std::getline(f, name))
	{
		classnames.push_back(name);
	}

	cv::Mat frame = cv::imread("bus.jpg"), img;

	// Preparing input tensor
	cv::resize(frame, img, cv::Size(640, 640));
	cv::cvtColor(img, img, cv::COLOR_BGR2RGB);  // BGR -> RGB
	img.convertTo(img, CV_32FC3, 1.0f / 255.0f);  // scale pixel values to [0, 1]
	auto imgTensor = torch::from_blob(img.data, { 1, img.rows, img.cols, img.channels() }).to(device_type);
	imgTensor = imgTensor.permute({ 0, 3, 1, 2 }).contiguous();  // BHWC -> BCHW (Batch, Channel, Height, Width)
	std::vector<torch::jit::IValue> inputs;
	inputs.emplace_back(imgTensor);

	torch::jit::IValue output = module.forward(inputs);
	auto preds = output.toTuple()->elements()[0].toTensor();
	std::vector<torch::Tensor> dets = non_max_suppression(preds, 0.35, 0.5);
	if (dets.size() > 0)
	{
		for (size_t i = 0; i < dets[0].sizes()[0]; ++i)
		{
			float left = dets[0][i][0].item().toFloat() * frame.cols / 640;
			float top = dets[0][i][1].item().toFloat() * frame.rows / 640;
			float right = dets[0][i][2].item().toFloat() * frame.cols / 640;
			float bottom = dets[0][i][3].item().toFloat() * frame.rows / 640;
			float score = dets[0][i][4].item().toFloat();
			int classID = dets[0][i][5].item().toInt();

			cv::rectangle(frame, cv::Rect(left, top, (right - left), (bottom - top)), cv::Scalar(0, 255, 0), 2);
			cv::putText(frame, classnames[classID] + ": " + cv::format("%.2f", score),
				cv::Point(left, top), cv::FONT_HERSHEY_SIMPLEX, (right - left) / 200, cv::Scalar(0, 255, 0), 2);
		}
	}
	cv::imshow("", frame);
	cv::waitKey();
	
	return 0;
}
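
Note that this example resizes the frame directly to 640x640 rather than letterboxing, which distorts the aspect ratio; the per-axis factors frame.cols / 640 and frame.rows / 640 used when mapping boxes back compensate for this. The onnxruntime example below uses letterbox preprocessing instead, which preserves the aspect ratio with gray padding.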

onnxruntime inference
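
This example assumes a yolov5s model exported to ONNX (e.g. python export.py --weights yolov5s.pt --include onnx in the yolov5 repository; the exact export command is not part of the original). For the standard 640x640 COCO models the output is a 1 x 25200 x 85 tensor: 25200 candidate boxes, each holding x, y, w, h, objectness and 80 class scores.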

#include <algorithm>
#include <cassert>
#include <ctime>
#include <fstream>
#include <iostream>
#include <onnxruntime_cxx_api.h>
#include <opencv2/opencv.hpp>


void LetterBox(const cv::Mat& image, cv::Mat& outImage,
	cv::Vec4d& params, //[ratio_x,ratio_y,dw,dh]
	const cv::Size& newShape = cv::Size(640, 640),
	bool autoShape = false,
	bool scaleFill = false,
	bool scaleUp = true,
	int stride = 32,
	const cv::Scalar& color = cv::Scalar(114, 114, 114))
{
	cv::Size shape = image.size();
	float r = std::min((float)newShape.height / (float)shape.height, (float)newShape.width / (float)shape.width);
	if (!scaleUp)
	{
		r = std::min(r, 1.0f);
	}

	float ratio[2]{ r, r };
	int new_un_pad[2] = { (int)std::round((float)shape.width * r),(int)std::round((float)shape.height * r) };

	auto dw = (float)(newShape.width - new_un_pad[0]);
	auto dh = (float)(newShape.height - new_un_pad[1]);

	if (autoShape)
	{
		dw = (float)((int)dw % stride);
		dh = (float)((int)dh % stride);
	}
	else if (scaleFill)
	{
		dw = 0.0f;
		dh = 0.0f;
		new_un_pad[0] = newShape.width;
		new_un_pad[1] = newShape.height;
		ratio[0] = (float)newShape.width / (float)shape.width;
		ratio[1] = (float)newShape.height / (float)shape.height;
	}

	dw /= 2.0f;
	dh /= 2.0f;

	if (shape.width != new_un_pad[0] && shape.height != new_un_pad[1])
		cv::resize(image, outImage, cv::Size(new_un_pad[0], new_un_pad[1]));
	else
		outImage = image.clone();

	int top = int(std::round(dh - 0.1f));
	int bottom = int(std::round(dh + 0.1f));
	int left = int(std::round(dw - 0.1f));
	int right = int(std::round(dw + 0.1f));
	params[0] = ratio[0];
	params[1] = ratio[1];
	params[2] = left;
	params[3] = top;
	cv::copyMakeBorder(outImage, outImage, top, bottom, left, right, cv::BORDER_CONSTANT, color);
}
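
A worked example of the math above, assuming a hypothetical 1280x720 input: r = min(640/720, 640/1280) = 0.5, so the image is resized to 640x360 and then padded with 140 gray pixels on top and bottom:

cv::Mat src(720, 1280, CV_8UC3, cv::Scalar(0, 0, 0)), dst;
cv::Vec4d params;
LetterBox(src, dst, params);
// dst.size() == [640 x 640], params == [0.5, 0.5, 0, 140]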


void nms(std::vector<cv::Rect>& boxes, std::vector<float>& confs, std::vector<int>& classIds, float threshold)
{
	assert(boxes.size() == confs.size());

	struct BoxConf
	{
		cv::Rect box;
		float conf;
		int id;
	};
	std::vector<BoxConf> boxes_confs;
	for (size_t i = 0; i < boxes.size(); i++)
	{
		BoxConf box_conf;
		box_conf.box = boxes[i];
		box_conf.conf = confs[i];
		box_conf.id = classIds[i];
		boxes_confs.push_back(box_conf);
	}

	std::sort(boxes_confs.begin(), boxes_confs.end(), [](BoxConf a, BoxConf b) { return a.conf > b.conf; });

	std::vector<float> area(boxes_confs.size());
	for (size_t i = 0; i < boxes_confs.size(); ++i)
	{
		area[i] = boxes_confs[i].box.width * boxes_confs[i].box.height;
	}

	std::vector<bool> isSuppressed(boxes_confs.size(), false);
	for (size_t i = 0; i < boxes_confs.size(); ++i)
	{
		if (isSuppressed[i])	{ continue; }
		for (size_t j = i + 1; j < boxes_confs.size(); ++j)
		{
			if (isSuppressed[j])	{ continue; }
			float x1 = (std::max)(boxes_confs[i].box.x, boxes_confs[j].box.x);
			float y1 = (std::max)(boxes_confs[i].box.y, boxes_confs[j].box.y);
			float x2 = (std::min)(boxes_confs[i].box.x + boxes_confs[i].box.width, boxes_confs[j].box.x + boxes_confs[j].box.width);
			float y2 = (std::min)(boxes_confs[i].box.y + boxes_confs[i].box.height, boxes_confs[j].box.y + boxes_confs[j].box.height);

			float w = (std::max)(0.0f, x2 - x1);
			float h = (std::max)(0.0f, y2 - y1);
			float inter = w * h;
			float ovr = inter / (area[i] + area[j] - inter);

			if (ovr >= threshold) { isSuppressed[j] = true; }
		}
	}
	
	boxes.clear();
	confs.clear();
	classIds.clear();
	for (int i = 0; i < boxes_confs.size(); ++i)
	{
		if (!isSuppressed[i])
		{
			boxes.push_back(boxes_confs[i].box);
			confs.push_back(boxes_confs[i].conf);
			classIds.push_back(boxes_confs[i].id);
		}
	}
}
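
If OpenCV is built with the dnn module, cv::dnn::NMSBoxes performs the same class-agnostic suppression and could replace the hand-written nms above; a minimal sketch (the threshold values are placeholders):

#include <opencv2/dnn.hpp>

std::vector<int> keep;
cv::dnn::NMSBoxes(boxes, confs, /*score_threshold=*/0.5f, /*nms_threshold=*/0.5f, keep);
// keep holds the indices of the surviving boxes; gather boxes/confs/classIds from it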


int main(int argc, char* argv[])
{
	std::string imgpath = "zidane.jpg";
	Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "yolov5s");
	Ort::SessionOptions session_options;
	session_options.SetIntraOpNumThreads(1); // number of intra-op threads

	// CUDA execution provider options
	OrtCUDAProviderOptions cuda_option;
	cuda_option.device_id = 0;
	cuda_option.arena_extend_strategy = 0;
	cuda_option.cudnn_conv_algo_search = OrtCudnnConvAlgoSearchExhaustive;
	cuda_option.gpu_mem_limit = SIZE_MAX;
	cuda_option.do_copy_in_default_stream = 1;
	session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL); // enable full graph optimization
	session_options.AppendExecutionProvider_CUDA(cuda_option);

	const wchar_t* model_path = L"yolov5s-det.onnx"; // ORT on Windows takes the model path as a wide-character string

	std::vector<std::string> class_names;
	std::string classesFile = "coco.names"; // class-label file
	std::ifstream ifs(classesFile.c_str()); // c_str() yields the null-terminated C string expected by the ifstream constructor
	std::string line;
	while (getline(ifs, line)) class_names.push_back(line);

	// env manages the resources and configuration of the ORT session,
	// model_path points at the ONNX model to load,
	// session_options carries the session's options and optimization strategy.
	Ort::Session session(env, model_path, session_options);
	Ort::AllocatorWithDefaultOptions allocator; // default allocator handles allocation and release, no manual memory management needed

	size_t num_input_nodes = session.GetInputCount();
	std::vector<const char*> input_node_names = { "images" };  // tensor names must match the exported model
	std::vector<const char*> output_node_names = { "output" };

	const size_t input_tensor_size = 3 * 640 * 640;
	std::vector<float> input_tensor_values(input_tensor_size);
	cv::Mat srcimg = cv::imread(imgpath);

	cv::Mat dstimg;
	cv::Vec4d params; // [ratio_x, ratio_y, dw, dh] filled by LetterBox
	LetterBox(srcimg, dstimg, params);

	for (int c = 0; c < 3; c++)
	{
		for (int i = 0; i < 640; i++)
		{
			for (int j = 0; j < 640; j++)
			{
				float pix = dstimg.ptr<uchar>(i)[j * 3 + 2 - c]; // swap channels: the ONNX model expects RGB, but OpenCV stores BGR
				input_tensor_values[c * 640 * 640 + i * 640 + size_t(j)] = pix / 255.0; // normalize to [0, 1]
			}
		}
	}
	// create input tensor object from data values
	std::vector<int64_t> input_node_dims = { 1, 3, 640, 640 };
	auto memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
	Ort::Value input_tensor = Ort::Value::CreateTensor<float>(memory_info, input_tensor_values.data(), input_tensor_size, input_node_dims.data(), input_node_dims.size());

	std::vector<Ort::Value> ort_inputs;
	ort_inputs.push_back(std::move(input_tensor)); // move to avoid an unnecessary copy
	// score model & input tensor, get back output tensor
	std::vector<Ort::Value> output_tensors;
	for (int i = 0; i < 10; i++) // run 10 times to gauge latency; the first iteration includes warm-up cost
	{
		clock_t start_time = clock();
		output_tensors = session.Run(Ort::RunOptions{ nullptr }, input_node_names.data(), ort_inputs.data(), input_node_names.size(), output_node_names.data(), output_node_names.size());
		clock_t end_time = clock();

		std::cout << "inference time: " << static_cast<double>(end_time - start_time) / CLOCKS_PER_SEC << " seconds" << std::endl;
	}

	// Get pointer to output tensor float values
	const float* rawOutput = output_tensors[0].GetTensorData<float>();
	//generate proposals
	std::vector<int64_t> outputShape = output_tensors[0].GetTensorTypeAndShapeInfo().GetShape();
	size_t count = output_tensors[0].GetTensorTypeAndShapeInfo().GetElementCount();
	std::vector<float> output(rawOutput, rawOutput + count);

	std::vector<cv::Rect> boxes;
	std::vector<float> confs;
	std::vector<int> classIds;
	int numClasses = (int)outputShape[2] - 5; // elements per box minus x, y, w, h, objectness
	int elementsInBatch = (int)(outputShape[1] * outputShape[2]);

	float confThreshold = 0.5;	// confidence threshold
	float iouThreshold = 0.5;	// IoU threshold
	for (auto it = output.begin(); it != output.begin() + elementsInBatch; it += outputShape[2])
	{
		float objConf = *(it + 4); // objectness score (strictly, yolov5 confidence = objectness * class score)
		if (objConf > confThreshold)
		{
			int centerX = (int)(*it);
			int centerY = (int)(*(it + 1));
			int width = (int)(*(it + 2));
			int height = (int)(*(it + 3));
			int x1 = centerX - width / 2;
			int y1 = centerY - height / 2;
			boxes.emplace_back(cv::Rect(x1, y1, width, height));

			// the first 5 elements are x, y, w, h and objectness; class scores follow
			int bestClassId = -1;
			float bestConf = 0.0;
			for (int i = 5; i < numClasses + 5; i++)
			{
				if ((*(it + i)) > bestConf)
				{
					bestConf = it[i];
					bestClassId = i - 5;
				}
			}

			confs.push_back(objConf);
			classIds.push_back(bestClassId);
		}
	}

	// non-maximum suppression
	nms(boxes, confs, classIds, iouThreshold);

	// visualize results (drawn on the letterboxed image dstimg)
	for (size_t i = 0; i < boxes.size(); ++i)
	{
		cv::rectangle(dstimg, cv::Point(boxes[i].tl().x, boxes[i].tl().y), cv::Point(boxes[i].br().x, boxes[i].br().y), cv::Scalar(0, 0, 255), 1);
		cv::putText(dstimg, class_names[classIds[i]] + " " + std::to_string(confs[i]), cv::Point(boxes[i].tl().x, boxes[i].tl().y - 5), cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(255, 0, 0), 1);
	}
	cv::imshow("result", dstimg);
	cv::waitKey();
	return 0;
}
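
The visualization above draws on the letterboxed dstimg. To draw on the original srcimg instead, each box can be mapped back through the params vector that LetterBox filled in; a sketch (the helper name is made up here):

// params = [ratio_x, ratio_y, dw, dh]: undo the padding, then the scaling
cv::Rect scale_to_original(const cv::Rect& box, const cv::Vec4d& params)
{
	int x = (int)((box.x - params[2]) / params[0]);
	int y = (int)((box.y - params[3]) / params[1]);
	int w = (int)(box.width / params[0]);
	int h = (int)(box.height / params[1]);
	return cv::Rect(x, y, w, h);
}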
