yolov5+torchlib1.6-cuda+vs2017+opencv4.4-cuda

最近在使用 yolov5,在网上找了一些资料后发现,用 libtorch(PyTorch 的 C++ 库,本文中也写作 torchlib)来部署 yolov5 相对更方便,效率也更高。

github 上也有调用 libtorch 的代码,但是它们的后处理都花费了过多时间。经过一番研究,我已经彻底解决了这个问题:在 2080ti 上的测试结果为前处理 8ms(不包括从硬盘加载图片的时间)、推理 15ms、后处理 7ms。我也在 github 上 libtorch-yolov5 仓库的 issue 下提交了我的回复。

yolov5 源码中的 export.py 默认导出的是 CPU 版本的模型,需要改成导出 GPU 版本。上文提到的 github 工程链接:https://github.com/yasenh/libtorch-yolov5

下面是我原始的代码:

// NOTE(review): the header names were lost when this listing was extracted;
// the list below is reconstructed from what the code uses - confirm against
// the original project.
#include <io.h>
#include <sys/stat.h>

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <ctime>
#include <iostream>
#include <string>
#include <vector>

#include <torch/script.h>
#include <torch/torch.h>

#include <opencv2/dnn.hpp>
#include <opencv2/opencv.hpp>

using namespace std;
using namespace cv;
using namespace dnn;

/// Record describing one file found by the directory search.
struct files {
	String filepath;   ///< Directory path joined with the file name.
	time_t time = 0;   ///< Timestamp taken from stat(); zero until it is filled in.
	string filename;   ///< File name without the directory part.
};


/// Lists the image files located directly inside a directory (Windows CRT only).
///
/// Every entry other than "." and ".." is echoed to stdout together with its
/// creation time. Entries whose name contains "png" or "jpg" are stat()'ed and
/// returned; entries for which stat() fails are reported via perror and skipped.
///
/// @param path Directory to scan, without a trailing backslash.
/// @return One record per accepted file, in the order the directory
///         enumeration returned them; empty when the directory cannot be opened.
vector<files> fileSearch(string path)
{
	vector<files> allfile;

	const string pattern = path + "\\*";
	struct _finddata_t fileInfo;
	// _findfirst hands back an intptr_t handle and -1 on failure.
	const intptr_t hFile = _findfirst(pattern.c_str(), &fileInfo);
	if (hFile == -1) {
		cout << "错误" << endl;
		// Nothing was opened: fileInfo is uninitialised and the handle is
		// invalid, so the enumeration loop must not run.
		return allfile;
	}

	do {
		if (strcmp(fileInfo.name, "..") == 0 || strcmp(fileInfo.name, ".") == 0) {
			continue;
		}
		cout << path + "\\" + fileInfo.name << ":time:" << fileInfo.time_create << endl;

		// NOTE(review): this is a substring match, so a name such as
		// "png_notes.txt" is accepted as well - kept as in the original.
		if (strstr(fileInfo.name, "png") == nullptr && strstr(fileInfo.name, "jpg") == nullptr) {
			continue;
		}

		const string pathName = path + "\\" + fileInfo.name;
		struct stat buf;
		if (stat(pathName.c_str(), &buf) != 0) {
			perror("显示文件状态信息出错");
			continue;
		}

		files filed;
		filed.filepath = pathName;
		filed.time = buf.st_atime;  // last-access time as reported by stat()
		filed.filename = fileInfo.name;
		allfile.push_back(filed);
	} while (_findnext(hFile, &fileInfo) == 0);

	_findclose(hFile);
	return allfile;
}


int main()
{
	// Loading  Module
	torch::jit::script::Module module = torch::jit::load(R"(E:\deeplearning\yolov5\runs\temp\best3.torchscript.pt)");//torchscript
	torch::DeviceType device_type;
	device_type = torch::kCUDA;
	torch::Device device0(device_type);
	module.to(device0);
	module.to(torch::kHalf);
	module.eval();

	Mat frame, img;
	vector allf = fileSearch(R"(D:\图片)");//文件夹路径
	for (int n = 0; n < allf.size(); n++) {
		clock_t start = clock();
		frame = imread(allf[n].filepath);
		cout << clock() - start << "ms-read" << endl;
		if (frame.empty())
		{
			std::cout << "Read frame failed!" << std::endl;
			break;
		}

		// Preparing input tensor
		resize(frame, img, Size(640, 640));
		cvtColor(img, img, COLOR_BGR2RGB);
		img.convertTo(img, CV_32FC3, 1.0f / 255.0f);  // normalization 1/255
		auto tensor_img = torch::from_blob(img.data, { 1, img.rows, img.cols, img.channels() }).to(device0);
		tensor_img = tensor_img.permute({ 0, 3, 1, 2 }).contiguous();  // BHWC -> BCHW (Batch, Channel, Height, Width)
		tensor_img = tensor_img.to(torch::kHalf);
		std::vector inputs;
		inputs.emplace_back(tensor_img);
		torch::jit::IValue output = module.forward(inputs);
		auto detections = output.toTuple()->elements()[0].toTensor();
		auto conf_mask = detections.select(2, 4).ge(0.5).unsqueeze(2);//过滤了score为0.5以下的
		detections = torch::masked_select(detections[0], conf_mask[0]).view({ -1, 85 });//类别+5,这里类别有80个
		detections = detections.to(torch::kFloat);
		detections = detections.to(torch::kCPU);//因为结果是在gpu上的,先拷到cpu,减少通信时间


		clock_t starrrr = clock();
		vector boxes;
		vector confidences;
		float* ptr = (float*)detections.data_ptr();//主要是通过这里的指针取值,缩短了后处理时间
		for (int i = 0; i < (int)detections.size(0); ++i, ptr += 85)//类别+5,这里80个类别
		{
			    float confidence = ptr[4];
				int centerX = (int)((ptr[0] / 640) * frame.cols);
				int centerY = (int)((ptr[1] / 640) * frame.rows);
				int width = (int)((ptr[2] / 640) * frame.cols);
				int height = (int)((ptr[3] / 640) * frame.rows);
				int left = (centerX - width / 2);
				int top = (centerY - height / 2);
				confidences.push_back(confidence);
				boxes.push_back(Rect(max(left, 0), max(top, 0), min(width, frame.cols - left), min(height, frame.rows - top)));

		}
		vector indices;
		NMSBoxes(boxes, confidences, 0.5, 0.2, indices);
		for (size_t i = 0; i < indices.size(); ++i)
		{
			int idx = indices[i];
			Rect box = boxes[idx];
			rectangle(frame, box, Scalar(0, 0, 250), 2, 8, 0);

	}


	return 0;
}

 

你可能感兴趣的:(深度学习,pytorch,神经网络,opencv,cuda)