通过查阅大量资料,在@Christo3、@wang-xinyu、@一笑奈何HYL等人的工作基础上,完成了将yolov5s通过TensorRT加速并导出为dll文件,再由python、c++调用该dll文件的全过程。
软件安装,及其环境变量配置请参考上述人员的博客,必须基于yolov5(6.0版本),如有报错请考虑软件版本、环境变量是否匹配。
(1)Windows 10、NVIDIA GeForce GTX 1050 Ti
(2)Visual Studio 2019 Community 下载地址
(3)cuda_10.2.89_441.22_win10下载地址
(4)cudnn-windows-x86_64-8.4.0.27_cuda10.2-archive下载地址
(5)TensorRT-8.4.1.5下载地址
(6)opencv4.5.1下载地址
(7)tensorrtx-yolov5-v6.0下载地址
(8)Yolov5(v6.0)下载地址
(9)CMake3.24.2下载地址
# Build script for the tensorrtx YOLOv5 (v6.0) sample: compiles yolov5.cpp and
# the CUDA sources yololayer.cu / preprocess.cu into the `yolov5` target and
# links it against TensorRT, OpenCV, the CUDA libraries and the thread library.

# enable_language(CUDA) requires CMake 3.8 and the Threads::Threads imported
# target requires 3.1, so the previous "VERSION 2.6" understated the real need.
cmake_minimum_required(VERSION 3.8)
project(yolov5)

# change to your own path
##################################################
# Forward slashes are valid on Windows and avoid backslash-escaping problems.
set(OpenCV_DIR "D:/Tool/opencv4.5.1/opencv/build") #2
set(TRT_DIR "D:/AITool/TensorRT/TensorRT-8.4.1.5") #3
set(Dirent_INCLUDE_DIRS "D:/AITool/TensorRT/TensorRT-8.4.1.5/include") #10
##################################################

# API_EXPORTS is consumed by the project's own headers.
add_definitions(-DAPI_EXPORTS)

# option() takes <variable> "<help text>" [value]; the help text was missing.
option(CUDA_USE_STATIC_CUDA_RUNTIME "Link the static CUDA runtime library" OFF)

# The language standard is requested through CMake instead of a raw
# "-std=c++11" definition, which MSVC does not understand.
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CUDA_STANDARD 11)

# Default to Debug only for single-config generators and only when the user
# did not choose a build type; Visual Studio selects the configuration at
# build time and ignores CMAKE_BUILD_TYPE.
if(NOT CMAKE_CONFIGURATION_TYPES AND NOT CMAKE_BUILD_TYPE)
  set(CMAKE_BUILD_TYPE Debug)
endif()

set(THREADS_PREFER_PTHREAD_FLAG ON)
find_package(Threads)

# setup CUDA
find_package(CUDA REQUIRED)
message(STATUS " libraries: ${CUDA_LIBRARIES}")
message(STATUS " include path: ${CUDA_INCLUDE_DIRS}")
include_directories(${CUDA_INCLUDE_DIRS})
include_directories(${Dirent_INCLUDE_DIRS})

# change to your GPU own compute_XX
###########################################################################################
# NOTE(review): CUDA_NVCC_FLAGS is a FindCUDA variable; the target below is
# created with add_executable() + enable_language(CUDA), so these flags may not
# reach the .cu compilation at all - confirm, and move them to CMAKE_CUDA_FLAGS
# if they are meant to apply.
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-std=c++11;-g;-G;-gencode;arch=compute_60;code=sm_60)
###########################################################################################

####
enable_language(CUDA) # add this line, then no need to setup cuda path in vs
####

include_directories(${PROJECT_SOURCE_DIR}/include)
include_directories("${TRT_DIR}/include")

# These are GCC/Clang flags; passing them to MSVC only produces warnings.
# -D_MWAITXINTRIN_H_INCLUDED for solving error: identifier "__builtin_ia32_mwaitx" is undefined
if(NOT MSVC)
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Ofast -D_MWAITXINTRIN_H_INCLUDED")
endif()

# setup opencv
# REQUIRED makes a wrong OpenCV_DIR fail at configure time with a clear message
# instead of surfacing later as missing headers or unresolved symbols.
find_package(OpenCV REQUIRED
  NO_MODULE
  NO_DEFAULT_PATH
  NO_CMAKE_PATH
  NO_CMAKE_ENVIRONMENT_PATH
  NO_SYSTEM_ENVIRONMENT_PATH
  NO_CMAKE_PACKAGE_REGISTRY
  NO_CMAKE_BUILDS_PATH
  NO_CMAKE_SYSTEM_PATH
  NO_CMAKE_SYSTEM_PACKAGE_REGISTRY
)
message(STATUS "OpenCV library status:")
message(STATUS " version: ${OpenCV_VERSION}")
message(STATUS " libraries: ${OpenCV_LIBS}")
message(STATUS " include path: ${OpenCV_INCLUDE_DIRS}")
include_directories(${OpenCV_INCLUDE_DIRS})

# Must precede add_executable() so the target picks up the TensorRT lib dir.
link_directories("${TRT_DIR}/lib")

add_executable(yolov5
  ${PROJECT_SOURCE_DIR}/yolov5.cpp
  ${PROJECT_SOURCE_DIR}/yololayer.cu
  ${PROJECT_SOURCE_DIR}/yololayer.h
  ${PROJECT_SOURCE_DIR}/preprocess.cu
)
target_link_libraries(yolov5 "nvinfer" "nvinfer_plugin")
target_link_libraries(yolov5 ${OpenCV_LIBS})
target_link_libraries(yolov5 ${CUDA_LIBRARIES})
target_link_libraries(yolov5 Threads::Threads)
红色Warning不影响,依次看到Configuring done、Generating done即可
如果你的Visual studio没有该选项,请点击查找现有的 再
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.2\extras\visual_studio_integration\MSBuildExtensions 目录中添加即可
python gen_wts.py -w yolov5s.pt -o yolov5s.wts
yolov5 -s yolov5s.wts yolov5s.engine s
在Release目录下创建images目录,放入yolov5-6.0的图片
yolov5 -d yolov5s.engine ./images
可以看到TensorRT的加速效果很明显,至此yolov5+TensorRT完成,开始生成dll。
Yolov5TRTContext.h文件
#pragma once
#include
#include
#include
#include "cuda_utils.h"
#include "logging.h"
#include "common.hpp"
#include "utils.h"
#include "calibrator.h"
#include "preprocess.h"
#include "macros.h"
// Inference state bundled behind the opaque handle that Init() returns and
// that Detect() / cuda_free() receive back. Every member is given a null/zero
// default so that a context which has not been (fully) set up by Init() never
// holds indeterminate pointers.
class Yolov5TRTContext {
public:
    float* data = nullptr;                  // host input buffer, allocated with new[] in Init()
    float* prob = nullptr;                  // host output buffer, allocated with new[] in Init()
    IRuntime* runtime = nullptr;            // TensorRT runtime created in Init()
    ICudaEngine* engine = nullptr;          // engine deserialized from the model file
    IExecutionContext* context = nullptr;   // execution context created from the engine
    void* buffers[2] = { nullptr, nullptr };  // device buffers, indexed by inputIndex / outputIndex
    cudaStream_t stream = nullptr;          // CUDA stream created in Init()
    int inputIndex = 0;                     // engine binding index of the input tensor
    int outputIndex = 0;                    // engine binding index of the output tensor
};
// Entry points exported with C linkage so they can be resolved by plain name
// (e.g. through ctypes or GetProcAddress).
extern "C" {

// Loads the engine file at model_path and returns an opaque context handle.
API void* Init(char* model_path);

// Runs detection on a rows x cols 3-channel 8-bit image at src_data and writes
// one row per detection into res_array: x, y, width, height, class id, confidence.
API void Detect(void* h, int rows, int cols, unsigned char* src_data, float(*res_array)[6]);

// Releases the resources owned by a handle obtained from Init().
API void cuda_free(void* h);

}  // extern "C"
修改yolov5.cpp文件如下(仅列出改动部分,未改动部分和@一笑奈何LHY的yolov5.cpp文件一致,请参考mirrors / Monday-Leo / yolov5_tensorrt_win10):
文件下载,可百度网盘获取
链接:https://pan.baidu.com/s/1haG2JI_WWyFkUGwb2QzJuQ?pwd=xywh
提取码:xywh
#include
#include
#include
#include "cuda_utils.h"
#include "logging.h"
#include "common.hpp"
#include "utils.h"
#include "calibrator.h"
#include "preprocess.h"
#include "macros.h"
#include"Yolov5TRTContext.h"
void* Init(char* model_path)
{
cudaSetDevice(DEVICE);
// create a model using the API directly and serialize it to a stream
char* trtModelStream{ nullptr };
size_t size_e{ 0 };
std::string engine_name = model_path;
std::ifstream file(engine_name, std::ios::binary);
Yolov5TRTContext* trt = new Yolov5TRTContext();
if (file.good()) {
file.seekg(0, file.end);
size_e = file.tellg();
file.seekg(0, file.beg);
trtModelStream = new char[size_e];
assert(trtModelStream);
file.read(trtModelStream, size_e);
file.close();
}
trt->runtime = createInferRuntime(gLogger);
assert(trt->runtime != nullptr);
trt->engine = trt->runtime->deserializeCudaEngine(trtModelStream, size_e);
assert(trt->engine != nullptr);
trt->context = trt->engine->createExecutionContext();
assert(trt->context != nullptr);
//delete[] trtModelStream;
assert(trt->engine->getNbBindings() == 2);
trt->data = new float[BATCH_SIZE * 3 * INPUT_H * INPUT_W];
trt->prob = new float[BATCH_SIZE * OUTPUT_SIZE];
trt->inputIndex = trt->engine->getBindingIndex(INPUT_BLOB_NAME);
trt->outputIndex = trt->engine->getBindingIndex(OUTPUT_BLOB_NAME);
assert(trt->inputIndex == 0);
assert(trt->outputIndex == 1);
// Create GPU buffers on device
CUDA_CHECK(cudaMalloc(&trt->buffers[trt->inputIndex], BATCH_SIZE * 3 * INPUT_H * INPUT_W * sizeof(float)));
CUDA_CHECK(cudaMalloc(&trt->buffers[trt->outputIndex], BATCH_SIZE * OUTPUT_SIZE * sizeof(float)));
// Create stream
CUDA_CHECK(cudaStreamCreate(&trt->stream));
// In order to bind the buffers, we need to know the names of the input and output tensors.
// Note that indices are guaranteed to be less than IEngine::getNbBindings()
return (void*)trt;
}
void Detect(void* h, int rows, int cols, unsigned char* src_data, float(*res_array)[6])
{
Yolov5TRTContext* trt = (Yolov5TRTContext*)h;
cv::Mat img = cv::Mat(rows, cols, CV_8UC3, src_data);
// prepare input data ---------------------------
cv::Mat pr_img = preprocess_img(img, INPUT_W, INPUT_H); // letterbox BGR to RGB
int i = 0;
for (int row = 0; row < INPUT_H; ++row) {
uchar* uc_pixel = pr_img.data + row * pr_img.step;
for (int col = 0; col < INPUT_W; ++col)
{
trt->data[0 * 3 * INPUT_H * INPUT_W + i] = (float)uc_pixel[2] / 255.0;
trt->data[0 * 3 * INPUT_H * INPUT_W + i + INPUT_H * INPUT_W] = (float)uc_pixel[1] / 255.0;
trt->data[0 * 3 * INPUT_H * INPUT_W + i + 2 * INPUT_H * INPUT_W] = (float)uc_pixel[0] / 255.0;
uc_pixel += 3;
++i;
}
}
// Run inference
doInference(*trt->context, trt->stream, trt->buffers, trt->data, trt->prob, BATCH_SIZE);
std::vector<std::vector<Yolo::Detection>> batch_res(1);
auto& res = batch_res[0];
nms(res, &trt->prob[0 * OUTPUT_SIZE], CONF_THRESH, NMS_THRESH);
int len = res.size();
for (size_t j = 0; j < res.size(); j++) {
cv::Rect r = get_rect(img, res[j].bbox);
res_array[j][0] = r.x;
res_array[j][1] = r.y;
res_array[j][2] = r.width;
res_array[j][3] = r.height;
res_array[j][4] = res[j].class_id;
res_array[j][5] = res[j].conf;
}
}
void cuda_free(void* h) {
Yolov5TRTContext* trt = (Yolov5TRTContext*)h;
cudaStreamDestroy(trt->stream);
CUDA_CHECK(cudaFree(trt->buffers[trt->inputIndex]));
CUDA_CHECK(cudaFree(trt->buffers[trt->outputIndex]));
trt->context->destroy();
trt->engine->destroy();
trt->runtime->destroy();
}
运行成功
在Release目录下查看生成的dll文件
至此,yolov5+tensorRT+dll完成!!!
from ctypes import *
import cv2
import numpy as np
import numpy.ctypeslib as npct
class Detector():
    """ctypes wrapper around the Init / Detect / cuda_free functions of the DLL.

    The DLL reports detections through a caller-supplied float32 array of
    shape (MAX_DETECTIONS, 6); each used row holds
    x, y, width, height, class id, confidence.
    """

    # Number of rows in the result buffer handed to the DLL's Detect().
    MAX_DETECTIONS = 50

    def __init__(self, model_path, dll_path):
        """Load the DLL and create an inference context.

        model_path: path of the serialized engine file, as ``bytes``.
        dll_path: path of the DLL to load.
        Raises RuntimeError if the DLL's Init() returns a null handle.
        """
        # self.yolov5 = CDLL(dll_path)
        self.yolov5 = CDLL(dll_path, winmode=0)  # use this form when loading the dll fails on Python 3.8
        # Init takes a C string; c_char_p rejects str instead of silently
        # passing a wide-character buffer.
        self.yolov5.Init.argtypes = [c_char_p]
        self.yolov5.Init.restype = c_void_p
        self.yolov5.Detect.argtypes = [
            c_void_p, c_int, c_int, POINTER(c_ubyte),
            npct.ndpointer(dtype=np.float32, ndim=2,
                           shape=(self.MAX_DETECTIONS, 6), flags="C_CONTIGUOUS"),
        ]
        # Both functions return void; without this ctypes assumes an int result.
        self.yolov5.Detect.restype = None
        self.yolov5.cuda_free.argtypes = [c_void_p]
        self.yolov5.cuda_free.restype = None
        self.c_point = self.yolov5.Init(model_path)
        if not self.c_point:
            raise RuntimeError("Init() returned a null handle for model %r" % (model_path,))

    def predict(self, img):
        """Run detection on an H x W x 3 uint8 image.

        Returns a float32 array with one row per detection
        (x, y, width, height, class id, confidence); all-zero rows are dropped.
        Raises ValueError for a missing or wrongly shaped/typed image and
        RuntimeError if the detector has already been freed.
        """
        if img is None:
            raise ValueError("img is None (did cv2.imread fail?)")
        if not self.c_point:
            raise RuntimeError("detector has been freed")
        # The DLL reads the pixels as one packed buffer, so a sliced or
        # transposed view has to be copied into contiguous memory first.
        frame = np.ascontiguousarray(img)
        if frame.dtype != np.uint8 or frame.ndim != 3 or frame.shape[2] != 3:
            raise ValueError("img must be an H x W x 3 uint8 array")
        rows, cols = frame.shape[0], frame.shape[1]
        res_arr = np.zeros((self.MAX_DETECTIONS, 6), dtype=np.float32)
        self.yolov5.Detect(self.c_point, rows, cols,
                           frame.ctypes.data_as(POINTER(c_ubyte)), res_arr)
        # Rows the DLL did not fill stay all-zero; keep only the filled ones.
        self.bbox_array = res_arr[~(res_arr == 0).all(1)]
        return self.bbox_array

    def free(self):
        """Release the native context; calling it more than once is harmless."""
        if self.c_point:
            self.yolov5.cuda_free(self.c_point)
            self.c_point = None
def visualize(img, bbox_array):
    """Draw every detection row onto img and return the annotated image.

    Each row is read as x, y, width, height, class id, score: the box is drawn
    as a rectangle and a "class:<id> <score>" label is placed just above its
    top-left corner.
    """
    colour = (105, 237, 249)
    for det in bbox_array:
        left = int(det[0])
        top = int(det[1])
        right = int(det[0] + det[2])
        bottom = int(det[1] + det[3])
        clas = int(det[4])
        score = det[5]
        label = "class:" + str(clas) + " " + str(round(score, 2))
        cv2.rectangle(img, (left, top), (right, bottom), colour, 2)
        img = cv2.putText(img, label, (left, top - 5),
                          cv2.FONT_HERSHEY_SIMPLEX, 0.5, colour, 1)
    return img
# Demo: load the engine through the DLL, run detection on one image and show
# the annotated result until a key is pressed.
det = Detector(model_path=b"./yolov5s.engine", dll_path="./yolov5.dll")  # b'' is needed
try:
    img = cv2.imread("./images/zidane.jpg")
    # cv2.imread returns None instead of raising when the file cannot be read.
    if img is None:
        raise FileNotFoundError("could not read ./images/zidane.jpg")
    result = det.predict(img)
    img = visualize(img, result)
    cv2.imshow("img", img)
    cv2.waitKey(0)
finally:
    # Release the native context and close the window even if a step failed.
    det.free()
    cv2.destroyAllWindows()
运行
python python_trt.py
Visual studio创建testYoloDll项目
(1)并创建两个源文件:testYolov5Dll.cpp、testYolov5Dll2.cpp;
(2)导入Yolov5TRTContext.h;
(3)导入yolov5.dll、yolov5.exp、yolov5.lib、yolov5s.engine
项目目录如下:
项目配置
将以下文件替换成自己的文件目录即可。
VC++目录-》包含目录:
D:\Space\VisualStudioSpace\tensorrtx-yolov5-v6.0\yolov5
D:\AITool\TensorRT\TensorRT-8.4.1.5\include
D:\AITool\CUDA\CUDA_Development\include
D:\Tool\opencv4.5.1\opencv\build\include\opencv2
D:\Tool\opencv4.5.1\opencv\build\include
VC++目录-》库目录:
D:\Tool\opencv4.5.1\opencv\build\x64\vc15\lib
D:\Space\VisualStudioSpace\tensorrtx-yolov5-v6.0\yolov5\build\Release
D:\AITool\TensorRT\TensorRT-8.4.1.5\lib
D:\AITool\CUDA\CUDA_Development\lib\x64
链接器->输入:
opencv_world451d.lib(Debug配置使用;Release配置请改用opencv_world451.lib)
cudart.lib
cudart_static.lib
yolov5.lib
nvinfer.lib
nvinfer_plugin.lib
nvonnxparser.lib
nvparsers.lib
静态、动态二选一即可,调用时把另一个文件中的display、main注释掉即可!!!
#pragma once
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include"Yolov5TRTContext.h" //引入头文件调用
using namespace std;
using namespace cv;
void display(Mat dst, vector<vector<float>> list);//显示
// Class labels, indexed by the class id reported for a detection.
// Define these to match your own model.
static const int class_num = 80;
static const string classes[class_num] = {
    "person", "bicycle", "car", "motorcycle", "airplane",
    "bus", "train", "truck", "boat", "traffic light",
    "fire hydrant", "stop sign", "parking meter", "bench", "bird",
    "cat", "dog", "horse", "sheep", "cow",
    "elephant", "bear", "zebra", "giraffe", "backpack",
    "umbrella", "handbag", "tie", "suitcase", "frisbee",
    "skis", "snowboard", "sports ball", "kite", "baseball bat",
    "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle",
    "wine glass", "cup", "fork", "knife", "spoon",
    "bowl", "banana", "apple", "sandwich", "orange",
    "broccoli", "carrot", "hot dog", "pizza", "donut",
    "cake", "chair", "couch", "potted plant", "bed",
    "dining table", "toilet", "tv", "laptop", "mouse",
    "remote", "keyboard", "cell phone", "microwave", "oven",
    "toaster", "sink", "refrigerator", "book", "clock",
    "vase", "scissors", "teddy bear", "hair drier", "toothbrush"
};
int main()
{
bool isOpenCapture = false;//默认不开启(图片); 开启(摄像头检测)
char model[] = "yolov5s.engine";//yolov5s.engine 位置
char* model_path = model;
float res_arr[50][6] = { 0.0f };//50个anchor 6(x,y,w,h,class,confidence)
Mat img, dst;
void* trt = (void*)Init(model_path);//初始化模型
/*
clock_t start, finish;
start = clock();
*/
if (isOpenCapture) {
//摄像头检测
const char* image_path = "";
cv::VideoCapture capture(0);//打开摄像头
while (true)
{
//摄像头测试
capture >> dst; //取一帧图片
img = dst;
Detect(trt, img.rows, img.cols, img.data, res_arr);//推理
vector<vector<float>> list;
for (int i = 0; i < 50; i++) {
if (res_arr[i][0] != 0) {
vector<float> temp;
temp.push_back(res_arr[i][0]);//x
temp.push_back(res_arr[i][1]);//y
temp.push_back(res_arr[i][2]);//w
temp.push_back(res_arr[i][3]);//h
temp.push_back(res_arr[i][4]);//class
temp.push_back(res_arr[i][5]);//confidence
list.push_back(temp);
}
}
//cout << "the list size" << list.size() << endl;
//调用dispaly
display(img, list);
waitKey(1);
//release
img.release();
dst.release();
}
}
else {
//图片检测
const char* image_path = "./images/zidane.jpg";//图片路径
img = cv::imread(image_path);//读取图片
dst = img;
Detect(trt, img.rows, img.cols, img.data, res_arr);//推理
vector<vector<float>> list;
for (int i = 0; i < 50; i++) {
if (res_arr[i][0] != 0) {
vector<float> temp;
temp.push_back(res_arr[i][0]);//x
temp.push_back(res_arr[i][1]);//y
temp.push_back(res_arr[i][2]);//w
temp.push_back(res_arr[i][3]);//h
temp.push_back(res_arr[i][4]);//class
temp.push_back(res_arr[i][5]);//confidence
list.push_back(temp);
}
}
//cout << "the list size" << list.size() << endl;
//调用dispaly
display(img, list);
waitKey(0);
//release
img.release();
dst.release();
}
/*
finish = clock();
cout << "时间消耗:" << (finish - start) / CLOCKS_PER_SEC * 1000 << endl;
*/
//调用cuda_free
cuda_free(trt);
return 0;
}
// Draws every detection in list onto dst and shows the result in the
// "yolov5-6.0" window. Each entry is read as x, y, w, h, class id, confidence;
// entries with fewer than six values are skipped. A class id outside the
// classes table is labelled "id<N>" instead of indexing past the table.
void display(Mat dst, vector<vector<float>> list) {
    const Scalar scalar(0, 255, 0);  // BGR (green)
    for (size_t i = 0; i < list.size(); i++) {
        const vector<float>& row = list[i];
        if (row.size() < 6) {
            continue;
        }
        const float x = row[0];
        const float y = row[1];
        const float w = row[2];
        const float h = row[3];
        const int c = (int)row[4];
        const float confidence = row[5];

        // draw the bounding box on dst
        Rect rect(static_cast<int>(x), static_cast<int>(y), static_cast<int>(w), static_cast<int>(h));
        rectangle(dst, rect, scalar, 2, LINE_8, 0);

        // add class name and confidence on dst
        string name;
        if (c >= 0 && c < class_num) {
            name = classes[c];
        }
        else {
            name = format("id%d", c);
        }
        string text = name + format(",%0.3f", confidence);
        putText(dst, text, Point2f(x, y + 10), FONT_HERSHEY_SIMPLEX, 0.5, scalar);
    }
    namedWindow("yolov5-6.0", WINDOW_AUTOSIZE); //WINDOW_NORMAL
    imshow("yolov5-6.0", dst);
}
#pragma once
#include
#include
#include
#include
#include
#include
#include
#include
#include
//动态调用dll
using namespace std;
using namespace cv;
//void display(Mat dst,vector<vector<float>> list);//显示boundingbox
// Class labels, indexed by the class id reported for a detection.
// Define these to match your own model.
static const int class_num = 80;
static const string classes[class_num] = {
    "person", "bicycle", "car", "motorcycle", "airplane",
    "bus", "train", "truck", "boat", "traffic light",
    "fire hydrant", "stop sign", "parking meter", "bench", "bird",
    "cat", "dog", "horse", "sheep", "cow",
    "elephant", "bear", "zebra", "giraffe", "backpack",
    "umbrella", "handbag", "tie", "suitcase", "frisbee",
    "skis", "snowboard", "sports ball", "kite", "baseball bat",
    "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle",
    "wine glass", "cup", "fork", "knife", "spoon",
    "bowl", "banana", "apple", "sandwich", "orange",
    "broccoli", "carrot", "hot dog", "pizza", "donut",
    "cake", "chair", "couch", "potted plant", "bed",
    "dining table", "toilet", "tv", "laptop", "mouse",
    "remote", "keyboard", "cell phone", "microwave", "oven",
    "toaster", "sink", "refrigerator", "book", "clock",
    "vase", "scissors", "teddy bear", "hair drier", "toothbrush"
};
//int main()
//{
// HMODULE module = LoadLibrary(_T("yolov5.dll"));//显示加载dll
// if (module == NULL)
// {
// cout << "加载yolov5.dll动态库失败" << endl;
// return -1;
// }
// else {
// cout << "加载成功!!!" << endl;
// }
// typedef void * (*InitFuc)(char* ); // 定义函数指针类型
// typedef void (*DetectFuc)(void* , int , int , unsigned char* , float(*)[6]); // 定义函数指针类型
// typedef void (*cuda_freeFuc)(void*);
//
// //从dll中加载Init、Detect、cuda_free
// InitFuc Init;
// Init = (InitFuc)GetProcAddress(module,"Init");
// //推理
// DetectFuc Detect;
//
// Detect = (DetectFuc)GetProcAddress(module, "Detect");
// //free
// cuda_freeFuc cuda_free;
// cuda_free = (cuda_freeFuc)GetProcAddress(module, "cuda_free");
//
//
// bool isOpenCapture = false;//默认不开启(图片); 开启(摄像头检测)
// char model[] = "yolov5s.engine";//yolov5s.engine 位置
// char* model_path = model;
// float res_arr[50][6] = { 0.0f };//50个anchor 6(x,y,w,h,class,confidence)
// Mat img, dst;
// void* trt = (void*)Init(model_path);//初始化模型
// /*
// clock_t start, finish;
// start = clock();
// */
//
// if (isOpenCapture) {
// //摄像头检测
// const char* image_path = "";
// cv::VideoCapture capture(0);//打开摄像头
// while (true)
// {
// //摄像头测试
// capture >> dst; //取一帧图片
// img = dst;
// Detect(trt, img.rows, img.cols, img.data, res_arr);//推理
// vector<vector<float>> list;
// for (int i = 0; i < 50; i++) {
// if (res_arr[i][0] != 0) {
// vector<float> temp;
// temp.push_back(res_arr[i][0]);//x
// temp.push_back(res_arr[i][1]);//y
// temp.push_back(res_arr[i][2]);//w
// temp.push_back(res_arr[i][3]);//h
// temp.push_back(res_arr[i][4]);//class
// temp.push_back(res_arr[i][5]);//confidence
// list.push_back(temp);
// }
// }
// //cout << "the list size" << list.size() << endl;
// //调用dispaly
// display(img, list);
// waitKey(1);
// //release
// img.release();
// dst.release();
//
// }
// }
// else {
// //图片检测
// const char* image_path = "./images/bus.jpg";//图片路径
// img = cv::imread(image_path);//读取图片
// dst = img;
// Detect(trt, img.rows, img.cols, img.data, res_arr);//推理
// vector<vector<float>> list;
// for (int i = 0; i < 50; i++) {
// if (res_arr[i][0] != 0) {
// vector<float> temp;
// temp.push_back(res_arr[i][0]);//x
// temp.push_back(res_arr[i][1]);//y
// temp.push_back(res_arr[i][2]);//w
// temp.push_back(res_arr[i][3]);//h
// temp.push_back(res_arr[i][4]);//class
// temp.push_back(res_arr[i][5]);//confidence
// list.push_back(temp);
// }
// }
// //cout << "the list size" << list.size() << endl;
// //调用dispaly
// display(img, list);
// waitKey(0);
// //release
// img.release();
// dst.release();
// }
// /*
// finish = clock();
// cout << "时间消耗:" << (finish - start) / CLOCKS_PER_SEC * 1000 << endl;
// */
//
// //调用cuda_free
// cuda_free(trt);
//
// return 0;
//
//}
//void display(Mat dst, vector<vector<float>> list) {
// //初始化m
// //遍历list
// Scalar scalar(0, 255, 0);//BGR(Green)
// vector<float> temp;
// for (int i = 0; i < list.size(); i++) {
//
// temp = list.at(i);
// float x = temp.at(0);
// float y = temp.at(1);
// float w = temp.at(2);
// float h = temp.at(3);
// int c = (int)temp.at(4);
// float confidence = temp.at(5);
//
// // 在dst上面作图
// //cout << "x=" << x << ",y=" << y << ",w" << w << ",h" << h << ",class=" << c << ",confidence=" << confidence << endl;
// Rect rect(x, y, w, h);//绘制矩形
// rectangle(dst, rect, scalar, 2, LINE_8, 0);
//
// //在dst上添加class confidence
// string text = classes[c] + format(",%0.3f", confidence);
// putText(dst, text, Point2f(x, y + 10), FONT_HERSHEY_SIMPLEX, 0.5, scalar);
// temp.clear();
//
// }
// namedWindow("yolov5-6.0", WINDOW_AUTOSIZE); //WINDOW_NORMAL
// imshow("yolov5-6.0", dst);
//}
运行测试结果:
可以图片测试,也可以打开摄像头测试,只需要更换代码中对应的文件目录,以及isOpenCapture即可!!!
testYolov5Dll项目想要获取的可以进行下载
CSDN下载:testYolov5Dll项目