This article builds on YOLOv7 + TensorRT object detection and integrates OpenCV multithreading, an image queue, and automatic saving of xml and jpg files, for a well-rounded, high-performance deployment.
If you do not yet know how to train and export a model, see this article first: YOLOv7 Tutorial Series, Part 1: Training Your Own Object Detection Model on a Custom Dataset (a step-by-step guide, including dataset preprocessing), with detailed notes on train.py/test.py/detect.py/export.py.
The code for this article is open-sourced on GitHub: yolov7_tensorrt_opencv_queue
Before starting, you need CUDA, cuDNN, and TensorRT installed on the machine, and the three versions must match, otherwise errors will show up later (TensorRT 8.2.xx was tested and raised errors; several people online reported that switching to 8.4.3.1 fixes it). This article uses TensorRT 8.4.3.1 with matching CUDA and cuDNN versions.
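To confirm the versions Python actually sees, here is a minimal check (it assumes PyTorch and the TensorRT Python bindings are installed):
import torch
import tensorrt as trt
print("CUDA (PyTorch build):", torch.version.cuda)
print("cuDNN:", torch.backends.cudnn.version())
print("TensorRT:", trt.__version__)  # 8.4.3.1 for the setup used here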
If TensorRT was installed from the tar package, run the export from inside the TensorRT installation directory; with other installation methods you can call the trtexec tool directly. For details on trtexec, see: TensorRT Tutorial 3: Converting Models to an Engine with trtexec.
/path/to/tensorrt/TensorRT-8.4.3.1/targets/x86_64-linux-gnu/bin/trtexec --onnx=/path/to/your/best.onnx --saveEngine=/path/to/save/best.engine --fp16 --workspace=1000
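To sanity-check the exported engine, the same trtexec binary can load it back and report inference timings (paths are the same placeholders as above):
/path/to/tensorrt/TensorRT-8.4.3.1/targets/x86_64-linux-gnu/bin/trtexec --loadEngine=/path/to/save/best.engine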
git clone https://github.com/ZhijunLStudio/yolov7_tensorrt_opencv_queue.git
pip install -r requirements.txt
Next, modify the configuration in detect.py:
Create a folder named "model" in the project root and place best.engine inside it:
trt_name = "best.engine"
Set the stream address; if you are using a USB camera or an onboard camera, change this to 0 (an integer, no quotes):
RtspUrl = "rtsp://admin:[email protected]:554/Streaming/Channels/101"
Configure the label dictionary as {"folder name": {0: "label 1", 1: "label 2", 2: "label 3"...}}:
label_dict = {'person': {0: 'person'}}
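For a multi-class model, the inner dictionary lists every class index. A hypothetical three-class example (folder and label names are made up):
label_dict = {'traffic': {0: 'car', 1: 'truck', 2: 'bus'}}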
python detect.py
detect.py mainly wires together the wrapped modules (the TensorRT engine module, the xml/jpg saving module, the module that indexes an available GPU on multi-GPU machines, and the module that pushes frames into an image queue) and processes frames from the queue in a loop:
import os
import cv2
from cv2 import getTickCount, getTickFrequency
from queue import Queue
import queue
from algorithm.yolov7_trt import TRT_engine
from algorithm.yolov7_trt import visualize
from utils.myPrint import customPrint
from utils.generateXml import GenerateJpgAndXml
from utils.nvidia import indexGPU
from utils.frameThread import FrameThread
if __name__ == '__main__':
    # Change items 1-3 below to match your model and camera
    # 1. Name of the TensorRT engine file placed in the "model" folder
    trt_name = "best.engine"
    # 2. RTSP address; for a USB camera or other onboard camera, change this to 0 (no quotes)
    RtspUrl = "rtsp://admin:[email protected]:554/Streaming/Channels/101"
    # 3. Label dictionary for the auto-generated xml, configured as
    #    {"output folder name": {0: "label 1", 1: "label 2", 2: "label 3"...}}
    label_dict = {'person': {0: 'person'}}
    # Pick the index of the GPU with the lowest memory usage
    gpu_id = indexGPU()
    # Build the engine path from the current working directory
    trt_path = os.path.join(os.getcwd(), "model", trt_name)
    # Arguments: inference image size, engine path, index of the GPU to use
    trt_engine = TRT_engine(imgsz=640, weight=trt_path, GPUId=0)
    # trt_engine = TRT_engine(imgsz=640, weight=trt_path, GPUId=gpu_id)
    # Create the folders that will hold the jpg and xml files
    label_dict_key = list(label_dict.keys())[0]
    label_dict_value = list(label_dict.values())[0]
    car_write_xml = GenerateJpgAndXml(label_dict_key, label_dict_value)
    # Queue that buffers the image arrays
    q = Queue()
    # Frame-reading thread object
    thread = FrameThread(RtspUrl, q)
    # Make the reader a daemon thread so it exits with the main process
    thread.daemon = True
    # Start the reader thread
    thread.start()
    while True:
        try:
            loop_start = getTickCount()
            # Take one frame from the queue
            frame = q.get(block=True, timeout=3)
            results = trt_engine.predict(frame, threshold=0.5)
            # To save a training sample here, call car_write_xml.generatr_xml(frame, results)
            # Draw the results
            frame = visualize(frame, results)
            # FPS timing
            loop_time = getTickCount() - loop_start
            total_time = loop_time / getTickFrequency()
            FPS = 1 / total_time
            # On-screen text in the top-left corner
            cv2.putText(frame, f"FPS: {int(FPS)}", (0, 100), cv2.FONT_HERSHEY_COMPLEX, 2.0, (100, 200, 200), 2)
            cv2.putText(frame, "Press q to exit", (0, 200), cv2.FONT_HERSHEY_COMPLEX, 2.0, (100, 100, 200), 2)
            out_win = "yolov7_trt_output"
            cv2.namedWindow(out_win, cv2.WINDOW_NORMAL)
            cv2.setWindowProperty(out_win, cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_FULLSCREEN)
            cv2.imshow(out_win, frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
        except queue.Empty:
            print('Queue is empty; failed to get a frame')
    customPrint("---------------------- All processing finished ----------------------")
The engine wrapper lives in algorithm/yolov7_trt.py, which handles engine loading, preprocessing, inference, and result visualization:
import cv2
import tensorrt as trt
import torch
import numpy as np
from collections import OrderedDict, namedtuple
class TRT_engine():
    def __init__(self, imgsz, weight, GPUId) -> None:
        self.imgsz = [imgsz, imgsz]
        self.weight = weight
        self.device = torch.device('cuda:' + str(GPUId))
        self.init_engine()

    def init_engine(self):
        # Deserialize the TensorRT engine and allocate a device buffer per binding
        self.Binding = namedtuple('Binding', ('name', 'dtype', 'shape', 'data', 'ptr'))
        self.logger = trt.Logger(trt.Logger.INFO)
        trt.init_libnvinfer_plugins(self.logger, namespace="")
        with open(self.weight, 'rb') as self.f, trt.Runtime(self.logger) as self.runtime:
            self.model = self.runtime.deserialize_cuda_engine(self.f.read())
        self.bindings = OrderedDict()
        self.fp16 = False
        for index in range(self.model.num_bindings):
            self.name = self.model.get_binding_name(index)
            self.dtype = trt.nptype(self.model.get_binding_dtype(index))
            self.shape = tuple(self.model.get_binding_shape(index))
            self.data = torch.from_numpy(np.empty(self.shape, dtype=np.dtype(self.dtype))).to(self.device)
            self.bindings[self.name] = self.Binding(self.name, self.dtype, self.shape, self.data, int(self.data.data_ptr()))
            if self.model.binding_is_input(index) and self.dtype == np.float16:
                self.fp16 = True
        self.binding_addrs = OrderedDict((n, d.ptr) for n, d in self.bindings.items())
        self.context = self.model.create_execution_context()

    def letterbox(self, im, color=(114, 114, 114), auto=False, scaleup=True, stride=32):
        # Resize and pad image while meeting stride-multiple constraints
        shape = im.shape[:2]  # current shape [height, width]
        new_shape = self.imgsz
        if isinstance(new_shape, int):
            new_shape = (new_shape, new_shape)
        # Scale ratio (new / old)
        self.r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
        if not scaleup:  # only scale down, do not scale up (for better val mAP)
            self.r = min(self.r, 1.0)
        # Compute padding
        new_unpad = int(round(shape[1] * self.r)), int(round(shape[0] * self.r))
        self.dw, self.dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding
        if auto:  # minimum rectangle
            self.dw, self.dh = np.mod(self.dw, stride), np.mod(self.dh, stride)  # wh padding
        self.dw /= 2  # divide padding into 2 sides
        self.dh /= 2
        if shape[::-1] != new_unpad:  # resize
            im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
        top, bottom = int(round(self.dh - 0.1)), int(round(self.dh + 0.1))
        left, right = int(round(self.dw - 0.1)), int(round(self.dw + 0.1))
        self.img = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border
        return self.img, self.r, self.dw, self.dh
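    # Worked example of what letterbox computes, assuming a 1920x1080 frame
    # and imgsz=640 (the numbers are illustrative, not from the repo):
    #   r = min(640/1080, 640/1920) = 1/3
    #   new_unpad = (640, 360), so the frame is resized to 640x360
    #   dw = (640 - 640)/2 = 0, dh = (640 - 360)/2 = 140
    # i.e. 140 px of gray padding above and below; predict() later inverts
    # this with (coord - dw) / r to map boxes back onto the source frame.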
    def preprocess(self, image):
        self.img, self.r, self.dw, self.dh = self.letterbox(image)
        self.img = self.img.transpose((2, 0, 1))  # HWC -> CHW
        self.img = np.expand_dims(self.img, 0)  # add batch dimension
        self.img = np.ascontiguousarray(self.img)
        self.img = torch.from_numpy(self.img).to(self.device)
        # Match the input binding's precision (half for fp16 engines)
        self.img = self.img.half() if self.fp16 else self.img.float()
        return self.img

    def predict(self, img, threshold):
        img = self.preprocess(img)
        self.binding_addrs['images'] = int(img.data_ptr())
        self.context.execute_v2(list(self.binding_addrs.values()))
        nums = self.bindings['num_dets'].data[0].tolist()
        boxes = self.bindings['det_boxes'].data[0].tolist()
        scores = self.bindings['det_scores'].data[0].tolist()
        classes = self.bindings['det_classes'].data[0].tolist()
        num = int(nums[0])
        new_bboxes = []
        for i in range(num):
            if scores[i] < threshold:
                continue
            # Undo the letterbox transform to map boxes back onto the original frame
            xmin = (boxes[i][0] - self.dw) / self.r
            ymin = (boxes[i][1] - self.dh) / self.r
            xmax = (boxes[i][2] - self.dw) / self.r
            ymax = (boxes[i][3] - self.dh) / self.r
            new_bboxes.append([classes[i], scores[i], xmin, ymin, xmax, ymax])
        return new_bboxes

def visualize(img, bbox_array):
    for temp in bbox_array:
        xmin = int(temp[2])
        ymin = int(temp[3])
        xmax = int(temp[4])
        ymax = int(temp[5])
        clas = int(temp[0])
        score = temp[1]
        cv2.rectangle(img, (xmin, ymin), (xmax, ymax), (105, 237, 249), 2)
        img = cv2.putText(img, "class:" + str(clas) + " " + str(round(score, 2)), (xmin, ymin - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (105, 237, 249), 1)
        # img = cv2.putText(img, "h:" + str(ymax - ymin) + " w:" + str(xmax - xmin), (xmin, ymin - 25), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 237, 249), 2)
    return img
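TRT_engine and visualize can also be exercised on a single image, independent of the camera pipeline; a minimal sketch (file paths are illustrative):
import cv2
from algorithm.yolov7_trt import TRT_engine, visualize

engine = TRT_engine(imgsz=640, weight='model/best.engine', GPUId=0)
img = cv2.imread('test.jpg')                      # any local test image
boxes = engine.predict(img, threshold=0.5)        # [[class, score, xmin, ymin, xmax, ymax], ...]
cv2.imwrite('result.jpg', visualize(img, boxes))  # draw and save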
utils/generateXml.py saves each frame as a jpg and writes the matching Pascal VOC xml annotation:
import os
import cv2
import datetime
from utils.myPrint import customPrint
class GenerateJpgAndXml:
    """
    Parameters:
    parentName: name of the folder that holds the jpg and xml subfolders, e.g. person
    labelDict: maps class indices to label names
    """
    def __init__(self, parentName, labelDict):
        self.parentName = parentName
        # Root folder that holds all generated files
        self.parentPath = os.path.join(os.getcwd(), "JpgAndXml")
        self.midPath = os.path.join(self.parentPath, self.parentName)
        # Subfolder for jpg files
        self.jpgName = "JPEGImages"
        # Subfolder for xml files
        self.xmlName = "Annotations"
        # Label dictionary
        self.labelDict = labelDict
        # On first use, make sure the folders exist
        self.isExist()

    def isExist(self):
        # Folder for jpg files
        self.jpgPath = os.path.join(self.midPath, self.jpgName)
        # Folder for xml files
        self.xmlPath = os.path.join(self.midPath, self.xmlName)
        # Create the jpg and xml folders if they do not exist yet
        for perPath in [self.jpgPath, self.xmlPath]:
            if not os.path.exists(perPath):
                # makedirs creates intermediate directories as needed
                os.makedirs(perPath)
                print(f"Created folder {perPath}")
            else:
                print(f"Folder {perPath} already exists, skipping")

    def generatr_xml(self, frame, result):
        # Timestamp used as the file name prefix
        xmlPrefix = datetime.datetime.now().strftime("%Y%m%d%H%M%S%f")
        hwc = frame.shape
        # jpg name
        jpgName = xmlPrefix + ".jpg"
        # jpg path
        jpgPath = os.path.join(self.jpgPath, jpgName)
        # Write the image
        cv2.imwrite(jpgPath, frame)
        # xml path
        xmlPath = os.path.join(self.xmlPath, xmlPrefix + ".xml")
        with open(xmlPath, 'w') as xml_file:
            xml_file.write('<annotation>\n')
            xml_file.write('\t<folder>' + self.parentName + '</folder>\n')
            xml_file.write('\t<filename>' + jpgName + '</filename>\n')
            xml_file.write('\t<path>' + jpgPath + '</path>\n')
            xml_file.write('\t<source>\n')
            xml_file.write('\t\t<database>Unknown</database>\n')
            xml_file.write('\t</source>\n')
            xml_file.write('\t<size>\n')
            xml_file.write('\t\t<width>' + str(hwc[1]) + '</width>\n')
            xml_file.write('\t\t<height>' + str(hwc[0]) + '</height>\n')
            xml_file.write('\t\t<depth>' + str(hwc[2]) + '</depth>\n')
            xml_file.write('\t</size>\n')
            xml_file.write('\t<segmented>0</segmented>\n')
            for re in result:
                ObjName = self.labelDict[re[0]]
                xmin = int(re[2])
                ymin = int(re[3])
                xmax = int(re[4])
                ymax = int(re[5])
                xml_file.write('\t<object>\n')
                xml_file.write('\t\t<name>' + ObjName + '</name>\n')
                xml_file.write('\t\t<pose>Unspecified</pose>\n')
                xml_file.write('\t\t<truncated>0</truncated>\n')
                xml_file.write('\t\t<difficult>0</difficult>\n')
                xml_file.write('\t\t<bndbox>\n')
                xml_file.write('\t\t\t<xmin>' + str(xmin) + '</xmin>\n')
                xml_file.write('\t\t\t<ymin>' + str(ymin) + '</ymin>\n')
                xml_file.write('\t\t\t<xmax>' + str(xmax) + '</xmax>\n')
                xml_file.write('\t\t\t<ymax>' + str(ymax) + '</ymax>\n')
                xml_file.write('\t\t</bndbox>\n')
                xml_file.write('\t</object>\n')
            xml_file.write('</annotation>')
        customPrint(f"jpg and xml written for {jpgPath}")
utils/nvidia.py uses pynvml to pick the GPU with the most free memory:
import pynvml
from utils.myPrint import customPrint
UNIT = 1024 * 1024
def indexGPU():
    pynvml.nvmlInit()  # initialize NVML
    gpuDeviceCount = pynvml.nvmlDeviceGetCount()  # number of NVIDIA GPUs
    gpudir = {}
    for i in range(gpuDeviceCount):
        handle = pynvml.nvmlDeviceGetHandleByIndex(i)  # handle for GPU i, used for all further queries
        memoryInfo = pynvml.nvmlDeviceGetMemoryInfo(handle)  # memory info for GPU i
        customPrint(f"GPU memory free ratio: {memoryInfo.free / memoryInfo.total}")
        gpudir[i] = memoryInfo.free / memoryInfo.total
    # Pick the GPU with the highest free-memory ratio
    gpumin = max(gpudir.keys(), key=(lambda k: gpudir[k]))
    pynvml.nvmlShutdown()  # shut down NVML when done
    customPrint(f"Selected GPU {gpumin}")
    return gpumin
utils/frameThread.py reads frames from the stream on a separate thread and pushes them into the queue:
import cv2
import threading
import queue
from utils.myPrint import customPrint
class FrameThread(threading.Thread):
    def __init__(self, rtsp_url, q):
        super(FrameThread, self).__init__()
        self.rtsp_url = rtsp_url
        self.q = q
        self.thread_exit = False

    def run(self):
        customPrint('Entered the frame-grabbing loop')
        # Counters for consecutive failures
        exit_frame_num = 0
        exit_cap_num = 0
        cap = cv2.VideoCapture(self.rtsp_url)
        while not self.thread_exit:
            ret, frame = cap.read()
            if ret:
                exit_frame_num = 0
                exit_cap_num = 0
                try:
                    self.q.put(frame, block=True, timeout=3)
                except queue.Full:
                    customPrint('Queue is full; frame dropped')
            else:
                exit_frame_num += 1
                # After 5 consecutive failed reads, reopen the stream
                if exit_frame_num >= 5:
                    customPrint(f"Stream read failed; reopening (failed reads: {exit_frame_num}, reopen attempts: {exit_cap_num})")
                    cap = cv2.VideoCapture(self.rtsp_url)
                    exit_frame_num = 0  # give the reopened stream another 5 read attempts
                    exit_cap_num += 1
                    # After 5 failed reopen attempts, give up and exit the thread
                    if exit_cap_num >= 5:
                        self.thread_exit = True
                        customPrint('Camera thread has exited')
        cap.release()
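detect.py marks the reader as a daemon thread, so it is killed together with the main process. If an explicit shutdown is preferred, the thread_exit flag above already supports it; a minimal sketch:
thread.thread_exit = True  # ask run() to leave its loop
thread.join(timeout=5)     # wait for the reader to finish and release the capture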
Reference:
YOLOv7 TensorRT Python Deployment Tutorial