使用yolov5-lite自带的export.py将模型导出为onnx格式,图像大小设置为320,batch设置为1
之后可以使用 onnxsim对模型进一步简化
onnxsim参考链接:onnxsim-让导出的onnx模型更精简_alex1801的博客-CSDN博客
python export.py --weights weights/v5lite-e.pt --img 320 --batch 1
python -m onnxsim weights/v5lite-e.onnx weights/yolov5-lite-sim.onnx
这个版本的推理速度能达到11+FPS
这两处换成自己的模型和训练的类别即可:
parser.add_argument('--modelpath', type=str, default="/media/xcy/dcd05f09-46df-4879-bfeb-3bab03a6cc3a/YOLOv5-Lite/weights/v5lite-e.onnx",
help="onnx filepath")
parser.add_argument('--classfile', type=str, default='coco.names',
help="classname filepath")
参考github:GitHub - hpc203/yolov5-lite-onnxruntime: 使用ONNXRuntime部署yolov5-lite目标检测,包含C++和Python两个版本的程序
import cv2
import numpy as np
import argparse
import onnxruntime as ort
import time
class yolov5_lite():
    """YOLOv5-Lite object detector backed by an ONNX Runtime session.

    The model output is expected to be a single tensor whose rows are
    ``[cx, cy, w, h, objectness, class scores...]`` for every anchor of every
    detection level, ordered level by level (strides 8, 16, 32) and, inside a
    level, anchor by anchor in row-major grid order. The raw values are
    decoded to input-image pixels in :meth:`_decode_outputs`.
    """

    def __init__(self, model_pb_path, label_path, confThreshold=0.5, nmsThreshold=0.5, objThreshold=0.5):
        """Load the ONNX model and the class-name file.

        Args:
            model_pb_path: Path of the ONNX model file.
            label_path: Text file with one class name per line.
            confThreshold: Minimum class score for a detection to be kept.
            nmsThreshold: IoU threshold passed to non-maximum suppression.
            objThreshold: Minimum objectness score for a detection to be kept.
        """
        so = ort.SessionOptions()
        so.log_severity_level = 3
        self.net = ort.InferenceSession(model_pb_path, so)
        # Use a context manager so the label file handle is closed promptly.
        with open(label_path, 'r') as label_file:
            self.classes = [line.strip() for line in label_file]
        self.num_classes = len(self.classes)
        anchors = [[10, 13, 16, 30, 33, 23], [30, 61, 62, 45, 59, 119], [116, 90, 156, 198, 373, 326]]
        self.nl = len(anchors)            # number of detection levels
        self.na = len(anchors[0]) // 2    # anchors per level
        self.no = self.num_classes + 5    # values per output row
        self.grid = [np.zeros(1)] * self.nl
        # (h, w) each cached grid in ``self.grid`` was built for; None = not built.
        self._grid_hw = [None] * self.nl
        self.stride = np.array([8., 16., 32.])
        self.anchor_grid = np.asarray(anchors, dtype=np.float32).reshape(self.nl, -1, 2)
        self.confThreshold = confThreshold
        self.nmsThreshold = nmsThreshold
        self.objThreshold = objThreshold
        # (height, width) read from the model's first input, laid out as NCHW.
        self.input_shape = (self.net.get_inputs()[0].shape[2], self.net.get_inputs()[0].shape[3])

    def resize_image(self, srcimg, keep_ratio=True):
        """Resize ``srcimg`` to the network input size, optionally letterboxed.

        Args:
            srcimg: Source image as an ``(H, W, C)`` array.
            keep_ratio: When true and the image is not square, keep the aspect
                ratio and pad the short side with black borders.

        Returns:
            ``(img, newh, neww, top, left)``: the resized image, the size of
            the un-padded content inside it, and the top/left padding.
        """
        in_h, in_w = self.input_shape[0], self.input_shape[1]
        top, left, newh, neww = 0, 0, in_h, in_w
        if keep_ratio and srcimg.shape[0] != srcimg.shape[1]:
            # NOTE(review): this aspect-ratio arithmetic matches the original
            # and presumes a square network input -- confirm before using a
            # non-square model with keep_ratio=True.
            hw_scale = srcimg.shape[0] / srcimg.shape[1]
            if hw_scale > 1:
                # Taller than wide: fill the height, pad left/right.
                newh, neww = in_h, int(in_w / hw_scale)
                img = cv2.resize(srcimg, (neww, newh), interpolation=cv2.INTER_AREA)
                left = int((in_w - neww) * 0.5)
                img = cv2.copyMakeBorder(img, 0, 0, left, in_w - neww - left, cv2.BORDER_CONSTANT,
                                         value=0)
            else:
                # Wider than tall: fill the width, pad top/bottom.
                newh, neww = int(in_h * hw_scale), in_w
                img = cv2.resize(srcimg, (neww, newh), interpolation=cv2.INTER_AREA)
                top = int((in_h - newh) * 0.5)
                img = cv2.copyMakeBorder(img, top, in_h - newh - top, 0, 0, cv2.BORDER_CONSTANT, value=0)
        else:
            # cv2.resize takes dsize as (width, height); input_shape is (height, width).
            img = cv2.resize(srcimg, (in_w, in_h), interpolation=cv2.INTER_AREA)
        return img, newh, neww, top, left

    def _make_grid(self, nx=20, ny=20):
        """Return the ``(ny * nx, 2)`` float32 array of ``(x, y)`` cell indices.

        Rows are in row-major order: ``x`` runs over ``0..nx-1`` fastest and
        ``y`` over ``0..ny-1`` slowest.
        """
        # meshgrid(x, y) yields arrays of shape (ny, nx); passing the axes in
        # the other order only happens to work when nx == ny.
        xv, yv = np.meshgrid(np.arange(nx), np.arange(ny))
        return np.stack((xv, yv), 2).reshape((-1, 2)).astype(np.float32)

    def _decode_outputs(self, outs):
        """Decode raw box values of ``outs`` in place to input-image pixels.

        Columns 0:2 (centre) and 2:4 (size) of every row are rewritten using
        the per-level cell grid, stride and anchor sizes; other columns are
        left untouched. Returns ``outs`` for convenience.
        """
        row_ind = 0
        for i in range(self.nl):
            stride = self.stride[i]
            h, w = int(self.input_shape[0] / stride), int(self.input_shape[1] / stride)
            length = int(self.na * h * w)
            # Rebuild the cell grid only when the feature-map size changed.
            if self._grid_hw[i] != (h, w):
                self.grid[i] = self._make_grid(w, h)
                self._grid_hw[i] = (h, w)
            rows = slice(row_ind, row_ind + length)
            outs[rows, 0:2] = (outs[rows, 0:2] * 2. - 0.5 + np.tile(
                self.grid[i], (self.na, 1))) * int(stride)
            outs[rows, 2:4] = (outs[rows, 2:4] * 2) ** 2 * np.repeat(
                self.anchor_grid[i], h * w, axis=0)
            row_ind += length
        return outs

    def postprocess(self, frame, outs, pad_hw):
        """Filter decoded detections, run NMS and draw the survivors on ``frame``.

        Args:
            frame: Original image the boxes are drawn on (modified in place).
            outs: Decoded detections, one row per candidate box.
            pad_hw: ``(newh, neww, padh, padw)`` as returned by ``resize_image``.

        Returns:
            ``frame`` with the detections drawn.
        """
        newh, neww, padh, padw = pad_hw
        frameHeight = frame.shape[0]
        frameWidth = frame.shape[1]
        ratioh, ratiow = frameHeight / newh, frameWidth / neww

        # Pick the best class per row and keep only rows that pass both the
        # class-score and the objectness thresholds.
        scores = outs[:, 5:]
        best_ids = np.argmax(scores, axis=1)
        best_scores = scores[np.arange(scores.shape[0]), best_ids]
        keep = (best_scores > self.confThreshold) & (outs[:, 4] > self.objThreshold)

        classIds = []
        confidences = []
        boxes = []
        for detection, classId, confidence in zip(outs[keep], best_ids[keep], best_scores[keep]):
            # Undo the letterbox padding and scale back to the original frame.
            center_x = int((detection[0] - padw) * ratiow)
            center_y = int((detection[1] - padh) * ratioh)
            width = int(detection[2] * ratiow)
            height = int(detection[3] * ratioh)
            left = int(center_x - width / 2)
            top = int(center_y - height / 2)
            classIds.append(int(classId))
            confidences.append(float(confidence))
            boxes.append([left, top, width, height])

        if not boxes:
            return frame

        # Perform non maximum suppression to eliminate redundant overlapping
        # boxes with lower confidences.
        indices = cv2.dnn.NMSBoxes(boxes, confidences, self.confThreshold, self.nmsThreshold)
        # Depending on the OpenCV version the result is a flat sequence, an
        # Nx1 array or an empty tuple; flatten to handle all of them.
        for i in np.array(indices).reshape(-1):
            i = int(i)
            left, top, width, height = boxes[i]
            frame = self.drawPred(frame, classIds[i], confidences[i], left, top, left + width, top + height)
        return frame

    def drawPred(self, frame, classId, conf, left, top, right, bottom):
        """Draw one bounding box and its ``name:score`` label on ``frame``."""
        # Draw a bounding box.
        cv2.rectangle(frame, (left, top), (right, bottom), (0, 0, 255), thickness=4)
        label = '%.2f' % conf
        label = '%s:%s' % (self.classes[classId], label)
        # Measure the label with the same font settings it is drawn with, and
        # keep its baseline low enough that the text is not cut off at the
        # top edge of the image.
        font_scale, thickness = 1, 2
        labelSize, baseLine = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, font_scale, thickness)
        text_y = max(top - 10, labelSize[1])
        cv2.putText(frame, label, (left, text_y), cv2.FONT_HERSHEY_SIMPLEX, font_scale, (0, 255, 0),
                    thickness=thickness)
        return frame

    def detect(self, srcimg):
        """Run detection on a BGR image and return it with the results drawn."""
        img, newh, neww, top, left = self.resize_image(srcimg)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = img.astype(np.float32) / 255.0
        # HWC -> NCHW with a batch dimension of 1.
        blob = np.expand_dims(np.transpose(img, (2, 0, 1)), axis=0)
        outs = self.net.run(None, {self.net.get_inputs()[0].name: blob})[0].squeeze(axis=0)
        outs = self._decode_outputs(outs)
        srcimg = self.postprocess(srcimg, outs, (newh, neww, top, left))
        return srcimg
if __name__ == '__main__':
    # Command-line entry point: run the detector on the default camera and
    # show the annotated frames until the stream ends or 'q' is pressed.
    parser = argparse.ArgumentParser()
    parser.add_argument('--imgpath', type=str, default="",
                        help="image path")
    parser.add_argument('--modelpath', type=str, default="/media/xcy/dcd05f09-46df-4879-bfeb-3bab03a6cc3a/YOLOv5-Lite/weights/v5lite-e.onnx",
                        help="onnx filepath")
    parser.add_argument('--classfile', type=str, default='coco.names',
                        help="classname filepath")
    parser.add_argument('--confThreshold', default=0.5, type=float, help='class confidence')
    parser.add_argument('--nmsThreshold', default=0.6, type=float, help='nms iou thresh')
    args = parser.parse_args()

    net = yolov5_lite(args.modelpath, args.classfile, confThreshold=args.confThreshold, nmsThreshold=args.nmsThreshold)
    print(net)

    # 1. Open the video source (device 0).
    capture = cv2.VideoCapture(0)
    try:
        prev_time = time.time()
        while True:
            # 2. Read the next frame; ret is False when no frame was obtained,
            #    in which case frame must not be passed to the detector.
            ret, frame = capture.read()
            if not ret:
                break

            # 3. Run detection; the boxes are drawn onto the frame.
            frame = net.detect(frame)

            # Frame-to-frame interval, guarded against a zero time delta.
            now = time.time()
            elapsed = now - prev_time
            prev_time = now
            if elapsed > 0:
                current_fps = 1 / elapsed
                print("FPS: ", current_fps)
                cv2.putText(frame, "FPS {0}".format(float('%.1f' % current_fps)), (30, 50),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255),
                            2)

            cv2.imshow("video", frame)

            # 4. Stop when 'q' is pressed.
            if cv2.waitKey(20) & 0xff == ord('q'):
                break
    finally:
        # 5. Release the capture device and 6. close all windows, even when
        #    the loop exits through an exception.
        capture.release()
        cv2.destroyAllWindows()
此版本能够在笔记本上达到33+FPS,正在整理代码,后续发布。
代码整理好了,如下:需要VS2019配置ncnn之后即可运行。
LINUX配置NCNN可以参考我的另一篇博客:Ubuntu20.04配置NCNN推理框架(转换yolov5 onnx格式到ncnn格式)-CSDN博客
WINDOWS配置比较简单,大家搜一搜都能搜到。
#include "layer.h"
#include "net.h"
#if defined(USE_NCNN_SIMPLEOCV)
#include "simpleocv.h"
#else
#include
#include
#include
#endif
#include
#include
#include
#include
#include
//#define YOLOV5_V60 1 //YOLOv5 v6.0
#define YOLOV5_V62 1 //YOLOv5 v6.2 export onnx model method https://github.com/shaoshengsong/yolov5_62_export_ncnn
#if YOLOV5_V60 || YOLOV5_V62
#define MAX_STRIDE 64
#else
#define MAX_STRIDE 32
// Custom ncnn layer that halves the width and height of its input and
// multiplies the channel count by four: each output channel holds every
// second pixel of one input channel, starting at one of the four (row, column)
// offsets inside a 2x2 cell.
class YoloV5Focus : public ncnn::Layer
{
public:
    YoloV5Focus()
    {
        one_blob_only = true;
    }

    // Fills top_blob from bottom_blob.
    // Returns 0 on success, -100 when the output blob could not be created.
    virtual int forward(const ncnn::Mat& bottom_blob, ncnn::Mat& top_blob, const ncnn::Option& opt) const
    {
        const int w = bottom_blob.w;
        const int h = bottom_blob.h;
        const int channels = bottom_blob.c;

        const int outw = w / 2;
        const int outh = h / 2;
        const int outc = channels * 4;

        top_blob.create(outw, outh, outc, 4u, 1, opt.blob_allocator);
        if (top_blob.empty())
            return -100;

        // Distance, in floats, between the source starts of two consecutive
        // output rows: the 2 * outw elements consumed by one row plus w more.
        const int src_row_step = 2 * outw + w;

        #pragma omp parallel for num_threads(opt.num_threads)
        for (int p = 0; p < outc; p++)
        {
            // phase selects the starting offset: row (phase % 2), column (phase / 2).
            const int phase = p / channels;
            const float* src = bottom_blob.channel(p % channels).row(phase % 2) + (phase / 2);
            float* dst = top_blob.channel(p);

            for (int i = 0; i < outh; i++)
            {
                const float* src_row = src + i * src_row_step;
                float* dst_row = dst + i * outw;

                for (int j = 0; j < outw; j++)
                {
                    dst_row[j] = src_row[2 * j];
                }
            }
        }

        return 0;
    }
};
DEFINE_LAYER_CREATOR(YoloV5Focus)
#endif //YOLOV5_V60 YOLOV5_V62
// One detection record.
struct Object
{
    // Bounding rectangle with float coordinates. The template argument is
    // required: cv::Rect_ is a class template and cannot be used bare as a
    // member type.
    cv::Rect_<float> rect;
    int label;  // presumably the class index -- confirm against the caller
    float prob; // presumably the confidence score -- confirm against the caller
};
static inline float intersection_area(const Object& a, const Object& b)
{
cv::Rect_ inter = a.rect & b.rect;
return inter.area();
}
static void qsort_descent_inplace(std::vector