- Because of the runtime advantages of C++, most algorithm models are deployed to a C++ environment in real applications to improve speed and stability.
- This article describes deploying a YOLOv5 model with OpenCV in a Visual Studio project on Win10, in two steps:
  1. Export an .onnx model with export.py in the Python environment.
  2. Load and run the model through OpenCV's DNN module in C++.
- The detection results after deployment are shown below (running on CPU, no acceleration!)
Deploying YOLOv5 with VS2019 + OpenCV on Win10
Model export function:
def export_onnx(model, im, file, opset, train, dynamic, simplify, prefix=colorstr('ONNX:')):
    # YOLOv5 ONNX export
    try:
        check_requirements(('onnx',))
        import onnx

        LOGGER.info(f'\n{prefix} starting export with onnx {onnx.__version__}...')
        f = file.with_suffix('.onnx')

        torch.onnx.export(
            model.cpu() if dynamic else model,  # --dynamic only compatible with cpu
            im.cpu() if dynamic else im,
            f,
            verbose=False,
            opset_version=opset,
            training=torch.onnx.TrainingMode.TRAINING if train else torch.onnx.TrainingMode.EVAL,
            do_constant_folding=not train,
            input_names=['images'],
            output_names=['output'],
            dynamic_axes={
                'images': {
                    0: 'batch',
                    2: 'height',
                    3: 'width'},  # shape(1,3,640,640)
                'output': {
                    0: 'batch',
                    1: 'anchors'}  # shape(1,25200,85)
            } if dynamic else None)

        # Checks
        model_onnx = onnx.load(f)  # load onnx model
        onnx.checker.check_model(model_onnx)  # check onnx model

        # Metadata
        d = {'stride': int(max(model.stride)), 'names': model.names}
        for k, v in d.items():
            meta = model_onnx.metadata_props.add()
            meta.key, meta.value = k, str(v)
        onnx.save(model_onnx, f)

        # Simplify
        if simplify:
            try:
                cuda = torch.cuda.is_available()
                check_requirements(('onnxruntime-gpu' if cuda else 'onnxruntime', 'onnx-simplifier>=0.4.1'))
                import onnxsim

                LOGGER.info(f'{prefix} simplifying with onnx-simplifier {onnxsim.__version__}...')
                model_onnx, check = onnxsim.simplify(model_onnx)
                assert check, 'assert check failed'
                onnx.save(model_onnx, f)
            except Exception as e:
                LOGGER.info(f'{prefix} simplifier failure: {e}')

        LOGGER.info(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)')
        return f
    except Exception as e:
        LOGGER.info(f'{prefix} export failure: {e}')
Export command:
python export.py --weights ./weights/yolov5s.pt --opset 12 --include onnx
If export fails with the following message, it is because the SiLU operator cannot be exported with ONNX opset 12:
ONNX: starting export with onnx 1.13.0...
ONNX: export failure: Exporting the operator silu to ONNX opset version 12 is not supported. Please open a bug to request ONNX export support for the missing operator.
The fix is as follows: in your Python environment, open <Python installation path>/Lib/site-packages/torch/nn/modules/activation.py and rewrite line 394 (the forward of SiLU):
def forward(self, input: Tensor) -> Tensor:
    # original
    # return F.silu(input, inplace=self.inplace)
    # rewritten with ops that opset 12 can export
    return input * torch.sigmoid(input)
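Once export.py finishes without errors, it is worth a quick check that the exported .onnx actually loads in OpenCV's DNN module before wiring it into the full VS project. Below is a minimal sketch of my own (the model path is an assumption; point it at the file export.py produced):

#include <iostream>
#include <opencv2/dnn.hpp>

int main()
{
    try
    {
        // Path is an assumption: the export command above writes ./weights/yolov5s.onnx
        cv::dnn::Net net = cv::dnn::readNetFromONNX("weights/yolov5s.onnx");
        std::cout << "Model loaded, " << net.getLayerNames().size() << " layers" << std::endl;
    }
    catch (const cv::Exception &e)
    {
        std::cerr << "Failed to load model: " << e.what() << std::endl;
        return 1;
    }
    return 0;
}

Compile it the same way as the yolo.cpp example further down (e.g. g++ -O3 check_onnx.cpp -o check_onnx `pkg-config --cflags --libs opencv4` on Linux, or inside the configured VS project on Windows).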
Run the Python demo:
git clone https://github.com/doleron/yolov5-opencv-cpp-python.git
cd yolov5-opencv-cpp-python
python python/yolo.py
To test on the GPU with CUDA:
git clone https://github.com/doleron/yolov5-opencv-cpp-python.git
cd yolov5-opencv-cpp-python
python python/yolo.py cuda
If you are debugging on Linux instead, the required environment is:
Any modern Linux OS (tested on Ubuntu 20.04)
OpenCV 4.5.4+
Python 3.7+ (only if you intend to run the python program)
GCC 9.0+ (only if you intend to run the C++ program)
IMPORTANT!!! Note that OpenCV versions prior to 4.5.4 will not work at all.
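Because anything older than 4.5.4 will not work, it can help to confirm which OpenCV version your toolchain is actually linking. A tiny check of my own (not part of the original repo):

#include <iostream>
#include <opencv2/core/utility.hpp>

int main()
{
    // Must print 4.5.4 or newer for the YOLOv5 ONNX model to load correctly
    std::cout << "OpenCV " << cv::getVersionString() << std::endl;
    return 0;
}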
Run the C++ code:
git clone https://github.com/doleron/yolov5-opencv-cpp-python.git
cd yolov5-opencv-cpp-python
g++ -O3 cpp/yolo.cpp -o yolo_example `pkg-config --cflags --libs opencv4`
./yolo_example
Or using CUDA if available:
git clone https://github.com/doleron/yolov5-opencv-cpp-python.git
cd yolov5-opencv-cpp-python
g++ -O3 cpp/yolo.cpp -o yolo_example `pkg-config --cflags --libs opencv4`
./yolo_example cuda
Find yolo.cpp in the downloaded folder, add it to a VS project whose properties (OpenCV include and library directories) have already been configured, adjust the file paths in the code accordingly, and run it to get the results.
The detection results are shown below. The frame rate is low because everything ran on the CPU: I am on a GTX 1050 and set is_cuda = true, but the program still fell back to the CPU automatically, because the prebuilt OpenCV DNN module has no CUDA support. To use the GPU, OpenCV has to be recompiled, i.e. CUDA and OpenCV built together into a new set of library files.
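Before rebuilding, you can confirm at runtime whether the OpenCV build you link against exposes the CUDA DNN backend at all. The following is a small sketch of my own, not part of the original yolo.cpp:

#include <iostream>
#include <opencv2/core/cuda.hpp>
#include <opencv2/dnn.hpp>

int main()
{
    // Number of CUDA devices OpenCV can see (0 for CPU-only builds)
    std::cout << "CUDA devices: " << cv::cuda::getCudaEnabledDeviceCount() << std::endl;

    // Targets usable with the CUDA DNN backend; empty if DNN was built without CUDA
    auto targets = cv::dnn::getAvailableTargets(cv::dnn::DNN_BACKEND_CUDA);
    std::cout << "CUDA DNN targets: " << targets.size() << std::endl;
    return 0;
}

If the target list comes back empty, setPreferableBackend(DNN_BACKEND_CUDA) simply falls back to the CPU path, which is exactly the behaviour described above.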
The source of yolo.cpp is as follows:
#include <chrono>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <sstream>

#include <opencv2/opencv.hpp>
std::vector<std::string> load_class_list()
{
    std::vector<std::string> class_list;
    std::ifstream ifs("yolov5-opencv/config_files/classes.txt");
    std::string line;
    while (getline(ifs, line))
    {
        class_list.push_back(line);
    }
    return class_list;
}

void load_net(cv::dnn::Net &net, bool is_cuda)
{
    //auto result = cv::dnn::readNet("yolov5-opencv/config_files/yolov5s.onnx");
    auto result = cv::dnn::readNet("yolov5s.onnx");
    if (is_cuda)
    {
        std::cout << "Attempting to use CUDA\n";
        result.setPreferableBackend(cv::dnn::DNN_BACKEND_CUDA);
        result.setPreferableTarget(cv::dnn::DNN_TARGET_CUDA_FP16);
    }
    else
    {
        std::cout << "Running on CPU\n";
        result.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV);
        result.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);
    }
    net = result;
}

const std::vector<cv::Scalar> colors = {cv::Scalar(255, 255, 0), cv::Scalar(0, 255, 0), cv::Scalar(0, 255, 255), cv::Scalar(255, 0, 0)};

const float INPUT_WIDTH = 640.0;
const float INPUT_HEIGHT = 640.0;
const float SCORE_THRESHOLD = 0.2;
const float NMS_THRESHOLD = 0.4;
const float CONFIDENCE_THRESHOLD = 0.4;

struct Detection
{
    int class_id;
    float confidence;
    cv::Rect box;
};

// Pad the image to a square so it can be resized to 640x640 without distortion
cv::Mat format_yolov5(const cv::Mat &source) {
    int col = source.cols;
    int row = source.rows;
    int _max = MAX(col, row);
    cv::Mat result = cv::Mat::zeros(_max, _max, CV_8UC3);
    source.copyTo(result(cv::Rect(0, 0, col, row)));
    return result;
}

void detect(cv::Mat &image, cv::dnn::Net &net, std::vector<Detection> &output, const std::vector<std::string> &className) {
    cv::Mat blob;

    auto input_image = format_yolov5(image);

    cv::dnn::blobFromImage(input_image, blob, 1./255., cv::Size(INPUT_WIDTH, INPUT_HEIGHT), cv::Scalar(), true, false);
    net.setInput(blob);
    std::vector<cv::Mat> outputs;
    net.forward(outputs, net.getUnconnectedOutLayersNames());

    // Scale factors from the 640x640 network input back to the padded image
    float x_factor = input_image.cols / INPUT_WIDTH;
    float y_factor = input_image.rows / INPUT_HEIGHT;

    float *data = (float *)outputs[0].data;

    // Each of the 25200 rows is [cx, cy, w, h, obj_conf, 80 class scores]
    const int dimensions = 85;
    const int rows = 25200;

    std::vector<int> class_ids;
    std::vector<float> confidences;
    std::vector<cv::Rect> boxes;

    for (int i = 0; i < rows; ++i) {
        float confidence = data[4];
        if (confidence >= CONFIDENCE_THRESHOLD) {
            float * classes_scores = data + 5;
            cv::Mat scores(1, className.size(), CV_32FC1, classes_scores);
            cv::Point class_id;
            double max_class_score;
            minMaxLoc(scores, 0, &max_class_score, 0, &class_id);
            if (max_class_score > SCORE_THRESHOLD) {
                confidences.push_back(confidence);
                class_ids.push_back(class_id.x);

                float x = data[0];
                float y = data[1];
                float w = data[2];
                float h = data[3];
                int left = int((x - 0.5 * w) * x_factor);
                int top = int((y - 0.5 * h) * y_factor);
                int width = int(w * x_factor);
                int height = int(h * y_factor);
                boxes.push_back(cv::Rect(left, top, width, height));
            }
        }
        data += 85;
    }

    // Non-maximum suppression to drop overlapping boxes
    std::vector<int> nms_result;
    cv::dnn::NMSBoxes(boxes, confidences, SCORE_THRESHOLD, NMS_THRESHOLD, nms_result);
    for (int i = 0; i < nms_result.size(); i++) {
        int idx = nms_result[i];
        Detection result;
        result.class_id = class_ids[idx];
        result.confidence = confidences[idx];
        result.box = boxes[idx];
        output.push_back(result);
    }
}

int main(int argc, char **argv)
{
    std::vector<std::string> class_list = load_class_list();

    cv::Mat frame;
    cv::VideoCapture capture("yolov5-opencv/sample.mp4");
    if (!capture.isOpened())
    {
        std::cerr << "Error opening video file\n";
        return -1;
    }

    //bool is_cuda = argc > 1 && strcmp(argv[1], "cuda") == 0;
    bool is_cuda = true;

    cv::dnn::Net net;
    load_net(net, is_cuda);

    auto start = std::chrono::high_resolution_clock::now();
    int frame_count = 0;
    float fps = -1;
    int total_frames = 0;

    while (true)
    {
        capture.read(frame);
        if (frame.empty())
        {
            std::cout << "End of stream\n";
            break;
        }

        std::vector<Detection> output;
        detect(frame, net, output, class_list);

        frame_count++;
        total_frames++;

        int detections = output.size();
        for (int i = 0; i < detections; ++i)
        {
            auto detection = output[i];
            auto box = detection.box;
            auto classId = detection.class_id;
            const auto color = colors[classId % colors.size()];
            cv::rectangle(frame, box, color, 3);
            cv::rectangle(frame, cv::Point(box.x, box.y - 20), cv::Point(box.x + box.width, box.y), color, cv::FILLED);
            cv::putText(frame, class_list[classId].c_str(), cv::Point(box.x, box.y - 5), cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0));
        }

        // Recompute the FPS estimate every 30 frames
        if (frame_count >= 30)
        {
            auto end = std::chrono::high_resolution_clock::now();
            fps = frame_count * 1000.0 / std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count();
            frame_count = 0;
            start = std::chrono::high_resolution_clock::now();
        }

        if (fps > 0)
        {
            std::ostringstream fps_label;
            fps_label << std::fixed << std::setprecision(2);
            fps_label << "FPS: " << fps;
            std::string fps_label_str = fps_label.str();
            cv::putText(frame, fps_label_str.c_str(), cv::Point(10, 25), cv::FONT_HERSHEY_SIMPLEX, 1, cv::Scalar(0, 0, 255), 2);
        }

        cv::imshow("output", frame);

        if (cv::waitKey(1) != -1)
        {
            capture.release();
            std::cout << "finished by user\n";
            break;
        }
    }

    std::cout << "Total frames: " << total_frames << "\n";
    return 0;
}