题目要求:在上一篇OpenCV----YOLACT实例分割模型推理博客中介绍了YOLACT检测和分割任务,本次兼容YOLOv5, 构建基于面向对象设计的目标检测模型框架。
yolov5 github: YOLOv5 source code
yolov5与yolov4的battle:yolov5 vs yolov4。从结果看,v5结果比v4稍差,但灵活性更高,部署更加友好。
分析:
1)OpenCV的DNN模块可以加载并推理多种深度学习模型,包括人脸检测、图像分类、分割、目标检测等,支持导入PyTorch、TensorFlow、PaddlePaddle等框架训练的模型(参看代码库OpenCV/dnn)
2)深度学习推理模型一般步骤:加载模型,包括配置文件和权重文件;输入图像预处理,转换成模型可接受的数据类型和尺寸;模型前向推理;预测结果后处理,对于目标检测和实例分割,主要是NMS(非极大值抑制)后处理方法;
main.exe -h
#######
Usage: main.exe [params] image confThreshold nmsThresshold model_name
-?, -h, --help, --usage (value:true)
opecv based deep learining demo
image (value:inference/horses.jpg)
Image to process
confThreshold (value:0.5)
confidence threshold, default 0.5
nmsThresshold (value:0.5)
nms threshold, default 0.5
model_name (value:yolov5)
dnn model, default yolov5
parse wrong, please check command or type help
main.exe inference/horses.jpg 0.5 0.5 yolov5
CMakeLists.txt:
# cmake needs this line
SET(CMAKE_BUILD_TYPE "Release")
# # Define project name
# PROJECT(CppDnn)
include_directories(".../opencv/build/include" ".../opencv/build/include/opencv2")
link_directories(".../opencv/build/x64/vc15/lib")
add_executable (main main.cpp)
add_library(yolact yolact.cpp)
add_library(yolov5 yolov5.cpp)
add_library(config config.cpp)
target_link_libraries(main yolact yolov5 config opencv_world460)
# unit test
# add_executable(yolov5 yolov5.cpp)
# target_link_libraries(yolov5 config opencv_world460)
1:检测模型配置文件头文件 config.hpp
// config.hpp
extern const char* class_names[];
extern const unsigned char colors[81][3];
2: 检测模型配置实现 config.cpp
// config.cpp
#pragma once
#include
#include"config.hpp"
// coco 81 classes (background included)
extern const char* class_names[] = { "background",
"person", "bicycle", "car", "motorcycle", "airplane", "bus",
"train", "truck", "boat", "traffic light", "fire hydrant",
"stop sign", "parking meter", "bench", "bird", "cat", "dog",
"horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe",
"backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
"skis", "snowboard", "sports ball", "kite", "baseball bat",
"baseball glove", "skateboard", "surfboard", "tennis racket",
"bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl",
"banana", "apple", "sandwich", "orange", "broccoli", "carrot",
"hot dog", "pizza", "donut", "cake", "chair", "couch",
"potted plant", "bed", "dining table", "toilet", "tv", "laptop",
"mouse", "remote", "keyboard", "cell phone", "microwave", "oven",
"toaster", "sink", "refrigerator", "book", "clock", "vase",
"scissors", "teddy bear", "hair drier", "toothbrush"
};
// anchor color setting
extern const unsigned char colors[81][3] = {{56, 0, 255}, {226, 255, 0}, {0, 94, 255},
{0, 37, 255}, {0, 255, 94}, {255, 226, 0}, {0, 18, 255}, {255, 151, 0},
{170, 0, 255}, {0, 255, 56}, {255, 0, 75}, {0, 75, 255}, {0, 255, 169},
{255, 0, 207}, {75, 255, 0}, {207, 0, 255}, {37, 0, 255}, {0, 207, 255},
{94, 0, 255}, {0, 255, 113}, {255, 18, 0}, {255, 0, 56}, {18, 0, 255},
{0, 255, 226}, {170, 255, 0}, {255, 0, 245}, {151, 255, 0}, {132, 255, 0},
{75, 0, 255}, {151, 0, 255}, {0, 151, 255}, {132, 0, 255}, {0, 255, 245},
{255, 132, 0}, {226, 0, 255}, {255, 37, 0}, {207, 255, 0},
{0, 255, 207}, {94, 255, 0}, {0, 226, 255},
{56, 255, 0}, {255, 94, 0}, {255, 113, 0},{0, 132, 255}, {255, 0, 132},
{255, 170, 0}, {255, 0, 188}, {113, 255, 0}, {245, 0, 255}, {113, 0, 255},
{255, 188, 0}, {0, 113, 255}, {255, 0, 0}, {0, 56, 255}, {255, 0, 113},
{0, 255, 188}, {255, 0, 94}, {255, 0, 18}, {18, 255, 0}, {0, 255, 132},
{0, 188, 255}, {0, 245, 255}, {0, 169, 255},{37, 255, 0},
{255, 0, 151}, {188, 0, 255}, {0, 255, 37}, {0, 255, 0},
{255, 0, 170}, {255, 0, 37}, {255, 75, 0}, {0, 0, 255}, {255, 207, 0},
{255, 0, 226}, {255, 245, 0}, {188, 255, 0}, {0, 255, 18}, {0, 255, 75},
{0, 255, 151}, {255, 56, 0}, {245, 255, 0}
};
// network config
extern struct net_config{
float confThreshold;
float nmsThreshold;
std::string model_name;
int img_size;
std::string model_path;
};
3: yolov5推理模型
// yolov5.cpp
#include
#include
#include
#include
#include
#include
#include "config.cpp"
using namespace cv;
using namespace dnn;
using namespace std;
class yolov5
{
public:
// detail constructor
yolov5(float confThreshold, float nmsThreshold, string model_path = "model/yolov5m.onnx", const int keep_top_k = 200);
// general constructor
yolov5(net_config& config);
// detection
void detect(Mat& frame);
private:
const float anchors[3][6] = {{10.0, 13.0, 16.0, 30.0, 33.0, 23.0}, {30.0, 61.0, 62.0, 45.0, 59.0, 119.0},{116.0, 90.0, 156.0, 198.0, 373.0, 326.0}};
const float stride[3] = { 8.0, 16.0, 32.0 };
const int inpWidth = 640;
const int inpHeight = 640;
float confThreshold = 0.5;
float nmsThreshold = 0.5;
float objThreshold = 0.5;
Net net;
void drawPred(float conf, int left, int top, int right, int bottom, Mat& frame, int classid);
// post process for scores
void sigmoid(Mat* out, int length){
float* pdata = (float*)(out->data);
int i = 0;
for (i = 0; i < length; i++)
{
pdata[i] = 1.0 / (1 + expf(-pdata[i]));
}
}
};
yolov5::yolov5(float confThreshold, float nmsThreshold, string model_path, const int keep_top_k)
{
this->confThreshold = confThreshold;
this->nmsThreshold = nmsThreshold;
this->net = readNet(model_path);
}
yolov5::yolov5(net_config& config)
{
this->confThreshold = config.confThreshold;
this->nmsThreshold = config.nmsThreshold;
this->net = readNet(config.model_path);
}
void yolov5::drawPred(float conf, int left, int top, int right, int bottom, Mat& frame, int classid) // Draw the predicted bounding box
{
//Draw a rectangle displaying the bounding box
rectangle(frame, Point(left, top), Point(right, bottom), Scalar(0, 0, 255), 2);
//Get the label for the class name and its confidence
string label = format("%.2f", conf);
label = string(class_names[classid+1]) + ":" + label;
//Display the label at the top of the bounding box
int baseLine;
Size labelSize = getTextSize(label, FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
top = max(top, labelSize.height);
//rectangle(frame, Point(left, top - int(1.5 * labelSize.height)), Point(left + int(1.5 * labelSize.width), top + baseLine), Scalar(0, 255, 0), FILLED);
putText(frame, label, Point(left, top), FONT_HERSHEY_SIMPLEX, 0.75, Scalar(0, 255, 0), 1);
// **** for video detection **** //
// static const string kWinName = "yolov5 Object Detection in OpenCV";
// namedWindow(kWinName, WINDOW_NORMAL);
// imshow(kWinName, frame);
// waitKey(10);
}
void yolov5::detect(Mat& frame)
{
Mat blob;
blobFromImage(frame, blob, 1 / 255.0, Size(this->inpWidth, this->inpHeight), Scalar(0, 0, 0), true, false);
this->net.setInput(blob);
vector<Mat> outs;
this->net.forward(outs, this->net.getUnconnectedOutLayersNames());
// generate proposals
vector<int> classIds;
vector<float> confidences;
vector<Rect> boxes;
float ratioh = (float)frame.rows / this->inpHeight;
float ratiow = (float)frame.cols / this->inpWidth;
int n = 0, q = 0, i = 0, j = 0, nout = 80 + 5, c = 0;
for (n = 0; n < 3; n++)
{
int num_grid_x = (int)(this->inpWidth / this->stride[n]);
int num_grid_y = (int)(this->inpHeight / this->stride[n]);
int area = num_grid_x * num_grid_y;
this->sigmoid(&outs[n], 3 * nout * area);
for (q = 0; q < 3; q++)
{
const float anchor_w = this->anchors[n][q * 2];
const float anchor_h = this->anchors[n][q * 2 + 1];
float* pdata = (float*)outs[n].data + q * nout * area;
for (i = 0; i < num_grid_y; i++)
{
for (j = 0; j < num_grid_x; j++)
{
float box_score = pdata[4 * area + i * num_grid_x + j];
if (box_score > this->objThreshold)
{
float max_class_socre = 0, class_socre = 0;
int max_class_id = 0;
for (c = 0; c < 80; c++) // get max socre
{
class_socre = pdata[(c + 5) * area + i * num_grid_x + j];
if (class_socre > max_class_socre)
{
max_class_socre = class_socre;
max_class_id = c;
}
}
if (max_class_socre > this->confThreshold)
{
float cx = (pdata[i * num_grid_x + j] * 2.f - 0.5f + j) * this->stride[n]; ///cx
float cy = (pdata[area + i * num_grid_x + j] * 2.f - 0.5f + i) * this->stride[n]; ///cy
float w = powf(pdata[2 * area + i * num_grid_x + j] * 2.f, 2.f) * anchor_w; ///w
float h = powf(pdata[3 * area + i * num_grid_x + j] * 2.f, 2.f) * anchor_h; ///h
int left = (cx - 0.5*w)*ratiow;
int top = (cy - 0.5*h)*ratioh;
classIds.push_back(max_class_id);
confidences.push_back(max_class_socre);
boxes.push_back(Rect(left, top, (int)(w*ratiow), (int)(h*ratioh)));
}
}
}
}
}
}
// nms to eliminate redundant overlapping boxes with lower confidences
vector<int> indices;
NMSBoxes(boxes, confidences, this->confThreshold, this->nmsThreshold, indices);
for (size_t i = 0; i < indices.size(); ++i)
{
int idx = indices[i];
Rect box = boxes[idx];
this->drawPred(confidences[idx], box.x, box.y,
box.x + box.width, box.y + box.height, frame, classIds[idx]);
}
}
// unit test
// int main()
// {
// yolov5 net(0.5, 0.5, "model/yolov5m.onnx");
// string imgpath = "inference/horses.jpg";
// Mat srcimg = imread(imgpath);
// net.detect(srcimg);
// cout << "detect ! " << endl;
// static const string kWinName = "Deep learning object detection in OpenCV";
// namedWindow(kWinName, WINDOW_NORMAL);
// imshow(kWinName, srcimg);
// waitKey(0);
// destroyAllWindows();
// }
4: 整体代码结构
#define _CRT_SECURE_NO_WARNINGS
#include
#include
#include
#include
#include
#include "config.cpp"
#include "yolact.cpp"
#include "yolov5.cpp"
using namespace cv;
using namespace dnn;
using namespace std;
bool parseParam(int argc, char** argv, const char* keys, Mat& img, net_config& config){
CommandLineParser parser(argc, argv, keys);
if(parser.has("help")){
parser.printMessage();
return false;
}
if(!parser.check()){
parser.printErrors();
return false;
}
String imgFile = parser.get<String>(0);
img = imread(imgFile);
if(img.empty()){
cout << "wrong image path ! please check again." << endl;
return false;
}
config.confThreshold = parser.get<float>(1);
config.nmsThreshold = parser.get<float>(2);
config.model_name = parser.get<string>(3);
return true;
}
int main(int argc, char** argv)
{
const char* keys = {
"{help h usage ? | | opecv based deep learining demo}"
"{@image | inference/horses.jpg | Image to process}"
"{@confThreshold | 0.5 | confidence threshold, default 0.5}"
"{@nmsThresshold | 0.5 | nms threshold, default 0.5}"
"{@model_name | yolov5 | dnn model, default yolov5}"
};
net_config config;
Mat srcimg;
if(!parseParam(argc, argv, keys, srcimg, config)){
cout << "parse wrong, please check command or type help" << endl;
return 0;
}
if(config.model_name == "yolact"){
config.model_path = "model/yolact_base_54_800000.onnx";
yolact model(config);
model.detect(srcimg);
static const string kWinName = "Deep learning object detection in OpenCV";
namedWindow(kWinName, WINDOW_NORMAL);
imshow(kWinName, srcimg);
waitKey(0);
destroyAllWindows();
}else if(config.model_name == "yolov5"){
config.model_path = "model/yolov5m.onnx";
yolov5 model(config);
model.detect(srcimg);
static const string kWinName = "Deep learning object detection in OpenCV";
namedWindow(kWinName, WINDOW_NORMAL);
imshow(kWinName, srcimg);
waitKey(0);
destroyAllWindows();
// load outer video device
// VideoCapture capture(1);
// Mat frame;
// while(true){
// capture >> frame;
// resize(frame, frame, Size(640, 640), INTER_LINEAR);
// flip(frame, frame, 1);
// Mat m = frame;
// model.detect(m);
// if(waitKey(1) == 'q'){
// break;
// }
// }
}
}else{
cout << "model not defined" << endl;
}
return 0;
}