YOLOv3论文:https://pjreddie.com/media/files/papers/YOLOv3.pdf
官网和代码:https://pjreddie.com/darknet/
YOLO属于one-stage(检测一步到位)算法,兼顾准确率和速度,特别是最近的v3版本提高了小目标的检测率,是移动端目标检测的热门算法。关于YOLO原理的介绍,网上已有很多资料,请自行查阅;本文主要介绍如何在自己的C++工程中调用YOLOv3进行目标检测。
yolo采用自定义的image格式进行图像读取和处理,而一般我们工程中使用较多的是OpenCV或者指向图像数据的指针,因此此处先对图像转换和缩放操作进行修改,代码如下:
// Image helpers that feed raw float buffers (instead of darknet's own image
// type) to the network.
#ifndef IMPROCESS_H
#define IMPROCESS_H
// NOTE(review): the include target is missing in this copy of the file.
// The prototypes below use cv::Mat, so presumably an OpenCV header belongs
// here -- confirm against the original source.
#include
// Converts an interleaved 8-bit image into a channel-planar float buffer with
// every sample divided by 255. `dst` must hold rows*cols*channels floats.
void imgConvert(const cv::Mat& img, float* dst);
// Resizes the planar float image `src` (srcWidth x srcHeight) for a network
// input of dstWidth x dstHeight; the intermediate size keeps the source
// aspect ratio. How `dst` is filled around the scaled image is not visible in
// this copy -- TODO confirm.
void imgResize(float* src, float* dst,int srcWidth,int srcHeight,int dstWidth,int dstHeight);
// Scaling helper called by imgResize with the aspect-preserving target size.
// Its definition is not visible here; presumably it performs the actual
// interpolation -- verify against the implementation.
void resizeInner(float *src, float* dst,int srcWidth,int srcHeight,int dstWidth,int dstHeight);
#endif // IMPROCESS_H
#include
/// Converts an interleaved 8-bit OpenCV image into a channel-planar float
/// buffer, scaling every sample by 1/255.
///
/// Output layout: dst[k*w*h + i*w + j] holds channel k of pixel (row i, col j).
///
/// @param img  Source image. Samples are read as uchar, so the Mat is assumed
///             to have 8-bit depth (e.g. CV_8UC3) -- callers must ensure this.
/// @param dst  Caller-owned buffer with room for rows*cols*channels floats.
///
/// Rows are addressed through cv::Mat::ptr so that non-continuous matrices
/// (ROI views, padded rows) are read correctly; indexing img.data with
/// (i*w + j)*c silently assumes that rows are tightly packed.
void imgConvert(const cv::Mat& img, float* dst){
    const int h = img.rows;
    const int w = img.cols;
    const int c = img.channels();
    for(int k = 0; k < c; ++k){
        for(int i = 0; i < h; ++i){
            // Start of source row i, honouring the matrix's real row stride.
            const uchar* row = img.ptr<uchar>(i);
            // Start of row i inside plane k; size_t avoids int overflow of
            // k*w*h on very large frames.
            float* out = dst + (static_cast<size_t>(k)*h + i)*w;
            for(int j = 0; j < w; ++j){
                // Division is done in double (255.) exactly as before, then
                // narrowed to float on store.
                out[j] = row[j*c + k]/255.;
            }
        }
    }
}
// Resizes the planar float image `src` (srcWidth x srcHeight) towards a
// dstWidth x dstHeight target while keeping the source aspect ratio.
// NOTE(review): this definition is truncated in this copy (it stops inside
// the final for-loop header), so how `dst` is filled is not visible here.
void imgResize(float *src, float* dst,int srcWidth,int srcHeight,int dstWidth,int dstHeight){
// Initial values only; one of the two branches below always overwrites both.
int new_w = srcWidth;
int new_h = srcHeight;
// Pick the smaller of the two scale factors so the scaled image fits inside
// the destination in both dimensions.
if (((float)dstWidth/srcWidth) < ((float)dstHeight/srcHeight)) {
// Width is the limiting dimension: fill it and scale the height to match.
new_w = dstWidth;
new_h = (srcHeight * dstWidth)/srcWidth;
} else {
// Height is the limiting dimension: fill it and scale the width to match.
new_h = dstHeight;
new_w = (srcWidth * dstHeight)/srcHeight;
}
// Temporary buffer for the aspect-preserving resize; sized for 3 float planes.
// NOTE(review): the matching free() is not visible in this truncated copy --
// confirm it exists in the full source.
float* ImgReInner;
size_t sizeInner=new_w*new_h*3*sizeof(float);
ImgReInner=(float*)malloc(sizeInner);
// Scale src (srcWidth x srcHeight) into the temporary new_w x new_h buffer.
resizeInner(src,ImgReInner,srcWidth,srcHeight,new_w,new_h);
// NOTE(review): the remainder of the function was lost from this point on.
for(int i=0;i
其中,imgConvert函数将OpenCV的图像由RGBRGBRGB...转化为yolo的RRRGGGBBB...格式(由代码可知,yolo输入图像的像素取值范围为0~1)。imgResize函数将图像缩放到cfg指定的网络输入的大小。代码修改自yolo的源码,将其image格式改为我们需要的指针形式。
接下来是调用darknet的代码,为了让代码跑通,我们首先用OpenCV读取视频,然后将OpenCV的图像转为指针指向的数据格式(如果想直接采用OpenCV可自行修改)。代码如下:
#include
#include
#include
#include
using namespace std;
using namespace cv;
// Six anchor colours; each row is one colour with three components in [0, 1].
float colors[6][3] = { {1,0,1}, {0,0,1},{0,1,1},{0,1,0},{1,1,0},{1,0,0} };

/// Returns one colour component by linearly interpolating between adjacent
/// rows of `colors`.
///
/// @param c    Component index into a colour row (0..2).
/// @param x    Position along the palette.
/// @param max  Value of `x` that maps to the last palette row.
/// @return     Interpolated component in [0, 1].
///
/// The position x/max is scaled onto the five intervals between the six rows.
/// The scaled position is clamped to [0, 5] so that x < 0 or x > max can no
/// longer index outside `colors`, and max == 0 is treated as position 0
/// instead of dividing by zero. Results for 0 <= x <= max are unchanged.
float get_color(int c, int x, int max){
    float ratio = 0.f;
    if (max != 0) {
        ratio = ((float)x/max)*5;
    }
    // Keep both neighbouring row indices inside the 6-row table.
    if (ratio < 0.f) {
        ratio = 0.f;
    }
    if (ratio > 5.f) {
        ratio = 5.f;
    }
    const int i = static_cast<int>(floor(ratio));
    const int j = static_cast<int>(ceil(ratio));
    // Fractional distance from row i towards row j.
    ratio -= i;
    return (1-ratio) * colors[i][c] + ratio*colors[j][c];
}
// Demo driver: loads a darknet YOLOv3 model, reads frames from a video file,
// runs detection on each frame and collects one box and class index per
// detection that passes the threshold.
// NOTE(review): several lines in this copy lost text between '<' and '>'
// (template arguments, loop conditions and loop bodies); the damaged lines
// are flagged below and left untouched.
int main()
{
string cfgfile = "/home/chnn/darknet/cfg/yolov3.cfg";// Model config file; change the path to match your setup
string weightfile = "/home/chnn/darknet/yolov3.weights";
float thresh=0.5;// Parameter settings
float nms=0.35;
int classes=80;
network *net=load_network((char*)cfgfile.c_str(),(char*)weightfile.c_str(),0);// Load the network model
set_batch_network(net, 1);
VideoCapture capture("/home/chnn/video/videoCapture6.mp4");// Open the input video; change the path to match your setup
Mat frame;
Mat rgbImg;
// NOTE(review): the vector's element type is missing in this copy; string
// values are pushed into it below.
vector classNamesVec;
ifstream classNamesFile("/home/chnn/darknet/data/coco.names");// Label file; COCO has 80 classes
// Read one class label per line; if the file cannot be opened the list simply
// stays empty.
if (classNamesFile.is_open()){
string className = "";
while (getline(classNamesFile, className))
classNamesVec.push_back(className);
}
// NOTE(review): nothing in the visible code sets `stop` to true; the loop is
// left only through the failed-read return below.
bool stop=false;
while(!stop){
if (!capture.read(frame)){
printf("fail to read.\n");
// NOTE(review): this return skips free_network() and capture.release() at
// the end of main.
return 0;
}
// The frame is converted from BGR to RGB channel order before being handed
// to the network.
cvtColor(frame, rgbImg, cv::COLOR_BGR2RGB);
// Per-frame planar float copy of the RGB image (3 channels).
float* srcImg;
size_t srcSize=rgbImg.rows*rgbImg.cols*3*sizeof(float);
srcImg=(float*)malloc(srcSize);
imgConvert(rgbImg,srcImg);// Convert the image to YOLO's layout
// Per-frame buffer sized to the network input (net->w x net->h, 3 channels).
float* resizeImg;
size_t resizeSize=net->w*net->h*3*sizeof(float);
resizeImg=(float*)malloc(resizeSize);
imgResize(srcImg,resizeImg,frame.cols,frame.rows,net->w,net->h);// Resize the image
network_predict(net,resizeImg);// Run network inference
int nboxes=0;
// NOTE(review): presumably the trailing `1` requests box coordinates relative
// to the image size, since they are multiplied by frame.cols/frame.rows
// below -- confirm against darknet.h.
detection *dets=get_network_boxes(net,rgbImg.cols,rgbImg.rows,thresh,0.5,0,1,&nboxes);
// nms is a float used as a flag: any non-zero value enables suppression.
if(nms){
do_nms_sort(dets,nboxes,classes,nms);
}
// NOTE(review): element types of both vectors are missing in this copy; Rect
// values and int class indices are pushed into them below.
vectorboxes;
boxes.clear();
vectorclassNames;
for (int i = 0; i < nboxes; i++){
// flag records whether a class was accepted for this detection; className
// keeps the first accepted class index and is only read when flag is set.
bool flag=0;
int className;
// NOTE(review): the loop header and the condition that opens the next block
// are garbled in this copy; only the comparison against `thresh` survives.
for(int j=0;jthresh){
if(!flag){
flag=1;
className=j;
}
}
}
if(flag){
// Turn the centre/size box into pixel edges by scaling with the frame size.
int left = (dets[i].bbox.x - dets[i].bbox.w / 2.)*frame.cols;
int right = (dets[i].bbox.x + dets[i].bbox.w / 2.)*frame.cols;
int top = (dets[i].bbox.y - dets[i].bbox.h / 2.)*frame.rows;
int bot = (dets[i].bbox.y + dets[i].bbox.h / 2.)*frame.rows;
// Clamp every edge to the frame bounds.
if (left < 0)
left = 0;
if (right > frame.cols - 1)
right = frame.cols - 1;
if (top < 0)
top = 0;
if (bot > frame.rows - 1)
bot = frame.rows - 1;
// Width and height are the absolute differences between opposite edges.
Rect box(left, top, fabs(left - right), fabs(top - bot));
boxes.push_back(box);
classNames.push_back(className);
}
}
free_detections(dets, nboxes);
// NOTE(review): the code between this loop header and the key check was lost
// in this copy; what the loop does with the collected boxes is not visible.
for(int i=0;i=0)
waitKey(0);
// Release the two per-frame buffers allocated above.
free(srcImg);
free(resizeImg);
}
free_network(net);
capture.release();
// NOTE(review): main returns 1 here, a non-zero exit status, while the
// failed-read path above returns 0 -- confirm this is intended.
return 1;
}
链接上darknet的动态库并让代码运行,最后得出来的结果应该是这样的:
请忽略人体姿态信息……