yolo目标检测软件 c语言,深度学习目标检测之——YOLO-v3目标检测(windows端调用)...

前言

目前基于深度学习的目标检测越来越火,其准确度很高。笔者采用Yolo-v3实现目标检测。Yolo-v3基于darknet框架,该框架采用纯C语言实现,不依赖其他第三方库,相对于caffe框架,在易用性上对开发者更友好(笔者编译过数次caffe才成功)。本文基于windows平台将yolo-v3编译为动态链接库dll,并测试其检测性能。

New, python接口的YOLO-v3, !!!, 走过不要错过

为了方便测试,本人将测试通过的Visual Studio工程贴出来

Yolov3-windows测试工程

链接:https://pan.baidu.com/s/1i6ZK2ZCGzWbfWT1-_fUTzg

提取码:rj9o

复制这段内容后打开百度网盘手机App,操作更方便哦

image.png

开发环境

windows 10 x64

Visual Studio 2017

opencv3.4.0

动态链接库.dll的编译过程就不再赘述,相信熟悉C++、编译过opencv的小伙伴都很容易完成。本文测试使用的是cpu-only版本;笔者编译过cpu-only、gpu两个版本的yolo-v3 dll,需要dll的请点赞支持哦。

Yolo-v3

代码 C++,opencv

需要的文件可以在darknet链接下载得到:

yolov3.cfg,yolov3的网络结构描述文件

yolov3.weights,yolov3训练好的权重文件,在coco数据集上训练的

coco.names, coco数据集的目标类别文件

#include

#include

#include

using namespace std;

using namespace cv;

const string CFG_FILE = "darknet-master\\cfg\\yolov3.cfg";

const string WEIGHT_FILE = "yolov3.weights";

const string COCO_NAMES = "darknet-master\\cfg\\coco.names";

class Object

{

public:

Object();

Object::Object(int id, float confidence, Rect rect, String name);

~Object();

public:

int id;

float confidence;

Rect rect;

String name;

private:

};

Object::Object() {

}

Object::Object(int id,float confidence,Rect rect,String name) {

this->id = id;

this->confidence = confidence;

this->rect = rect;

this->name = name;

}

Object::~Object() {

}

int main() {

//--------------------------实例化一个Yolo检测器---------------------------

Detector yolo_detector(CFG_FILE, WEIGHT_FILE);

//读取目标类别文件,80类

vector classNames;

ifstream fileIn(COCO_NAMES, ios::in);

if (!fileIn.is_open()) {

cerr << "failed to load COCO.names!" << endl;

return -1;

}

for (int i = 0; i < 80; i++) {

char temp1[100];

fileIn.getline(temp1, 100);

string temp2(temp1);

classNames.push_back(String(temp2));

}

//---------------------------加载输入图像-----------------------------------

auto image = Detector::load_image("7.jpg");

cout << "图像宽度=" << image.w << endl

<< "图像高度=" << image.h << endl

<< "图像通道=" << image.c << endl;

//-----------------------------目标检测---------------------------------------

TickMeter t;

t.start();

auto res = yolo_detector.detect(image);

t.stop();

cout << "YOLO-v3检测时间=" << t.getTimeSec() << "sec" << endl;

//----------------------------解析检测结果---------------------------------------

vector detectObjects;

for (auto& i:res) {

int id = i.obj_id;

float confidence = i.prob;

String name = classNames[id];

Rect rect = Rect{ static_cast(i.x),static_cast(i.y),static_cast(i.w),static_cast(i.h) };

detectObjects.push_back(Object{ id,confidence,rect,name });

}

//----------------------------绘制结果---------------------------------------------

Mat im_src = imread("7.jpg");

for (auto& i:detectObjects) {

rectangle(im_src, i.rect, Scalar(0, 255, 255), 2);

putText(im_src, i.name, i.rect.tl(), 1, 1.8, Scalar(255, 0, 0),2);

}

imshow("yolo-v3", im_src);

waitKey(0);

}

API介绍

yolo_v2_class.hpp 中定义了Detector,将yolo封装到C++类中,方便使用。下面是Detector类的源码。

class Detector {

std::shared_ptr detector_gpu_ptr;

std::deque<:vector>> prev_bbox_vec_deque;

const int cur_gpu_id;

public:

float nms = .4;

bool wait_stream;

YOLODLL_API Detector(std::string cfg_filename, std::string weight_filename, int gpu_id = 0);

YOLODLL_API ~Detector();

YOLODLL_API std::vector detect(std::string image_filename, float thresh = 0.2, bool use_mean = false);

YOLODLL_API std::vector detect(image_t img, float thresh = 0.2, bool use_mean = false);

static YOLODLL_API image_t load_image(std::string image_filename);

static YOLODLL_API void free_image(image_t m);

YOLODLL_API int get_net_width() const;

YOLODLL_API int get_net_height() const;

YOLODLL_API std::vector tracking_id(std::vector cur_bbox_vec, bool const change_history = true,

int const frames_story = 10, int const max_dist = 150);

std::vector detect_resized(image_t img, int init_w, int init_h, float thresh = 0.2, bool use_mean = false)

{

if (img.data == NULL)

throw std::runtime_error("Image is empty");

auto detection_boxes = detect(img, thresh, use_mean);

float wk = (float)init_w / img.w, hk = (float)init_h / img.h;

for (auto &i : detection_boxes) i.x *= wk, i.w *= wk, i.y *= hk, i.h *= hk;

return detection_boxes;

}

#ifdef OPENCV

std::vector detect(cv::Mat mat, float thresh = 0.2, bool use_mean = false)

{

if(mat.data == NULL)

throw std::runtime_error("Image is empty");

auto image_ptr = mat_to_image_resize(mat);

return detect_resized(*image_ptr, mat.cols, mat.rows, thresh, use_mean);

}

std::shared_ptr mat_to_image_resize(cv::Mat mat) const

{

if (mat.data == NULL) return std::shared_ptr(NULL);

cv::Mat det_mat;

cv::resize(mat, det_mat, cv::Size(get_net_width(), get_net_height()));

return mat_to_image(det_mat);

}

static std::shared_ptr mat_to_image(cv::Mat img_src)

{

cv::Mat img;

cv::cvtColor(img_src, img, cv::COLOR_RGB2BGR);

std::shared_ptr image_ptr(new image_t, [](image_t *img) { free_image(*img); delete img; });

std::shared_ptr ipl_small = std::make_shared(img);

*image_ptr = ipl_to_image(ipl_small.get());

return image_ptr;

}

private:

static image_t ipl_to_image(IplImage* src)

{

unsigned char *data = (unsigned char *)src->imageData;

int h = src->height;

int w = src->width;

int c = src->nChannels;

int step = src->widthStep;

image_t out = make_image_custom(w, h, c);

int count = 0;

for (int k = 0; k < c; ++k) {

for (int i = 0; i < h; ++i) {

int i_step = i*step;

for (int j = 0; j < w; ++j) {

out.data[count++] = data[i_step + j*c + k] / 255.;

}

}

}

return out;

}

static image_t make_empty_image(int w, int h, int c)

{

image_t out;

out.data = 0;

out.h = h;

out.w = w;

out.c = c;

return out;

}

static image_t make_image_custom(int w, int h, int c)

{

image_t out = make_empty_image(w, h, c);

out.data = (float *)calloc(h*w*c, sizeof(float));

return out;

}

#endif // OPENCV

};

主要的方法:

构造方法

YOLODLL_API Detector(std::string cfg_filename, std::string weight_filename, int gpu_id = 0);

输入:配置文件(.cfg)、权重文件(.weights);gpu_id表示使用哪个GPU

加载输入图像

static YOLODLL_API image_t load_image(std::string image_filename);

输入:图像名称

此方法为静态方法,将二维图像转为张量Tensor

测试图像 均来自百度图片

动物,人

0.jpg

1.jpg

2.jpg

车辆,人

3.jpg

4.jpg

5.jpg

测试结果

动物,人的检测

image.png

image.png

image.png

image.png

image.png

image.png

车辆,行人的检测

image.png

image.png

image.png

image.png

image.png

image.png

从以上测试结果来看,yolo-v3的准确度上性能非凡,较小尺寸的目标也可以检测到。相对于MobileNet-SSD(v1版本)准确度上要好。

Yolo-v3封装为python接口

yolo原始接口采用C语言, 对于不熟悉C/C++的同学不友好, 其次C语言每次都要编译,相对麻烦。 现在深度学习采用python 才是标配, 因此本人利用pybind11封装为python API。

首先需要配置pybind11, 见文章:pybind11使用

image.png

工程配置

image.png

pybind11封装接口

python_api.cpp

#include

#include

#include

#include

#include

#include

#include

using namespace cv;

namespace py = pybind11;

// One detection result handed back to callers (and exposed to Python):
// class index, confidence score, bounding box and class label.
class Object
{
public:
    // Creates an empty result: id is -1 (no class assigned), confidence is 0,
    // rect and name are empty.
    Object();

    // Creates a result from the given values; rect is expected in the
    // [xmin, ymin, xmax, ymax] order used by the member below.
    Object(int id, float confidence, std::vector<int> rect, std::string name);

    ~Object();

public:
    int id;                 // class index reported by the detector
    float confidence;       // detection score reported by the detector
    std::vector<int> rect;  // [xmin, ymin, xmax, ymax]
    std::string name;       // class label
};

// Scalars are initialised explicitly so a default-constructed Object never
// exposes indeterminate values.
Object::Object() : id(-1), confidence(0.0f) {
}

Object::Object(int id, float confidence, std::vector<int> rect, std::string name)
    : id(id), confidence(confidence), rect(rect), name(name) {
}

Object::~Object() {
}

class YoloDetector : public Detector {

public:

std::string weights_file;

std::string cfg_file;

private:

std::vector<:string> classNames;

image_t cvMat_to_image_t(cv::Mat& image) {

image_t dst;

dst.w = image.cols;

dst.h = image.rows;

dst.c = image.channels();

dst.data = new float[dst.w*dst.h*dst.c * sizeof(float)];

int count = 0;

for (int i = 0; i < image.rows; i++)

{

for (int j = 0; j < image.cols; j++)

{

cv::Vec3b pixel = image.at(i, j);

dst.data[count] = (float)pixel[0];

dst.data[count+1] = (float)pixel[1];

dst.data[count+2] = (float)pixel[2];

count += 3;

}

}

return dst;

}

public:

YoloDetector(std::string weights_file, std::string cfg_file) :Detector(cfg_file, weights_file) {

this->weights_file = weights_file;

this->cfg_file = cfg_file;

};

~YoloDetector() {};

public:

void setCOCOName(std::vector<:string> names) {

for (auto i: names)

{

this->classNames.push_back(i);

}

}

image: BGR Format

//std::vector detectImage(cv::Mat& image) {

// Mat rgb_image;

// cvtColor(image, rgb_image, COLOR_BGR2RGB);

// cv::resize(rgb_image, rgb_image, cv::Size(this->get_net_width(), this->get_net_height()));

// image_t image_ = this->cvMat_to_image_t(rgb_image);

//

// auto res = this->detect(image_);

// std::vector objs;

// for (auto i:res)

// {

// objs.push_back(Object(i.obj_id, i.prob, { (int)i.x, (int)i.y, (int)(i.x + i.w), (int)(i.y + i.h) }, classNames[i.obj_id]));

// }

// return objs;

//}

// image: BGR Format

std::vector detectImage(std::string image_name) {

auto res = this->detect(Detector::load_image(image_name));

std::vector objs;

for (auto i : res)

{

objs.push_back(Object(i.obj_id, i.prob, { (int)i.x, (int)i.y, (int)(i.x + i.w), (int)(i.y + i.h) }, classNames[i.obj_id]));

}

return objs;

}

};

#if 0

// Stand-alone C++ check of YoloDetector: detects objects in a test picture
// from its file path and shows the picture with the boxes drawn on it.
// Returns 0 on success and -1 when the class-name file or the image cannot
// be loaded.
int main() {
    const std::string CFG_FILE = "D:\\YOLO-v3\\darknet-master\\cfg\\yolov3.cfg";
    const std::string WEIGHT_FILE = "D:\\YOLO-v3\\yolov3.weights";
    const std::string COCO_NAMES = "D:\\YOLO-v3\\darknet-master\\cfg\\coco.names";
    const std::string IMAGE_FILE = "D:\\YOLO-v3\\darknet-test.jpg";

    // Read the class-name file, one label per line. Every line that is
    // actually present is read, instead of assuming exactly 80 lines.
    std::vector<std::string> classNames;
    std::ifstream fileIn(COCO_NAMES, std::ios::in);
    if (!fileIn.is_open()) {
        std::cerr << "failed to load COCO.names!" << std::endl;
        return -1;
    }
    std::string line;
    while (std::getline(fileIn, line)) {
        // Drop a trailing '\r' so CRLF files do not leave it in the label.
        if (!line.empty() && line.back() == '\r') {
            line.pop_back();
        }
        classNames.push_back(line);
    }

    YoloDetector detector(WEIGHT_FILE, CFG_FILE);
    detector.setCOCOName(classNames);

    // This copy is only used for drawing; check it before running detection.
    cv::Mat image = cv::imread(IMAGE_FILE);
    if (image.empty()) {
        std::cerr << "failed to load image: " << IMAGE_FILE << std::endl;
        return -1;
    }

    auto detectObjects = detector.detectImage(IMAGE_FILE);

    for (const auto& obj : detectObjects) {
        // obj.rect is [xmin, ymin, xmax, ymax]; cv::Rect wants x, y, width, height.
        cv::rectangle(image,
                      cv::Rect(obj.rect[0], obj.rect[1], obj.rect[2] - obj.rect[0], obj.rect[3] - obj.rect[1]),
                      cv::Scalar(0, 255, 255), 2);
    }

    cv::imshow("yolo", image);
    cv::waitKey(0);
    return 0;
}

#endif

#if 1

// Python module "yolov3": exposes Object (the detection result) and
// YoloDetector. The template arguments of py::class_ and py::init were lost
// when the listing was extracted and are restored here.
// The std::vector / std::string members and arguments need pybind11's STL
// casters, i.e. <pybind11/stl.h> must be included by this file.
PYBIND11_MODULE(yolov3, m) {
    py::class_<Object>(m, "Object")
        .def(py::init<int, float, std::vector<int>, std::string>())
        .def_readwrite("id", &Object::id)
        .def_readwrite("confidence", &Object::confidence)
        .def_readwrite("rect", &Object::rect)
        .def_readwrite("name", &Object::name);

    // Constructor arguments: weights file first, then cfg file.
    py::class_<YoloDetector>(m, "YoloDetector")
        .def(py::init<std::string, std::string>())
        .def("detectImage", &YoloDetector::detectImage)
        .def("setCOCOName", &YoloDetector::setCOCOName);
}

#endif

生成python可以调用的动态库

image.png

在pycharm中调用

new一个工程, 在工程目录下new一个 package

image.png

python代码

import demo18.yolov3 as yolov3
import cv2

# Build the detector: weights file first, then the network .cfg file.
detector = yolov3.YoloDetector('D:\\YOLO-v3\\yolov3.weights', 'D:\\YOLO-v3\\darknet-master\\cfg\\yolov3.cfg')
help(detector)

# Load the class labels, one per line, and hand them to the detector.
COCOName = []
with open('D:\\YOLO-v3\\darknet-master\\cfg\\coco.names', 'r') as f:
    for i in f:
        COCOName.append(i.rstrip())
detector.setCOCOName(COCOName)

# Run detection on the image file.
out = detector.detectImage('D:\\YOLO-v3\\darknet-test.jpg')

# Read the same image again for drawing; cv2.imread returns None on failure.
image = cv2.imread('D:\\YOLO-v3\\darknet-test.jpg')
if image is None:
    raise FileNotFoundError('D:\\YOLO-v3\\darknet-test.jpg')

# Each result's rect is [xmin, ymin, xmax, ymax].
for i in out:
    rect = i.rect
    cv2.rectangle(image, (rect[0], rect[1]), (rect[2], rect[3]), (0, 255, 255))
    cv2.putText(image, i.name, (rect[0], rect[1]), 1, 1, (0, 0, 255))

cv2.imshow('yolo', image)
cv2.waitKey(0)

结果

image.png

image.png

image.png

上面的python接口只支持输入文件,只能测试单张图像,对于视频目标检测行不通。因此,在此基础上继续封装接口。

主要实现了2个方法:

detectFromFile() 输入图像文件

detectImage() 输入numpy.ndarray对象, BGR格式

C++代码

#if 1

#include

#include

#include

#include

#include

#include

#include

#include"ndarray_converter.h"

using namespace cv;

namespace py = pybind11;

// Small helper for checking the cv::Mat <-> Python conversion: returns a
// grayscale copy of the given BGR image.
cv::Mat testCV(cv::Mat& img_bgr) {
    cv::Mat gray;
    cv::cvtColor(img_bgr, gray, cv::COLOR_BGR2GRAY);
    return gray;
}

// Converts an 8-bit cv::Mat (1 or 3 channels) into a newly allocated image_t
// with values scaled to [0, 1].
// For 3-channel input the Mat is read as BGR and written per pixel in
// R, G, B order (pixel-interleaved, not channel-planar).
// The buffer is allocated with new[]; the caller owns it.
// Throws std::runtime_error for an empty Mat, a non-8-bit Mat, or a channel
// count other than 1 or 3.
image_t cv_mat_to_image_t(cv::Mat& image) {
    if (image.empty())
    {
        throw std::runtime_error("Image is empty");
    }
    const int channels = image.channels();
    if (image.depth() != CV_8U || (channels != 1 && channels != 3))
    {
        throw std::runtime_error("unsupported image format: expected 8-bit, 1 or 3 channels");
    }

    // new[] reports failure by throwing std::bad_alloc, so no null check is needed.
    float* data = new float[image.rows * image.cols * channels];

    int cnt = 0;
    for (int i = 0; i < image.rows; i++)
    {
        for (int j = 0; j < image.cols; j++)
        {
            if (channels == 3)
            {
                const cv::Vec3b& px = image.at<cv::Vec3b>(i, j);
                data[cnt] = px[2] / 255.0f;      // R
                data[cnt + 1] = px[1] / 255.0f;  // G
                data[cnt + 2] = px[0] / 255.0f;  // B
                cnt += 3;
            }
            else
            {
                data[cnt] = static_cast<float>(image.at<uchar>(i, j)) / 255.0f;
                cnt += 1;
            }
        }
    }

    image_t imaget;
    imaget.c = channels;
    imaget.h = image.rows;
    imaget.w = image.cols;
    imaget.data = data;
    // The missing return here was undefined behaviour for every caller.
    return imaget;
}

// Returns an image_t describing a w x h image with c channels and no pixel
// buffer attached (data is null).
image_t make_empty_image(int w, int h, int c)
{
    image_t header;
    header.w = w;
    header.h = h;
    header.c = c;
    header.data = nullptr;
    return header;
}

// Returns an image_t of the given dimensions with a buffer of w*h*c floats
// allocated by new[]. The buffer contents are not initialised, and the
// caller is responsible for releasing it with delete[].
image_t make_image(int w, int h, int c)
{
    image_t img = make_empty_image(w, h, c);
    img.data = new float[w*h*c];
    return img;
}

// Converts an 8-bit BGR cv::Mat into a newly allocated image_t in RGB order,
// stored channel by channel (all R, then all G, then all B) with values
// scaled to [0, 1].
// The buffer comes from make_image; the caller owns it.
// Throws std::runtime_error for an empty or non-8-bit Mat. cv::cvtColor
// itself throws for channel counts it cannot convert with COLOR_BGR2RGB.
image_t cv_mat_to_image_t2(cv::Mat& image) {
    // Validate before converting, so an empty input gives a clear error.
    if (image.empty())
        throw std::runtime_error("Image is empty");
    if (image.depth() != CV_8U)
        throw std::runtime_error("unsupported image format: expected 8-bit pixels");

    Mat dst;
    cv::cvtColor(image, dst, COLOR_BGR2RGB);

    // Take the geometry from the converted Mat: its channel count can differ
    // from the input's (e.g. 4-channel input yields 3 channels), and using
    // the input's count would read past the end of dst.
    const int w = dst.cols;
    const int h = dst.rows;
    const int c = dst.channels();

    image_t im = make_image(w, h, c);
    for (int k = 0; k < c; ++k) {
        for (int j = 0; j < h; ++j) {
            // Row pointer access respects the Mat's row stride.
            const unsigned char* row = dst.ptr<unsigned char>(j);
            for (int i = 0; i < w; ++i) {
                const int dst_index = i + w * j + w * h * k;
                im.data[dst_index] = (float)row[c * i + k] / 255.;
            }
        }
    }
    return im;
}

// One detection result handed back to callers (and exposed to Python):
// class index, confidence score, bounding box and class label.
class Object
{
public:
    // Creates an empty result: id is -1 (no class assigned), confidence is 0,
    // rect and name are empty.
    Object();

    // Creates a result from the given values; rect is expected in the
    // [xmin, ymin, xmax, ymax] order used by the member below.
    Object(int id, float confidence, std::vector<int> rect, std::string name);

    ~Object();

public:
    int id;                 // class index reported by the detector
    float confidence;       // detection score reported by the detector
    std::vector<int> rect;  // [xmin, ymin, xmax, ymax]
    std::string name;       // class label
};

// Scalars are initialised explicitly so a default-constructed Object never
// exposes indeterminate values.
Object::Object() : id(-1), confidence(0.0f) {
}

Object::Object(int id, float confidence, std::vector<int> rect, std::string name)
    : id(id), confidence(confidence), rect(rect), name(name) {
}

Object::~Object() {
}

class YoloDetector : public Detector {

public:

std::string weights_file;

std::string cfg_file;

private:

std::vector<:string> classNames;

public:

YoloDetector(std::string weights_file, std::string cfg_file) :Detector(cfg_file, weights_file) {

this->weights_file = weights_file;

this->cfg_file = cfg_file;

};

~YoloDetector() {};

public:

void setCOCOName(std::vector<:string> names) {

for (auto i: names)

{

this->classNames.push_back(i);

}

}

// image: BGR Format

std::vector detectFromFile(std::string image_name) {

auto res = this->detect(Detector::load_image(image_name));

std::vector objs;

for (auto i : res)

{

objs.push_back(Object(i.obj_id, i.prob, { (int)i.x, (int)i.y, (int)(i.x + i.w), (int)(i.y + i.h) }, classNames[i.obj_id]));

}

return objs;

}

/*

overload

*/

std::vector detectImage(cv::Mat& image) {

/*float* data = new float[image.rows*image.cols*image.channels()];

if (data==nullptr)

{

std::runtime_error("failed to malloc men!");

}

int cnt = 0;

for (int i = 0; i < image.rows; i++)

{

for (int j = 0; j < image.cols; j++)

{

if (image.channels()==3)

{

float r = image.at(i, j)[2] / 255.0f;

float g = image.at(i, j)[1] / 255.0f;

float b = image.at(i, j)[0] / 255.0f;

data[cnt] = r;

data[cnt+1] = g;

data[cnt+2] = b;

cnt += 3;

}

else

{

data[cnt] = static_cast(image.at(i, j)) / 255.0f;

cnt += 1;

}

}

}

image_t imaget;

imaget.c = image.channels();

imaget.h = image.rows;

imaget.w = image.cols;

imaget.data = data;

std::cout << "yolo: image input ok!" << std::endl;

std::cout << "yolo: start to detect" << std::endl;*/

auto imaget = cv_mat_to_image_t2(image);

auto res = this->detect(imaget);

std::cout << "yolo:finish to detect" << std::endl;

std::vector objs;

for (auto i : res)

{

objs.push_back(Object(i.obj_id, i.prob, { (int)i.x, (int)i.y, (int)(i.x + i.w), (int)(i.y + i.h) }, classNames[i.obj_id]));

}

return objs;

}

};

#if 1

// Stand-alone C++ check of YoloDetector: detects objects in an in-memory
// picture and shows the picture with the boxes drawn on it.
// Returns 0 on success and -1 when the class-name file or the image cannot
// be loaded.
int main() {
    const std::string CFG_FILE = "D:\\YOLO-v3\\darknet-master\\cfg\\yolov3.cfg";
    const std::string WEIGHT_FILE = "D:\\YOLO-v3\\yolov3.weights";
    const std::string COCO_NAMES = "D:\\YOLO-v3\\darknet-master\\cfg\\coco.names";
    const std::string IMAGE_FILE = "D:\\YOLO-v3\\darknet-test.jpg";

    // Read the class-name file, one label per line. Every line that is
    // actually present is read, instead of assuming exactly 80 lines.
    std::vector<std::string> classNames;
    std::ifstream fileIn(COCO_NAMES, std::ios::in);
    if (!fileIn.is_open()) {
        std::cerr << "failed to load COCO.names!" << std::endl;
        return -1;
    }
    std::string line;
    while (std::getline(fileIn, line)) {
        // Drop a trailing '\r' so CRLF files do not leave it in the label.
        if (!line.empty() && line.back() == '\r') {
            line.pop_back();
        }
        classNames.push_back(line);
    }

    YoloDetector detector(WEIGHT_FILE, CFG_FILE);
    detector.setCOCOName(classNames);

    // detectImage converts this Mat directly, so it must be valid before use.
    cv::Mat image = cv::imread(IMAGE_FILE);
    if (image.empty()) {
        std::cerr << "failed to load image: " << IMAGE_FILE << std::endl;
        return -1;
    }

    //auto detectObjects = detector.detectFromFile(IMAGE_FILE);
    auto detectObjects = detector.detectImage(image);

    for (const auto& obj : detectObjects) {
        // obj.rect is [xmin, ymin, xmax, ymax]; cv::Rect wants x, y, width, height.
        cv::rectangle(image,
                      cv::Rect(obj.rect[0], obj.rect[1], obj.rect[2] - obj.rect[0], obj.rect[3] - obj.rect[1]),
                      cv::Scalar(0, 255, 255), 2);
    }

    cv::imshow("yolo", image);
    cv::waitKey(0);
    return 0;
}

#endif

//.def("detectImage", py::overload_cast<cv::Mat&>(&YoloDetector::detectImage))

//.def("detectImage", py::overload_cast<std::string>(&YoloDetector::detectImage))

#if 0

// Python module "yolov3": exposes Object (the detection result), the test_cv
// helper and YoloDetector. The template arguments of py::class_ and py::init
// were lost when the listing was extracted and are restored here.
// The std::vector / std::string members and arguments need pybind11's STL
// casters, i.e. <pybind11/stl.h> must be included by this file.
PYBIND11_MODULE(yolov3, m) {
    // Must run before any cv::Mat <-> numpy.ndarray conversion is used.
    NDArrayConverter::init_numpy();

    py::class_<Object>(m, "Object")
        .def(py::init<int, float, std::vector<int>, std::string>())
        .def_readwrite("id", &Object::id)
        .def_readwrite("confidence", &Object::confidence)
        .def_readwrite("rect", &Object::rect)
        .def_readwrite("name", &Object::name);

    m.def("test_cv", &testCV, py::arg("image_bgr"));

    // Constructor arguments: weights file first, then cfg file.
    py::class_<YoloDetector>(m, "YoloDetector")
        .def(py::init<std::string, std::string>())
        .def("detectFromFile", &YoloDetector::detectFromFile, py::arg("image_file"))
        .def("detectImage", &YoloDetector::detectImage, py::arg("image_bgr"))
        .def("setCOCOName", &YoloDetector::setCOCOName);
}

#endif

#endif // 0

结果

image.png

End

本文主要实现了windows平台下yolo-v3的快速测试使用。关于yolo网络结构的设计、yolo模型的训练,下期再详细介绍。感谢甜心的大力支持。

你可能感兴趣的:(yolo目标检测软件,c语言)