深度学习目标检测之——YOLO-v3目标检测(windows端调用)

前言

目前基于深度学习的目标检测越来越火,其准确度很高。笔者采用Yolo-v3实现目标检测。Yolo-v3基于darknet框架,该框架采用纯C语言编写,不依赖其他第三方库,相对于caffe框架在易用性上对开发者更友好(笔者编译过数次caffe才成功)。本文基于windows平台将yolo-v3编译为动态链接库dll,测试其检测性能。

New, python接口的YOLO-v3, !!!, 走过不要错过


为了方便测试,本人将测试通过的Visual Studio工程贴出来
Yolov3-windows测试工程
链接:https://pan.baidu.com/s/1i6ZK2ZCGzWbfWT1-_fUTzg
提取码:rj9o

复制这段内容后打开百度网盘手机App,操作更方便哦

image.png

(https://upload-images.jianshu.io/upload_images/11478104-38a8683cb7a03a38.png?imageMogr2/auto-orient/strip%7CimageView2/2/w/1240)


开发环境

  • windows 10 x64
  • Visual Studio 2017
  • opencv3.4.0
  • darknet 笔者直接fork自AlexeyAB/darknet

动态链接库.dll的编译过程就不再赘述,相信熟悉C++、编译过opencv的小伙伴都很容易完成。本文测试使用的是cpu-only版本,笔者编译过cpu-only、gpu两个版本的yolo-v3 dll,需要dll的请点赞支持哦。


Yolo-v3

  • 代码 C++,opencv
    需要的文件可以在darknet链接下载得到:
  1. yolov3.cfg,yolov3的网络结构描述文件
  2. yolov3.weights,yolov3训练好的权重文件,在coco数据集上训练的
  3. coco.names, coco数据集的目标类别文件
#include
#include
#include

using namespace std;
using namespace cv;

// Input files used by the demo. All three paths are relative.
// Network structure description; passed to the Detector constructor in main().
const string CFG_FILE = "darknet-master\\cfg\\yolov3.cfg";
// Trained weights; passed to the Detector constructor in main().
const string WEIGHT_FILE = "yolov3.weights";
// Class-label list; opened and read line by line in main().
const string COCO_NAMES = "darknet-master\\cfg\\coco.names";


class Object
{
public:
    Object();
    Object::Object(int id, float confidence, Rect rect, String name);
    ~Object();

public:
    int id;
    float confidence;
    Rect rect;
    String name;

private:

};

Object::Object() {
}

Object::Object(int id,float confidence,Rect rect,String name) {
    this->id = id;
    this->confidence = confidence;
    this->rect = rect;
    this->name = name;
}

Object::~Object() {
}


int main() {

    //--------------------------实例化一个Yolo检测器---------------------------
    Detector yolo_detector(CFG_FILE, WEIGHT_FILE);

    //读取目标类别文件,80类
    vector classNames;
    ifstream fileIn(COCO_NAMES, ios::in);
    if (!fileIn.is_open()) {
        cerr << "failed to load COCO.names!" << endl;
        return -1;
    }
    for (int i = 0; i < 80; i++) {
        char temp1[100];
        fileIn.getline(temp1, 100);
        string temp2(temp1);
        classNames.push_back(String(temp2));

    }

    //---------------------------加载输入图像-----------------------------------
    auto image = Detector::load_image("7.jpg");
    cout << "图像宽度=" << image.w << endl
        << "图像高度=" << image.h << endl
        << "图像通道=" << image.c << endl;

    //-----------------------------目标检测---------------------------------------
    TickMeter t;
    t.start();
    auto res = yolo_detector.detect(image);
    t.stop();
    cout << "YOLO-v3检测时间=" << t.getTimeSec() << "sec" << endl;

    //----------------------------解析检测结果---------------------------------------
    vector detectObjects;
    for (auto& i:res) {
        int id = i.obj_id;
        float confidence = i.prob;
        String name = classNames[id];
        Rect rect = Rect{ static_cast(i.x),static_cast(i.y),static_cast(i.w),static_cast(i.h) };

        detectObjects.push_back(Object{ id,confidence,rect,name });
    }

    //----------------------------绘制结果---------------------------------------------
    Mat im_src = imread("7.jpg");
    for (auto& i:detectObjects) {
        rectangle(im_src, i.rect, Scalar(0, 255, 255), 2);
        putText(im_src, i.name, i.rect.tl(), 1, 1.8, Scalar(255, 0, 0),2);
    }


    imshow("yolo-v3", im_src);
    waitKey(0);
}


  • API介绍
    yolo_v2_class.hpp 中定义了Detector,将yolo封装到C++类中,方便使用。下面是Detector类的源码。
// C++ wrapper around the YOLO detector exported by the darknet DLL.
// Members marked YOLODLL_API are only declared here; their definitions are
// not part of this listing.
// NOTE(review): several template argument lists in this listing appear to
// have been lost when the article was extracted (e.g. "std::shared_ptr",
// "std::vector", "std::deque>" without element types) — compare with the
// original yolo_v2_class.hpp before compiling.
class Detector {
    std::shared_ptr detector_gpu_ptr;
    std::deque> prev_bbox_vec_deque;
    const int cur_gpu_id;
public:
    // NOTE(review): presumably the non-maximum-suppression threshold — confirm in the library source.
    float nms = .4;
    bool wait_stream;

    // Builds a detector from a network description file and a weights file.
    YOLODLL_API Detector(std::string cfg_filename, std::string weight_filename, int gpu_id = 0);
    YOLODLL_API ~Detector();

    // Two detection entry points: one takes an image file name, the other an
    // already loaded image_t.
    YOLODLL_API std::vector detect(std::string image_filename, float thresh = 0.2, bool use_mean = false);
    YOLODLL_API std::vector detect(image_t img, float thresh = 0.2, bool use_mean = false);
    // Static helpers to load an image file into an image_t and to release an image_t.
    static YOLODLL_API image_t load_image(std::string image_filename);
    static YOLODLL_API void free_image(image_t m);
    // Network input size; used below as the resize target.
    YOLODLL_API int get_net_width() const;
    YOLODLL_API int get_net_height() const;

    YOLODLL_API std::vector tracking_id(std::vector cur_bbox_vec, bool const change_history = true, 
                                                int const frames_story = 10, int const max_dist = 150);

    // Runs detect() on img and rescales every returned box by
    // init_w / img.w horizontally and init_h / img.h vertically.
    // Throws std::runtime_error when img has no pixel data.
    std::vector detect_resized(image_t img, int init_w, int init_h, float thresh = 0.2, bool use_mean = false)
    {
        if (img.data == NULL)
            throw std::runtime_error("Image is empty");
        auto detection_boxes = detect(img, thresh, use_mean);
        float wk = (float)init_w / img.w, hk = (float)init_h / img.h;
        for (auto &i : detection_boxes) i.x *= wk, i.w *= wk, i.y *= hk, i.h *= hk;
        return detection_boxes;
    }

#ifdef OPENCV
    // Detection on a cv::Mat: the Mat is resized to the network input size,
    // and the boxes are scaled back to mat.cols x mat.rows by detect_resized().
    // Throws std::runtime_error when mat has no pixel data.
    std::vector detect(cv::Mat mat, float thresh = 0.2, bool use_mean = false)
    {
        if(mat.data == NULL)
            throw std::runtime_error("Image is empty");
        auto image_ptr = mat_to_image_resize(mat);
        return detect_resized(*image_ptr, mat.cols, mat.rows, thresh, use_mean);
    }

    // Resizes mat to get_net_width() x get_net_height() and converts it with
    // mat_to_image(). Returns a null pointer when mat has no pixel data.
    std::shared_ptr mat_to_image_resize(cv::Mat mat) const
    {
        if (mat.data == NULL) return std::shared_ptr(NULL);
        cv::Mat det_mat;
        cv::resize(mat, det_mat, cv::Size(get_net_width(), get_net_height()));
        return mat_to_image(det_mat);
    }

    // Converts a cv::Mat into a heap-allocated image_t after swapping the
    // first and third colour channels.
    static std::shared_ptr mat_to_image(cv::Mat img_src)
    {
        cv::Mat img;
        cv::cvtColor(img_src, img, cv::COLOR_RGB2BGR);
        // Custom deleter: release the pixel buffer with free_image() before
        // deleting the image_t object itself.
        std::shared_ptr image_ptr(new image_t, [](image_t *img) { free_image(*img); delete img; });
        std::shared_ptr ipl_small = std::make_shared(img);
        *image_ptr = ipl_to_image(ipl_small.get());
        return image_ptr;
    }

private:

    // Copies an IplImage into a new image_t: interleaved bytes are written
    // channel by channel (all of channel 0, then channel 1, ...) and each
    // value is divided by 255.
    static image_t ipl_to_image(IplImage* src)
    {
        unsigned char *data = (unsigned char *)src->imageData;
        int h = src->height;
        int w = src->width;
        int c = src->nChannels;
        int step = src->widthStep;
        image_t out = make_image_custom(w, h, c);
        int count = 0;

        for (int k = 0; k < c; ++k) {
            for (int i = 0; i < h; ++i) {
                // Byte offset of row i; widthStep is used rather than w*c.
                int i_step = i*step;
                for (int j = 0; j < w; ++j) {
                    out.data[count++] = data[i_step + j*c + k] / 255.;
                }
            }
        }

        return out;
    }

    // Returns an image_t with the given dimensions and a null data pointer.
    static image_t make_empty_image(int w, int h, int c)
    {
        image_t out;
        out.data = 0;
        out.h = h;
        out.w = w;
        out.c = c;
        return out;
    }

    // Returns an image_t with a zero-initialised buffer of h*w*c floats
    // allocated with calloc.
    static image_t make_image_custom(int w, int h, int c)
    {
        image_t out = make_empty_image(w, h, c);
        out.data = (float *)calloc(h*w*c, sizeof(float));
        return out;
    }

#endif  // OPENCV

};

  • 主要的方法:
    1. 构造方法
YOLODLL_API Detector(std::string cfg_filename, std::string weight_filename, int gpu_id = 0);

输入:配置文件(.cfg)、权重文件(.weights);gpu_id表示使用哪个GPU

  1. 加载输入图像
static YOLODLL_API image_t load_image(std::string image_filename);

输入:图像名称
此方法为静态方法,将二维图像转为张量Tensor


  • 测试图像 均来自百度图片
    • 动物,人
0.jpg
1.jpg
2.jpg

  • 车辆,人
3.jpg
4.jpg
5.jpg

测试结果

  • 动物,人的检测
    image.png
image.png
image.png
image.png
image.png
image.png
  • 车辆,行人的检测
    image.png

    image.png
image.png
image.png
image.png
image.png

从以上测试结果来看,yolo-v3的准确度上性能非凡,较小尺寸的目标也可以检测到。相对于MobileNet-SSD(v1版本)准确度上要好。


Yolo-v3封装为python接口

yolo原始接口采用C语言, 对于不熟悉C/C++的同学不友好, 其次C语言每次都要编译,相对麻烦。 现在深度学习采用python 才是标配, 因此本人利用pybind11封装为python API。

首先需要配置pybind11, 见文章:pybind11使用

image.png

工程配置

image.png

pybind11封装接口
python_api.cpp

#include
#include
#include
#include
#include
#include
#include

using namespace cv;

namespace py = pybind11;


/// One detection result in the form that is exposed to Python.
class Object
{
public:
    /// Creates an empty record (id 0, confidence 0, empty rect and name).
    Object() = default;

    /// @param id          class index reported by the detector
    /// @param confidence  detection score reported by the detector
    /// @param rect        bounding box as [xmin, ymin, xmax, ymax]
    /// @param name        class label that belongs to @p id
    Object(int id, float confidence, std::vector<int> rect, std::string name);

public:
    int id = 0;
    float confidence = 0.0f;
    std::vector<int> rect;  //[xmin, ymin, xmax, ymax]
    std::string name;
};

Object::Object(int id, float confidence, std::vector<int> rect, std::string name)
    : id(id), confidence(confidence), rect(rect), name(name) {
}



class YoloDetector : public Detector {

public:
    std::string weights_file;
    std::string cfg_file;

private:
    std::vector classNames;
    image_t cvMat_to_image_t(cv::Mat& image) {
    
        image_t dst;
        dst.w = image.cols;
        dst.h = image.rows;
        dst.c = image.channels();

        dst.data = new float[dst.w*dst.h*dst.c * sizeof(float)];
        int count = 0;
        for (int i = 0; i < image.rows; i++)
        {
            for (int j = 0; j < image.cols; j++)
            {
                cv::Vec3b pixel = image.at(i, j);
                dst.data[count] = (float)pixel[0];
                dst.data[count+1] = (float)pixel[1];
                dst.data[count+2] = (float)pixel[2];

                count += 3;

            }
        }

        return dst;
    }

public:
    YoloDetector(std::string weights_file, std::string cfg_file) :Detector(cfg_file, weights_file) {
        this->weights_file = weights_file;
        this->cfg_file = cfg_file;
    };

    ~YoloDetector() {};

public:

    void setCOCOName(std::vector names) {
        for (auto i: names)
        {
            this->classNames.push_back(i);
        }
    }

    //// image: BGR Format
    //std::vector detectImage(cv::Mat& image) {

    //  Mat rgb_image;
    //  cvtColor(image, rgb_image, COLOR_BGR2RGB);

    //  cv::resize(rgb_image, rgb_image, cv::Size(this->get_net_width(), this->get_net_height()));

    //  image_t image_ = this->cvMat_to_image_t(rgb_image);
    //

    //  auto res = this->detect(image_);
    //  std::vector objs;

    //  for (auto i:res)
    //  {
    //      objs.push_back(Object(i.obj_id, i.prob, { (int)i.x, (int)i.y, (int)(i.x + i.w), (int)(i.y + i.h) }, classNames[i.obj_id]));
    //  }

    //  return objs;

    //}



        // image: BGR Format
    std::vector detectImage(std::string image_name) {


        auto res = this->detect(Detector::load_image(image_name));
        std::vector objs;

        for (auto i : res)
        {
            objs.push_back(Object(i.obj_id, i.prob, { (int)i.x, (int)i.y, (int)(i.x + i.w), (int)(i.y + i.h) }, classNames[i.obj_id]));
        }

        return objs;

    }


};


#if 0

int main() {

    const std::string CFG_FILE = "D:\\YOLO-v3\\darknet-master\\cfg\\yolov3.cfg";
    const std::string WEIGHT_FILE = "D:\\YOLO-v3\\yolov3.weights";
    const std::string COCO_NAMES = "D:\\YOLO-v3\\darknet-master\\cfg\\coco.names";

    //读取目标类别文件,80类
    std::vector classNames;
    std::ifstream fileIn(COCO_NAMES, std::ios::in);
    if (!fileIn.is_open()) {
        std::cerr << "failed to load COCO.names!" << std::endl;
        return -1;
    }
    for (int i = 0; i < 80; i++) {
        char temp1[100];
        fileIn.getline(temp1, 100);
        std::string temp2(temp1);
        classNames.push_back(temp2);

    }

    YoloDetector detector(WEIGHT_FILE, CFG_FILE);
    detector.setCOCOName(classNames);

    cv::Mat image = cv::imread("D:\\YOLO-v3\\darknet-test.jpg");

    auto detectObjects = detector.detectImage("D:\\YOLO-v3\\darknet-test.jpg");


    for (auto& i : detectObjects) {
        cv::rectangle(image,Rect(i.rect[0],i.rect[1], i.rect[2]-i.rect[0], i.rect[3]-i.rect[1]), Scalar(0, 255, 255), 2);
        //putText(image, i.name, i.rect.tl(), 1, 1.8, Scalar(255, 0, 0), 2);
    }

    imshow("yolo", image);
    waitKey(0);




}

#endif

#if 1
// Python module "yolov3": exposes the Object record and the YoloDetector
// wrapper. The template arguments mirror the C++ constructor signatures.
// NOTE(review): the std::vector / std::string argument and attribute
// conversions need pybind11/stl.h to be included — confirm in the include list.
PYBIND11_MODULE(yolov3, m) {

    // Object(id, confidence, rect, name) with all four fields readable and writable.
    py::class_<Object>(m, "Object")
        .def(py::init<int, float, std::vector<int>, std::string>())
        .def_readwrite("id", &Object::id)
        .def_readwrite("confidence", &Object::confidence)
        .def_readwrite("rect", &Object::rect)
        .def_readwrite("name", &Object::name);

    // YoloDetector(weights_file, cfg_file)
    py::class_<YoloDetector>(m, "YoloDetector")
        .def(py::init<std::string, std::string>())
        .def("detectImage", &YoloDetector::detectImage)
        .def("setCOCOName", &YoloDetector::setCOCOName);

}

#endif 


生成python可以调用的动态库

image.png

在pycharm中调用
new一个工程, 在工程目录下new一个 package

image.png

python代码

import demo18.yolov3 as yolov3
import cv2

# Model files and the test picture used by this demo.
WEIGHTS_PATH = 'D:\\YOLO-v3\\yolov3.weights'
CFG_PATH = 'D:\\YOLO-v3\\darknet-master\\cfg\\yolov3.cfg'
NAMES_PATH = 'D:\\YOLO-v3\\darknet-master\\cfg\\coco.names'
IMAGE_PATH = 'D:\\YOLO-v3\\darknet-test.jpg'

# The wrapper takes the weights file first and the cfg file second.
detector = yolov3.YoloDetector(WEIGHTS_PATH, CFG_PATH)
help(detector)

# One class label per line, with trailing whitespace removed.
with open(NAMES_PATH, 'r') as names_file:
    class_names = [line.rstrip() for line in names_file]

detector.setCOCOName(class_names)

detections = detector.detectImage(IMAGE_PATH)

# Draw every detection: rect is [xmin, ymin, xmax, ymax].
canvas = cv2.imread(IMAGE_PATH)
for det in detections:
    box = det.rect
    top_left = (box[0], box[1])
    bottom_right = (box[2], box[3])
    cv2.rectangle(canvas, top_left, bottom_right, (0, 255, 255))
    cv2.putText(canvas, det.name, top_left, 1, 1, (0, 0, 255))

cv2.imshow('yolo', canvas)
cv2.waitKey(0)


结果

image.png

image.png
image.png

上面的python接口只支持输入文件,只能测试单张图像,对于视频目标检测行不通。因此,在此基础上继续封装接口。

主要实现了2个方法:

  • detectFromFile() 输入图像文件
  • detectImage() 输入numpy.ndarray对象, BGR格式

C++代码

#if 1


#include
#include
#include
#include
#include
#include
#include
#include"ndarray_converter.h"

using namespace cv;

namespace py = pybind11;


/// Returns a grayscale copy of a BGR image; the input is left untouched.
cv::Mat testCV(cv::Mat& img_bgr) {
    cv::Mat gray;
    cv::cvtColor(img_bgr, gray, COLOR_BGR2GRAY);
    return gray;
}

/// Converts an 8-bit cv::Mat (1 channel, or 3 channels in BGR order) into a
/// newly allocated image_t with values scaled to [0, 1].
///
/// For 3-channel input the floats are stored pixel by pixel in R, G, B order.
/// NOTE(review): this interleaved layout differs from the channel-by-channel
/// layout written by cv_mat_to_image_t2 — confirm which one the detector
/// expects before using this function.
///
/// @param image  source image; not modified
/// @return image_t whose data buffer was allocated with new[]; the caller
///         must release it with delete[]
/// @throws std::runtime_error if the image is not 8-bit with 1 or 3 channels
image_t cv_mat_to_image_t(cv::Mat& image) {

    const int channels = image.channels();
    if (image.depth() != CV_8U || (channels != 1 && channels != 3))
    {
        throw std::runtime_error("unsupported image type");
    }

    // new[] reports failure by throwing, so no null check is needed.
    float* data = new float[image.rows * image.cols * channels];

    int cnt = 0;
    for (int i = 0; i < image.rows; i++)
    {
        for (int j = 0; j < image.cols; j++)
        {
            if (channels == 3)
            {
                const cv::Vec3b& bgr = image.at<cv::Vec3b>(i, j);

                data[cnt] = bgr[2] / 255.0f;      // R
                data[cnt + 1] = bgr[1] / 255.0f;  // G
                data[cnt + 2] = bgr[0] / 255.0f;  // B

                cnt += 3;
            }
            else
            {
                data[cnt] = static_cast<float>(image.at<unsigned char>(i, j)) / 255.0f;
                cnt += 1;
            }
        }
    }

    image_t imaget;
    imaget.c = channels;
    imaget.h = image.rows;
    imaget.w = image.cols;
    imaget.data = data;
    // The original fell off the end of the function without returning.
    return imaget;
}


/// Builds an image_t header with the given width, height and channel count
/// and no pixel buffer (data is null).
image_t make_empty_image(int w, int h, int c)
{
    image_t blank;
    blank.w = w;
    blank.h = h;
    blank.c = c;
    blank.data = nullptr;
    return blank;
}


/// Builds an image_t of the given size with a buffer of w*h*c floats
/// allocated with new[]. The buffer contents are not initialised.
image_t make_image(int w, int h, int c)
{
    image_t img = make_empty_image(w, h, c);
    float* const pixels = new float[w*h*c];
    img.data = pixels;
    return img;
}

/// Converts an 8-bit cv::Mat into a newly allocated image_t with values
/// scaled to [0, 1] and stored channel by channel (all of channel 0 first,
/// then channel 1, ...).
///
/// Colour input is converted from BGR to RGB first; single-channel input is
/// used as it is.
///
/// @param image  source image; not modified
/// @return image_t whose buffer comes from make_image() (new[]); the caller
///         must release it with delete[]
/// @throws std::runtime_error if the image is empty or not 8-bit
image_t cv_mat_to_image_t2(cv::Mat& image) {

    if (image.empty())
        throw std::runtime_error("Image is empty");
    if (image.depth() != CV_8U)
        throw std::runtime_error("unsupported image depth");

    Mat rgb;
    if (image.channels() == 1)
        rgb = image;  // shares the pixel data, no copy
    else
        cv::cvtColor(image, rgb, COLOR_BGR2RGB);

    // Take the dimensions from the converted Mat: cvtColor can change the
    // channel count (e.g. 4-channel input becomes 3-channel output), and
    // using the input's count would read past the converted buffer.
    const int w = rgb.cols;
    const int h = rgb.rows;
    const int c = rgb.channels();

    image_t im = make_image(w, h, c);
    for (int k = 0; k < c; ++k) {
        for (int j = 0; j < h; ++j) {
            // Row pointer instead of a flat index so that Mats with row
            // padding (e.g. a region of interest) are read correctly.
            const unsigned char* row = rgb.ptr<unsigned char>(j);
            for (int i = 0; i < w; ++i) {
                const int dst_index = i + w * j + w * h*k;
                im.data[dst_index] = static_cast<float>(row[c * i + k] / 255.);
            }
        }
    }
    return im;
}



/// One detection result in the form that is exposed to Python.
class Object
{
public:
    /// Creates an empty record (id 0, confidence 0, empty rect and name).
    Object() = default;

    /// @param id          class index reported by the detector
    /// @param confidence  detection score reported by the detector
    /// @param rect        bounding box as [xmin, ymin, xmax, ymax]
    /// @param name        class label that belongs to @p id
    Object(int id, float confidence, std::vector<int> rect, std::string name);

public:
    int id = 0;
    float confidence = 0.0f;
    std::vector<int> rect;  //[xmin, ymin, xmax, ymax]
    std::string name;
};

Object::Object(int id, float confidence, std::vector<int> rect, std::string name)
    : id(id), confidence(confidence), rect(rect), name(name) {
}



class YoloDetector : public Detector {

public:
    std::string weights_file;
    std::string cfg_file;

private:
    std::vector classNames;

public:
    YoloDetector(std::string weights_file, std::string cfg_file) :Detector(cfg_file, weights_file) {
        this->weights_file = weights_file;
        this->cfg_file = cfg_file;
    };

    ~YoloDetector() {};

public:

    void setCOCOName(std::vector names) {
        for (auto i: names)
        {
            this->classNames.push_back(i);
        }
    }


    // image: BGR Format
    std::vector detectFromFile(std::string image_name) {


        auto res = this->detect(Detector::load_image(image_name));
        std::vector objs;

        for (auto i : res)
        {
            objs.push_back(Object(i.obj_id, i.prob, { (int)i.x, (int)i.y, (int)(i.x + i.w), (int)(i.y + i.h) }, classNames[i.obj_id]));
        }

        return objs;

    }

    /*
    overload
    */
    std::vector detectImage(cv::Mat& image) {

        /*float* data = new float[image.rows*image.cols*image.channels()];
        if (data==nullptr)
        {
            std::runtime_error("failed to malloc men!");
        }
        int cnt = 0;
        for (int i = 0; i < image.rows; i++)
        {
            for (int j = 0; j < image.cols; j++)
            {
                if (image.channels()==3)
                {
                    float r = image.at(i, j)[2] / 255.0f;
                    float g = image.at(i, j)[1] / 255.0f;
                    float b = image.at(i, j)[0] / 255.0f;

                    data[cnt] = r;
                    data[cnt+1] = g;
                    data[cnt+2] = b;

                    cnt += 3;
                }
                else
                {
                    data[cnt] = static_cast(image.at(i, j)) / 255.0f;
                    cnt += 1;
                }

            }
        }

        image_t imaget;
        imaget.c = image.channels();
        imaget.h = image.rows;
        imaget.w = image.cols;
        imaget.data = data;

        std::cout << "yolo: image input ok!" << std::endl;
        std::cout << "yolo: start to detect" << std::endl;*/

        auto imaget = cv_mat_to_image_t2(image);
        
        auto res = this->detect(imaget);

        std::cout << "yolo:finish to detect" << std::endl;

        std::vector objs;

        for (auto i : res)
        {
            objs.push_back(Object(i.obj_id, i.prob, { (int)i.x, (int)i.y, (int)(i.x + i.w), (int)(i.y + i.h) }, classNames[i.obj_id]));
        }

        return objs;
    }


};




#if 1

int main() {

    const std::string CFG_FILE = "D:\\YOLO-v3\\darknet-master\\cfg\\yolov3.cfg";
    const std::string WEIGHT_FILE = "D:\\YOLO-v3\\yolov3.weights";
    const std::string COCO_NAMES = "D:\\YOLO-v3\\darknet-master\\cfg\\coco.names";

    //读取目标类别文件,80类
    std::vector classNames;
    std::ifstream fileIn(COCO_NAMES, std::ios::in);
    if (!fileIn.is_open()) {
        std::cerr << "failed to load COCO.names!" << std::endl;
        return -1;
    }
    for (int i = 0; i < 80; i++) {
        char temp1[100];
        fileIn.getline(temp1, 100);
        std::string temp2(temp1);
        classNames.push_back(temp2);

    }

    YoloDetector detector(WEIGHT_FILE, CFG_FILE);
    detector.setCOCOName(classNames);

    cv::Mat image = cv::imread("D:\\YOLO-v3\\darknet-test.jpg");

    //auto detectObjects = detector.detectFromFile("D:\\YOLO-v3\\darknet-test.jpg");

    auto detectObjects = detector.detectImage(image);


    for (auto& i : detectObjects) {
        cv::rectangle(image,Rect(i.rect[0],i.rect[1], i.rect[2]-i.rect[0], i.rect[3]-i.rect[1]), Scalar(0, 255, 255), 2);
        //putText(image, i.name, i.rect.tl(), 1, 1.8, Scalar(255, 0, 0), 2);
    }

    imshow("yolo", image);
    waitKey(0);




}

#endif

//.def("detectImage", py::overload_cast(&YoloDetector::detectImage))
//.def("detectImage", py::overload_cast(&YoloDetector::detectImage))

#if 0
// Python module "yolov3": exposes the Object record, the YoloDetector wrapper
// and the test_cv helper. The template arguments mirror the C++ constructor
// signatures.
// NOTE(review): the std::vector / std::string conversions need pybind11/stl.h
// to be included — confirm in the include list.
PYBIND11_MODULE(yolov3, m) {

    // Must run before any cv::Mat <-> numpy.ndarray conversion is used.
    NDArrayConverter::init_numpy();


    // Object(id, confidence, rect, name) with all four fields readable and writable.
    py::class_<Object>(m, "Object")
        .def(py::init<int, float, std::vector<int>, std::string>())
        .def_readwrite("id", &Object::id)
        .def_readwrite("confidence", &Object::confidence)
        .def_readwrite("rect", &Object::rect)
        .def_readwrite("name", &Object::name);

    m.def("test_cv", &testCV, py::arg("image_bgr"));

    // YoloDetector(weights_file, cfg_file)
    py::class_<YoloDetector>(m, "YoloDetector")
        .def(py::init<std::string, std::string>())
        .def("detectFromFile", &YoloDetector::detectFromFile, py::arg("image_file"))
        .def("detectImage", &YoloDetector::detectImage, py::arg("image_bgr"))
        .def("setCOCOName", &YoloDetector::setCOCOName);

}

#endif 


#endif // 0

结果


image.png

End

本文主要实现了windows平台下yolo-v3的快速测试使用,关于yolo网络结构的设计、yolo模型的训练,下期再详细介绍,感谢甜心的大力支持。

你可能感兴趣的:(深度学习目标检测之——YOLO-v3目标检测(windows端调用))