yolov5 模型部署NCNN(详细过程)

1. 在 GitHub 上搜索 ncnn,找到 Tencent/ncnn 仓库,下载 ncnn 源码
yolov5 模型部署NCNN(详细过程)_第1张图片
2.打开vs2017 工具 x64 Native Tools Command Prompt for VS 2017
yolov5 模型部署NCNN(详细过程)_第2张图片


cd <protobuf-root-dir>
mkdir build
cd build
cmake -G"NMake Makefiles" -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=%cd%/install -Dprotobuf_BUILD_TESTS=OFF -Dprotobuf_MSVC_STATIC_RUNTIME=OFF ../cmake
nmake install

yolov5 模型部署NCNN(详细过程)_第3张图片


cd <ncnn-root-dir>
mkdir build
cd build
注意:下面的 cmake 命令需要作为一整行一次性输入。
cmake -G"NMake Makefiles" -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=%cd%/install -DProtobuf_INCLUDE_DIR=<protobuf-root-dir>/build/install/include -DProtobuf_LIBRARIES=<protobuf-root-dir>/build/install/lib/libprotobuf.lib -DProtobuf_PROTOC_EXECUTABLE=<protobuf-root-dir>/build/install/bin/protoc.exe -DNCNN_VULKAN=ON ..
nmake install

7.编译过程中出现报错,查找原因:提示有子模块没有下载下来,对照 CMakeLists.txt 第 244 行,发现 glslang 文件夹为空。

CMake Error at CMakeLists.txt:244 (message):
  The submodules were not downloaded! Please update submodules with "git
  submodule update --init" and try again.

8.检查发现问题出在命令中的 -DNCNN_VULKAN=ON:由于本机没有安装 Vulkan,所以把它改成 -DNCNN_VULKAN=OFF,重新执行 cmake 和 nmake install 后编译成功。



python models/export.py --weights ./runs/train/exp22/weights/yolov5.pt         //使用yolov5自带的export 将pt模型转换成onnx                                                                                                                  
pip install onnx-simplifier //安装onnx-simplifier
python -m onnxsim ./yolov5.onnx ./yolov5-sim.onnx //简化转换得到的onnx

2.使用onnx2ncnn.exe 转换模型

cd tools/onnx
onnx2ncnn.exe  <your-model-path>/model-sim.onnx  <save-model-path>/model-sim.param <save-model-path>/model-sim.bin


Unsupported slice step !
Unsupported slice step !
Unsupported slice step !
Unsupported slice step !
Unsupported slice step !
Unsupported slice step !
Unsupported slice step !
Unsupported slice step !

yolov5 模型部署NCNN(详细过程)_第4张图片
5.除去split 和crop 及concat层,一共十层,换成YoloV5Focus,总层数309变成300,如图所示。

6.将每个 Permute 层的前一层 Reshape 中的参数 0=xxx 改成 0=-1(.param 文件中等号两侧不能有空格)。修改结果如下:
yolov5 模型部署NCNN(详细过程)_第5张图片
7.将修改完毕的.param 和.bin文件加载,测试完整代码如下:

#include "layer.h"
#include "net.h"

#include "simpleocv.h"

#include <algorithm>
#include <cfloat>
#include <cmath>
#include <cstdio>
#include <utility>
#include <vector>

class YoloV5Focus : public ncnn::Layer
        one_blob_only = true;

    virtual int forward(const ncnn::Mat& bottom_blob, ncnn::Mat& top_blob, const ncnn::Option& opt) const
        int w = bottom_blob.w;
        int h = bottom_blob.h;
        int channels = bottom_blob.c;

        int outw = w / 2;
        int outh = h / 2;
        int outc = channels * 4;

        top_blob.create(outw, outh, outc, 4u, 1, opt.blob_allocator);
        if (top_blob.empty())
            return -100;

        #pragma omp parallel for num_threads(opt.num_threads)
        for (int p = 0; p < outc; p++)
            const float* ptr = bottom_blob.channel(p % channels).row((p / channels) % 2) + ((p / channels) / 2);
            float* outptr = top_blob.channel(p);

            for (int i = 0; i < outh; i++)
                for (int j = 0; j < outw; j++)
                    *outptr = *ptr;

                    outptr += 1;
                    ptr += 2;

                ptr += w;

        return 0;


struct Object
    cv::Rect_<float> rect;
    int label;
    float prob;

static inline float intersection_area(const Object& a, const Object& b)
    cv::Rect_<float> inter = a.rect & b.rect;
    return inter.area();

static void qsort_descent_inplace(std::vector<Object>& faceobjects, int left, int right)
    int i = left;
    int j = right;
    float p = faceobjects[(left + right) / 2].prob;

    while (i <= j)
        while (faceobjects[i].prob > p)

        while (faceobjects[j].prob < p)

        if (i <= j)
            // swap
            std::swap(faceobjects[i], faceobjects[j]);


    #pragma omp parallel sections
        #pragma omp section
            if (left < j) qsort_descent_inplace(faceobjects, left, j);
        #pragma omp section
            if (i < right) qsort_descent_inplace(faceobjects, i, right);

static void qsort_descent_inplace(std::vector<Object>& faceobjects)
    if (faceobjects.empty())

    qsort_descent_inplace(faceobjects, 0, faceobjects.size() - 1);

static void nms_sorted_bboxes(const std::vector<Object>& faceobjects, std::vector<int>& picked, float nms_threshold)

    const int n = faceobjects.size();

    std::vector<float> areas(n);
    for (int i = 0; i < n; i++)
        areas[i] = faceobjects[i].rect.area();

    for (int i = 0; i < n; i++)
        const Object& a = faceobjects[i];

        int keep = 1;
        for (int j = 0; j < (int)picked.size(); j++)
            const Object& b = faceobjects[picked[j]];

            // intersection over union
            float inter_area = intersection_area(a, b);
            float union_area = areas[i] + areas[picked[j]] - inter_area;
            // float IoU = inter_area / union_area
            if (inter_area / union_area > nms_threshold)
                keep = 0;

        if (keep)

// Logistic function: 1 / (1 + e^-x).
static inline float sigmoid(float x)
{
    return static_cast<float>(1.f / (1.f + std::exp(-x)));
}

// Decodes one YOLOv5 output blob into candidate boxes.
//
// anchors:        flat list of (width, height) pairs, one pair per anchor.
// stride:         stride of this output head relative to the padded input.
// in_pad:         padded network input; only its w/h are read, to derive the grid shape.
// feat_blob:      one channel per anchor, one row per grid cell; each row holds
//                 [x, y, w, h, objectness, class scores...] before sigmoid.
// prob_threshold: minimum sigmoid(objectness) * sigmoid(best class score) to keep a box.
// objects:        output; decoded boxes (in padded-input coordinates) are appended.
static void generate_proposals(const ncnn::Mat& anchors, int stride, const ncnn::Mat& in_pad, const ncnn::Mat& feat_blob, float prob_threshold, std::vector<Object>& objects)
{
    const int num_grid = feat_blob.h;

    // Derive the grid shape from the longer side so the division is exact.
    int num_grid_x;
    int num_grid_y;
    if (in_pad.w > in_pad.h)
    {
        num_grid_x = in_pad.w / stride;
        num_grid_y = num_grid / num_grid_x;
    }
    else
    {
        num_grid_y = in_pad.h / stride;
        num_grid_x = num_grid / num_grid_y;
    }

    const int num_class = feat_blob.w - 5;

    const int num_anchors = anchors.w / 2;

    for (int q = 0; q < num_anchors; q++)
    {
        const float anchor_w = anchors[q * 2];
        const float anchor_h = anchors[q * 2 + 1];

        const ncnn::Mat feat = feat_blob.channel(q);

        for (int i = 0; i < num_grid_y; i++)
        {
            for (int j = 0; j < num_grid_x; j++)
            {
                const float* featptr = feat.row(i * num_grid_x + j);

                // find class index with max class score
                int class_index = 0;
                float class_score = -FLT_MAX;
                for (int k = 0; k < num_class; k++)
                {
                    float score = featptr[5 + k];
                    if (score > class_score)
                    {
                        class_index = k;
                        class_score = score;
                    }
                }

                float box_score = featptr[4];

                float confidence = sigmoid(box_score) * sigmoid(class_score);

                if (confidence >= prob_threshold)
                {
                    // yolov5/models/yolo.py Detect forward
                    // y = x[i].sigmoid()
                    // y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i].to(x[i].device)) * self.stride[i]  # xy
                    // y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]  # wh

                    float dx = sigmoid(featptr[0]);
                    float dy = sigmoid(featptr[1]);
                    float dw = sigmoid(featptr[2]);
                    float dh = sigmoid(featptr[3]);

                    float pb_cx = (dx * 2.f - 0.5f + j) * stride;
                    float pb_cy = (dy * 2.f - 0.5f + i) * stride;

                    // (2 * d)^2 * anchor, written as a multiply instead of pow()
                    float pb_w = (dw * 2.f) * (dw * 2.f) * anchor_w;
                    float pb_h = (dh * 2.f) * (dh * 2.f) * anchor_h;

                    float x0 = pb_cx - pb_w * 0.5f;
                    float y0 = pb_cy - pb_h * 0.5f;
                    float x1 = pb_cx + pb_w * 0.5f;
                    float y1 = pb_cy + pb_h * 0.5f;

                    Object obj;
                    obj.rect.x = x0;
                    obj.rect.y = y0;
                    obj.rect.width = x1 - x0;
                    obj.rect.height = y1 - y0;
                    obj.label = class_index;
                    obj.prob = confidence;

                    objects.push_back(obj);
                }
            }
        }
    }
}


// Runs the YOLOv5 ncnn model on a BGR image.
//
// bgr:     input image.
// objects: output; resized to the number of detections that survive NMS, with
//          boxes mapped back to the coordinates of the original image.
// Returns 0 on success, -1 when the model files cannot be loaded.
static int detect_yolov5(const cv::Mat& bgr, std::vector<Object>& objects)
{
    ncnn::Net yolov5;

    yolov5.opt.use_vulkan_compute = true;
    // yolov5.opt.use_bf16_storage = true;

    // Must be registered before the .param file is loaded.
    yolov5.register_custom_layer("YoloV5Focus", YoloV5Focus_layer_creator);

    // original pretrained model from https://github.com/ultralytics/yolov5
    // the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models
    // NOTE(review): the listing this was taken from lost its model-loading
    // lines; the file names below are placeholders and must match the
    // .param/.bin files produced by onnx2ncnn (and edited by hand).
    if (yolov5.load_param("yolov5-sim.param") != 0 || yolov5.load_model("yolov5-sim.bin") != 0)
    {
        fprintf(stderr, "failed to load yolov5 ncnn model\n");
        return -1;
    }

    const int target_size = 640;
    const float prob_threshold = 0.25f;
    const float nms_threshold = 0.45f;

    int img_w = bgr.cols;
    int img_h = bgr.rows;

    // letterbox pad to multiple of 32
    // Scale the longer side to target_size and keep the aspect ratio.
    int w = img_w;
    int h = img_h;
    float scale = 1.f;
    if (w > h)
    {
        scale = (float)target_size / w;
        w = target_size;
        h = h * scale;
    }
    else
    {
        scale = (float)target_size / h;
        h = target_size;
        w = w * scale;
    }

    ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, ncnn::Mat::PIXEL_BGR2RGB, img_w, img_h, w, h);

    // pad to target_size rectangle
    // yolov5/utils/datasets.py letterbox
    int wpad = (w + 31) / 32 * 32 - w;
    int hpad = (h + 31) / 32 * 32 - h;
    ncnn::Mat in_pad;
    ncnn::copy_make_border(in, in_pad, hpad / 2, hpad - hpad / 2, wpad / 2, wpad - wpad / 2, ncnn::BORDER_CONSTANT, 114.f);

    // Scale pixel values from [0, 255] to [0, 1]; no mean subtraction.
    const float norm_vals[3] = {1 / 255.f, 1 / 255.f, 1 / 255.f};
    in_pad.substract_mean_normalize(0, norm_vals);

    ncnn::Extractor ex = yolov5.create_extractor();

    ex.input("images", in_pad);

    std::vector<Object> proposals;

    // anchor setting from yolov5/models/yolov5s.yaml

    // stride 8
    {
        ncnn::Mat out;
        ex.extract("output", out);

        ncnn::Mat anchors(6);
        anchors[0] = 10.f;
        anchors[1] = 13.f;
        anchors[2] = 16.f;
        anchors[3] = 30.f;
        anchors[4] = 33.f;
        anchors[5] = 23.f;

        std::vector<Object> objects8;
        generate_proposals(anchors, 8, in_pad, out, prob_threshold, objects8);

        proposals.insert(proposals.end(), objects8.begin(), objects8.end());
    }

    // stride 16
    {
        ncnn::Mat out;
        ex.extract("781", out); // blob name depends on the converted network; check your .param file

        ncnn::Mat anchors(6);
        anchors[0] = 30.f;
        anchors[1] = 61.f;
        anchors[2] = 62.f;
        anchors[3] = 45.f;
        anchors[4] = 59.f;
        anchors[5] = 119.f;

        std::vector<Object> objects16;
        generate_proposals(anchors, 16, in_pad, out, prob_threshold, objects16);

        proposals.insert(proposals.end(), objects16.begin(), objects16.end());
    }

    // stride 32
    {
        ncnn::Mat out;
        ex.extract("801", out); // blob name depends on the converted network; check your .param file

        ncnn::Mat anchors(6);
        anchors[0] = 116.f;
        anchors[1] = 90.f;
        anchors[2] = 156.f;
        anchors[3] = 198.f;
        anchors[4] = 373.f;
        anchors[5] = 326.f;

        std::vector<Object> objects32;
        generate_proposals(anchors, 32, in_pad, out, prob_threshold, objects32);

        proposals.insert(proposals.end(), objects32.begin(), objects32.end());
    }

    // sort all proposals by score from highest to lowest
    qsort_descent_inplace(proposals);

    // apply nms with nms_threshold
    std::vector<int> picked;
    nms_sorted_bboxes(proposals, picked, nms_threshold);

    int count = picked.size();

    // Size the output before indexing into it.
    objects.resize(count);
    for (int i = 0; i < count; i++)
    {
        objects[i] = proposals[picked[i]];

        // adjust offset to original unpadded
        float x0 = (objects[i].rect.x - (wpad / 2)) / scale;
        float y0 = (objects[i].rect.y - (hpad / 2)) / scale;
        float x1 = (objects[i].rect.x + objects[i].rect.width - (wpad / 2)) / scale;
        float y1 = (objects[i].rect.y + objects[i].rect.height - (hpad / 2)) / scale;

        // clip
        x0 = std::max(std::min(x0, (float)(img_w - 1)), 0.f);
        y0 = std::max(std::min(y0, (float)(img_h - 1)), 0.f);
        x1 = std::max(std::min(x1, (float)(img_w - 1)), 0.f);
        y1 = std::max(std::min(y1, (float)(img_h - 1)), 0.f);

        objects[i].rect.x = x0;
        objects[i].rect.y = y0;
        objects[i].rect.width = x1 - x0;
        objects[i].rect.height = y1 - y0;
    }

    return 0;
}

// Prints every detection to stderr, draws its box and a "<class> <percent>%"
// label on a copy of the image, then shows the result in a window and waits
// for a key press.
static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& objects)
{
    static const char* class_names[] = {
        "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
        "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
        "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
        "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
        "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
        "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
        "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone",
        "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear",
        "hair drier", "toothbrush"
    };
    const int num_class_names = static_cast<int>(sizeof(class_names) / sizeof(class_names[0]));

    cv::Mat image = bgr.clone();

    for (size_t i = 0; i < objects.size(); i++)
    {
        const Object& obj = objects[i];

        fprintf(stderr, "%d = %.5f at %.2f %.2f %.2f x %.2f\n", obj.label, obj.prob,
                obj.rect.x, obj.rect.y, obj.rect.width, obj.rect.height);

        cv::rectangle(image, obj.rect, cv::Scalar(255, 0, 0));

        // A model trained on a different class list can produce labels outside
        // this table; fall back to a generic name instead of reading past it.
        const char* class_name = (obj.label >= 0 && obj.label < num_class_names) ? class_names[obj.label] : "unknown";

        // prob is in [0, 1]; the label shows it as a percentage.
        char text[256];
        snprintf(text, sizeof(text), "%s %.1f%%", class_name, obj.prob * 100);

        int baseLine = 0;
        cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);

        // Place the label above the box, clamped to the image.
        int x = obj.rect.x;
        int y = obj.rect.y - label_size.height - baseLine;
        if (y < 0)
            y = 0;
        if (x + label_size.width > image.cols)
            x = image.cols - label_size.width;

        cv::rectangle(image, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)),
                      cv::Scalar(255, 255, 255), -1);

        cv::putText(image, text, cv::Point(x, y + label_size.height),
                    cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0));
    }

    cv::imshow("image", image);
    // Keep the window open until a key is pressed; otherwise the program exits immediately.
    cv::waitKey(0);
}

int main(int argc, char** argv)
    if (argc != 2)
        fprintf(stderr, "Usage: %s [imagepath]\n", argv[0]);
        return -1;

    const char* imagepath = argv[1];

    cv::Mat m = cv::imread(imagepath, 1);
    if (m.empty())
        fprintf(stderr, "cv::imread %s failed\n", imagepath);
        return -1;

    std::vector<Object> objects;
    detect_yolov5(m, objects);

    draw_objects(m, objects);

    return 0;



1.ncnnoptimize 不认识自定义层,所以先将 param 中的 YoloV5Focus 层类型临时改成 Exp(不参与计算的层);优化完成后,再在生成的 param 中把该层改回 YoloV5Focus
yolov5 模型部署NCNN(详细过程)_第6张图片
2.回到tools 路径,运行命令

ncnnoptimize.exe yolov5-sim.param yolov5-sim.bin yolov5-sim-opt.param yolov5-sim-opt.bin 65536

Input layer images without shape info, shape_inference skipped
Input layer images without shape info, estimate_memory_footprint skipped

yolov5 模型部署NCNN(详细过程)_第7张图片

