

#ifndef NCNN_H_
#define NCNN_H_

#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstddef>
#include <cstdio>
#include <vector>

#include "net.h"


/// One detection result: an axis-aligned box given by two corner points,
/// plus the predicted category index and its confidence score.
class TargetBox
{
public:
    /// Horizontal extent of the box (x2 - x1), as a float.
    float getWidth() const { return static_cast<float>(x2 - x1); }
    /// Vertical extent of the box (y2 - y1), as a float.
    float getHeight() const { return static_cast<float>(y2 - y1); }

    // Corner coordinates; (x1, y1) and (x2, y2) are opposite corners.
    int x1 = 0;
    int y1 = 0;
    int x2 = 0;
    int y2 = 0;

    int cate = 0;       // category index of the detection
    float score = 0.f;  // confidence score of the detection

    /// Box area, computed as width * height.
    float area() const { return getWidth() * getHeight(); }
};

class yoloFastestv2
    ncnn::Net net;
    std::vector anchor;

    char *inputName;
    char *outputName1;
    char *outputName2;

    int numAnchor;
    int numOutput;
    int numThreads;
    int numCategory;
    int inputWidth, inputHeight;

    float nmsThresh;

    int nmsHandle(std::vector &tmpBoxes, std::vector &dstBoxes);
    int getCategory(const float *values, int index, int &category, float &score);
    int predHandle(const ncnn::Mat *out, std::vector &dstBoxes, 
                   const float scaleW, const float scaleH, const float thresh);


    int loadModel(const char* paramPath, const char* binPath);
    int detection(const cv::Mat srcImg, std::vector &dstBoxes, 
                  const float thresh = 0.3);


#include "yolo-fastestv2.h"

    printf("Creat yoloFastestv2 Detector...\n");
    numOutput = 2;
    numThreads = 4;
    //anchor num
    numAnchor = 3;
    numCategory = 80;
    nmsThresh = 0.25;

    inputWidth = 352;
    inputHeight = 352;

    inputName = "input.1";
    outputName1 = "794"; //22x22
    outputName2 = "796"; //11x11

    printf("numThreads:%d\n", numThreads);
    printf("inputWidth:%d inputHeight:%d\n", inputWidth, inputHeight);

    //anchor box w h
    std::vector bias {12.64,19.39, 37.88,51.48, 55.71,138.31, 
                             126.91,78.23, 131.57,214.55, 279.92,258.87};

    anchor.assign(bias.begin(), bias.end());

    printf("Destroy yoloFastestv2 Detector...\n");

//ncnn 模型加载
int yoloFastestv2::loadModel(const char* paramPath, const char* binPath)
    printf("Ncnn mode init:\n%s\n%s\n", paramPath, binPath);


    printf("Ncnn model init sucess...\n");

    return 0;

float intersection_area(const TargetBox &a, const TargetBox &b)
    if (a.x1 > b.x2 || a.x2 < b.x1 || a.y1 > b.y2 || a.y2 < b.y1)
        // no intersection
        return 0.f;

    float inter_width = std::min(a.x2, b.x2) - std::max(a.x1, b.x1);
    float inter_height = std::min(a.y2, b.y2) - std::max(a.y1, b.y1);

    return inter_width * inter_height;

// Ordering predicate for std::sort: places higher-scoring boxes first.
bool scoreSort(TargetBox a, TargetBox b)
{
    return (a.score > b.score);
}

int yoloFastestv2::nmsHandle(std::vector &tmpBoxes, 
                             std::vector &dstBoxes)
    std::vector picked;
    sort(tmpBoxes.begin(), tmpBoxes.end(), scoreSort);

    for (int i = 0; i < tmpBoxes.size(); i++) {
        int keep = 1;
        for (int j = 0; j < picked.size(); j++) {
            float inter_area = intersection_area(tmpBoxes[i], tmpBoxes[picked[j]]);
            float union_area = tmpBoxes[i].area() + tmpBoxes[picked[j]].area() - inter_area;
            float IoU = inter_area / union_area;

            if(IoU > nmsThresh && tmpBoxes[i].cate == tmpBoxes[picked[j]].cate) {
                keep = 0;

        if (keep) {
    for (int i = 0; i < picked.size(); i++) {

    return 0;

int yoloFastestv2::getCategory(const float *values, int index, int &category, float &score)
    float tmp = 0;
    float objScore  = values[4 * numAnchor + index];

    for (int i = 0; i < numCategory; i++) {
        float clsScore = values[4 * numAnchor + numAnchor + i];
        clsScore *= objScore;

        if(clsScore > tmp) {
            score = clsScore;
            category = i;

            tmp = clsScore;
    return 0;

// Decodes the raw network outputs into candidate boxes.
//
// out:      array of numOutput blobs. Each blob is read with its channel
//           count as the grid height, its h as the grid width and its w as
//           the number of values per grid cell.
// dstBoxes: receives (appended) every box whose score exceeds thresh.
// scaleW / scaleH: factors applied to the decoded x / y coordinates.
// thresh:   minimum (class score * objectness) for a box to be kept.
// Returns 0 on success, -1 when an output blob has an empty grid.
int yoloFastestv2::predHandle(const ncnn::Mat *out, std::vector<TargetBox> &dstBoxes,
                              const float scaleW, const float scaleH, const float thresh)
{    //do result
    for (int i = 0; i < numOutput; i++) {
        const int outH = out[i].c;
        const int outW = out[i].h;
        const int outC = out[i].w;

        // An empty blob would make the stride computation divide by zero.
        if (outH <= 0 || outW <= 0) {
            return -1;
        }

        assert(inputHeight / outH == inputWidth / outW);
        const int stride = inputHeight / outH;

        for (int h = 0; h < outH; h++) {
            const float* values = out[i].channel(h);

            for (int w = 0; w < outW; w++) {
                for (int b = 0; b < numAnchor; b++) {
                    //float objScore = values[4 * numAnchor + b];
                    int category = -1;
                    float score = -1;

                    getCategory(values, b, category, score);

                    if (score > thresh) {
                        // Box centre: cell offset plus grid position, scaled to input pixels.
                        const float bcx = ((values[b * 4 + 0] * 2. - 0.5) + w) * stride;
                        const float bcy = ((values[b * 4 + 1] * 2. - 0.5) + h) * stride;

                        // Box size: squared, doubled prediction times the anchor size.
                        const double sw = values[b * 4 + 2] * 2.;
                        const double sh = values[b * 4 + 3] * 2.;
                        const float bw = sw * sw * anchor[(i * numAnchor * 2) + b * 2 + 0];
                        const float bh = sh * sh * anchor[(i * numAnchor * 2) + b * 2 + 1];

                        TargetBox tmpBox;
                        tmpBox.x1 = static_cast<int>((bcx - 0.5 * bw) * scaleW);
                        tmpBox.y1 = static_cast<int>((bcy - 0.5 * bh) * scaleH);
                        tmpBox.x2 = static_cast<int>((bcx + 0.5 * bw) * scaleW);
                        tmpBox.y2 = static_cast<int>((bcy + 0.5 * bh) * scaleH);
                        tmpBox.score = score;
                        tmpBox.cate = category;

                        dstBoxes.push_back(tmpBox);
                    }
                }

                // Advance to the next grid cell in this row.
                values += outC;
            }
        }
    }

    return 0;
}

// Runs the full pipeline on one image: resize + normalise, inference,
// box decoding and non-maximum suppression.
//
// srcImg:   input image; its data is handed to ncnn as packed BGR pixels.
//           NOTE(review): presumably must be a continuous 8-bit 3-channel
//           image - confirm against callers.
// dstBoxes: receives (appended) the final detections, with coordinates
//           scaled by srcImg size / network input size.
// thresh:   score threshold forwarded to predHandle.
// Returns 0 on success, -1 when the image is empty or inference fails.
int yoloFastestv2::detection(const cv::Mat srcImg, std::vector<TargetBox> &dstBoxes, const float thresh)
{
    // An empty image has no pixel data to hand to ncnn.
    if (srcImg.empty()) {
        return -1;
    }

    const float scaleW = static_cast<float>(srcImg.cols) / static_cast<float>(inputWidth);
    const float scaleH = static_cast<float>(srcImg.rows) / static_cast<float>(inputHeight);

    //resize of input image data
    ncnn::Mat inputImg = ncnn::Mat::from_pixels_resize(srcImg.data, ncnn::Mat::PIXEL_BGR,
                                                       srcImg.cols, srcImg.rows, inputWidth, inputHeight);

    //Normalization of input image data: scale pixel values by 1/255
    const float mean_vals[3] = {0.f, 0.f, 0.f};
    const float norm_vals[3] = {1/255.f, 1/255.f, 1/255.f};
    inputImg.substract_mean_normalize(mean_vals, norm_vals);

    //create extractor
    ncnn::Extractor ex = net.create_extractor();
    // Apply the configured thread count, which is otherwise never used.
    ex.set_num_threads(numThreads);

    //set input tensor
    if (ex.input(inputName, inputImg) != 0) {
        return -1;
    }

    ncnn::Mat out[2];
    if (ex.extract(outputName1, out[0]) != 0 ||   //22x22
        ex.extract(outputName2, out[1]) != 0) {   //11x11
        return -1;
    }

    std::vector<TargetBox> tmpBoxes;
    predHandle(out, tmpBoxes, scaleW, scaleH, thresh);

    nmsHandle(tmpBoxes, dstBoxes);
    return 0;
}


#include "yolo-fastestv2.h"

// Demo entry point: reads "test.jpg", runs the detector on it, draws a
// label background, label text and a bounding rectangle for each returned
// box, and writes the annotated image to "output.png".
//
// NOTE(review): this function is incomplete as it stands - the opening and
// closing braces of main, the closing "};" of class_names, the element type
// of `boxes` and the closing brace of the for loop are not present.
int main()
    // 80 category labels, matching numCategory = 80 in the detector.
    // NOTE(review): presumably indexed by TargetBox::cate to build the label
    // text; the code that would do so is not visible here.
    static const char* class_names[] = {
        "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
        "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
        "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
        "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
        "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
        "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
        "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone",
        "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear",
        "hair drier", "toothbrush"
    yoloFastestv2 api;

    // NOTE(review): no api.loadModel(...) call is visible before detection
    // is run - confirm a model is loaded here.

    cv::Mat cvImg = cv::imread("test.jpg");

    std::vector boxes;
    // Uses the default score threshold of detection().
    api.detection(cvImg, boxes);

    for (int i = 0; i < boxes.size(); i++) {
        // NOTE(review): the next line is garbled. `text`, `label_size`,
        // `baseLine`, `x` and `y` are used below but never declared in the
        // visible code; what remains looks like the tail of a check that
        // keeps the label inside the right image edge.
        std::cout< cvImg.cols)
            x = cvImg.cols - label_size.width;

        // Filled white rectangle behind the label text.
        cv::rectangle(cvImg, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)),
                      cv::Scalar(255, 255, 255), -1);

        // Label text drawn in black on top of the white background.
        cv::putText(cvImg, text, cv::Point(x, y + label_size.height),
                    cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0));

        // Outline of the detected box, 2 pixels thick.
        cv::rectangle (cvImg, cv::Point(boxes[i].x1, boxes[i].y1), 
                       cv::Point(boxes[i].x2, boxes[i].y2), cv::Scalar(255, 255, 0), 2, 2, 0);
    // Save the annotated image.
    cv::imwrite("output.png", cvImg);

    return 0;
