




// specific language governing permissions and limitations under the License.


#include "detection.h"






#include "cpu.h"

namespace mace {

namespace yolov3{

// Human-readable class names, 80 entries. The forward passes in this file
// index this table with a detection's label when printing results.
std::string coco_classes[] = {"person","bicycle","car","motorcycle","airplane","bus","train","truck","boat","traffic light","fire hydrant","stop sign","parking meter","bench","bird","cat","dog","horse","sheep","cow","elephant","bear","zebra","giraffe","backpack","umbrella","handbag","tie","suitcase","frisbee","skis","snowboard","sports ball","kite","baseball bat","baseball glove","skateboard","surfboard","tennis racket","bottle","wine glass","cup","fork","knife","spoon","bowl","banana","apple","sandwich","orange","broccoli","carrot","hot dog","pizza","donut","cake","chair","couch","potted plant","bed","dining table","toilet","tv","laptop","mouse","remote","keyboard","cell phone","microwave","oven","toaster","sink","refrigerator","book","clock","vase","scissors","teddy bear","hair drier","toothbrush"};

// Numeric ids, 80 entries, non-contiguous (e.g. 12, 26, 29, 30 are skipped).
// NOTE(review): presumably the dataset category id for the name at the same
// position in coco_classes -- confirm; no use of this table is visible here.
int coco_ids[] = {1,2,3,4,5,6,7,8,9,10,11,13,14,15,16,17,18,19,20,21,22,23,24,25,27,28,31,32,33,34,35,36,37,38,39,40,41,42,43,44,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,67,70,72,73,74,75,76,77,78,79,80,81,82,84,85,86,87,88,89,90};



// NOTE(review): these assignments appear at namespace scope with no enclosing
// function visible. They name the members of class Option, so they look like
// the body of an Option constructor whose header and braces are missing --
// confirm and restore before compiling.
// Note that the num_threads assignment is commented out, so nothing visible
// here gives num_threads a value.
lightmode = true;

//num_threads = get_cpu_count();


blob_allocator = 0;

workspace_allocator = 0;


// File-local default Option instance shared by get_default_option() and
// set_default_option().
static Option g_default_option;

// Returns a read-only reference to the file-local default Option.
const Option& get_default_option()
{
    return g_default_option;
}


int set_default_option(const Option& opt)


if (opt.num_threads <= 0)


fprintf(stderr, "invalid option num_threads %d\n", opt.num_threads);

return -1;


g_default_option = opt;

return 0;







// NOTE(review): orphaned fragment. The commented-out line and the declaration
// below are not inside any visible function or class. As written, `int label;`
// declares a namespace-scope variable; no read of it is visible in this file
// (the `label` used in forward() is a local vector, and `.label` elsewhere is
// the BBoxRect member). Confirm whether this line should exist at all.
//delete softmax;


int label;

static inline float intersection_area(const BBoxRect& a, const BBoxRect& b)


if (a.xmin > b.xmax || a.xmax < b.xmin || a.ymin > b.ymax || a.ymax < b.ymin)


// no intersection

return 0.f;


float inter_width = std::min(a.xmax, b.xmax) - std::max(a.xmin, b.xmin);

float inter_height = std::min(a.ymax, b.ymax) - std::max(a.ymin, b.ymin);

return inter_width * inter_height;



// Sorts scores[left..right] (inclusive bounds) into descending order in place
// and applies the same permutation to datas, so datas[k] keeps travelling with
// scores[k].
//
// Preconditions: 0 <= left, right < scores.size(), and datas has at least as
// many elements as scores. A range with fewer than two elements is a no-op.
template <typename T>
static void qsort_descent_inplace(std::vector<T>& datas, std::vector<float>& scores, int left, int right)
{
    if (left >= right)
        return;

    int i = left;
    int j = right;
    const int middle = (left + right) / 2;
    const float f = scores[middle];

    // Hoare partition. The loop must run while i <= j (not i < j): otherwise the
    // element where the two cursors meet is never compared against the pivot
    // and can end up on the wrong side.
    while (i <= j)
    {
        while (scores[i] > f)
            i++;

        while (scores[j] < f)
            j--;

        if (i <= j)
        {
            // swap
            std::swap(datas[i], datas[j]);
            std::swap(scores[i], scores[j]);

            i++;
            j--;
        }
    }

    if (left < j) qsort_descent_inplace(datas, scores, left, j);

    if (i < right) qsort_descent_inplace(datas, scores, i, right);
}



// Sorts `scores` into descending order and reorders `datas` with the same
// permutation. Does nothing when either vector is empty.
//
// Precondition: datas and scores have the same number of elements; the range
// handed to the in-place sorter is derived from datas.size().
template <typename T>
static void qsort_descent(std::vector<T>& datas, std::vector<float>& scores)
{
    if (datas.empty() || scores.empty())
        return;

    const int left = 0;
    const int right = static_cast<int>(datas.size()) - 1;

    qsort_descent_inplace(datas, scores, left, right);
}


// Suppression pass over `bboxes`. Callers in this file run qsort_descent on
// the boxes first, so the input is expected to arrive ordered by score.
// `picked` is read as a list of indices into `bboxes` (see bboxes[picked[j]]).
// NOTE(review): braces, template arguments and the statements that append to
// `picked` / `picked_boxes` are missing from this text. As written, neither
// output is ever modified and the body after `if (keep)` is empty. Restore
// them from the original source before relying on this function.
static void nms_sorted_bboxes(const std::vector& bboxes, std::vector& picked_boxes,std::vector& picked, float nms_threshold)



const int n = bboxes.size();

// Area of every input box, computed once up front.
std::vector areas(n);

for (int i = 0; i < n; i++)


const BBoxRect& r = bboxes[i];

float width = r.xmax - r.xmin;

float height = r.ymax - r.ymin;

areas[i] = width * height;




// NOTE(review): candidates start at index 1, not 0. Presumably box 0 is
// accepted unconditionally by a statement that is missing here -- confirm.
for (int i = 1; i < n; i++)


const BBoxRect& a = bboxes[i];

int keep = 1;

for (int j = 0; j < (int)picked.size(); j++)


const BBoxRect& b = bboxes[picked[j]];

// intersection over union

            // The overlap is scaled by area(candidate) / area(kept box) before
            // the ratio test below, so this is not a plain IoU.
            float ratio=areas[i]/areas[picked[j]];



            float inter_area = intersection_area(a, b)*ratio;

float union_area = areas[i] + areas[picked[j]] - inter_area;

// float IoU = inter_area / union_area

if (inter_area / union_area > nms_threshold)


                    keep = 0;




if (keep)







// Logistic function: maps any real x to 1 / (1 + e^-x), a value in [0, 1].
static inline float sigmoid(float x)
{
    return 1.f / (1.f + exp(-x));
}


// Decodes YOLOv3 output blobs read in a channel-interleaved layout (all
// channels of one grid cell are contiguous, see the `bias` offset below),
// keeps candidates that pass the confidence and box-area thresholds, then
// groups them by label and runs qsort_descent + nms_sorted_bboxes per group.
// `boxes` is aliased as `picked` and is the detection output.
// Returns 0 early when no candidate was collected, otherwise 1 at the end.
// NOTE(review): this text is missing braces, template arguments and several
// statements (for example the lines that append to all_bbox_rects /
// all_bbox_scores), so it does not compile as-is. The notes below describe
// only what is visible.
int Yolov3DetectionOutput::forward_nhwc(const std::vector& bottom_blobs, std::vector& top_blobs,std::vector&boxes) const


    // Hard-coded detection parameters.
    size_t num_class = 80;

    size_t NUMS_ANCHOR = 3;

    float confidence_threshold=0.6;

    float nms_threshold = 0.45f;

    // Per-blob multiplier: blob b is expected to satisfy scale[b] * w == 416.
    size_t scale[3]={32,16,8};

    // Anchor indices per blob, three per blob.
    // NOTE(review): forward() uses a different table (all 8) -- confirm which
    // values are intended.
    size_t m_[9]={6,7,8,4,5,6,1,2,3};

    // Nine (width, height) anchor pairs, flattened.
    size_t anchors[18] = {10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326};

    size_t num_mask=3* NUMS_ANCHOR;

    size_t len_biases=3* NUMS_ANCHOR*2;

    Mat anchors_scale(3,scale);

    Mat mask(num_mask,m_);

    Mat biases(len_biases,anchors);

    std::vector all_bbox_rects;

    std::vector all_bbox_scores;

    for (size_t b = 0; b < bottom_blobs.size(); b++)


        const Mat& bottom_top_blobs = bottom_blobs[b];

        size_t w = bottom_top_blobs.w;

        size_t h = bottom_top_blobs.h;

        size_t channels = bottom_top_blobs.c;

        const size_t channels_per_box = channels / NUMS_ANCHOR;

        size_t mask_offset = b * NUMS_ANCHOR;

        size_t net_w = (size_t)((reinterpret_cast(anchors_scale.data))[b] * w);      

        size_t net_h = (size_t)((reinterpret_cast(anchors_scale.data))[b] * h);


        std::cout<<"blobs w,h,channels"<

        // Unlike forward(), a size mismatch is only reported here; the early
        // return is commented out.
        if((net_w!=net_h) || (net_w!=416)){

            std::cerr<<"check Data Input"<

            //return -1;


        // NOTE(review): `p` is declared outside the OpenMP loop below and
        // assigned inside it, so it is shared between threads by default --
        // looks like a data race; confirm and move the declaration inside.
        float *p;

// NOTE(review): no `opt` is declared in this function's visible parameter
// list or body.
#pragma omp parallel for num_threads(opt.num_threads)

        // k enumerates grid cells; i is the row and j the column.
        for (size_t k = 0; k < h*w; k++)


            size_t i=k/w;

            size_t j=k%w;

            for (size_t pp = 0; pp < NUMS_ANCHOR; pp++)


                size_t biases_index = mask[pp+NUMS_ANCHOR*b];

                const float bias_w = (reinterpret_cast(biases.data))[biases_index * 2];

                const float bias_h = (reinterpret_cast(biases.data))[biases_index * 2+1] ;

                // Offset of anchor pp's values for cell k. From there:
                // p[0..3] = x, y, w, h; p[4] = box score; p[5+q] = class q.
                long bias=k*channels+pp*channels_per_box;

                 p= reinterpret_cast(bottom_top_blobs.data)+bias;    

                    // Width/height use sigmoid(raw) * anchor here, whereas
                    // forward() uses exp(raw) * anchor.
                    float bbox_w = sigmoid(*(p+2))* bias_w ;

                    float bbox_h = sigmoid(*(p+3)) * bias_h ;

                    float bbox_cx = (j + sigmoid(*(p)))*416/w;

                    float bbox_cy = (i + sigmoid(*(p+1)))*416/h;

                    //float bbox_w = pow(2,*(p+2)) * bias_w;

                    //float bbox_h = pow(2,*(p+3)) * bias_h;

                    float bbox_xmin = bbox_cx - bbox_w * 0.5f;

                    float bbox_ymin = bbox_cy - bbox_h * 0.5f;

                    float bbox_xmax = bbox_cx + bbox_w * 0.5f;

                    float bbox_ymax = bbox_cy + bbox_h * 0.5f;





                    float box_score = sigmoid(*(p+4));

                    size_t class_index = 0;

                    float class_score = 0.0f;

                    for (size_t q = 0; q < num_class; q++)


                        float score = sigmoid(*(p+5+q));

                        // NOTE(review): no assignment to class_score is
                        // visible in this branch (forward() has
                        // `class_score = score;`). As written class_score
                        // stays 0 and confidence below is always 0.
                        if (score > class_score)



                            class_index = q;

                            //printf( "%d %f %f\n", class_index, box_score, class_score);



                    float confidence = box_score * class_score;

                    if (confidence >= confidence_threshold)


                        // NOTE(review): class_index is size_t but
                        // BBoxRect::label is int; brace-initialising from it
                        // is a narrowing conversion -- confirm it compiles.
                        BBoxRect c = { bbox_xmin, bbox_ymin, bbox_xmax, bbox_ymax, class_index };

                        float area=(bbox_xmax-bbox_xmin)*(bbox_ymax-bbox_ymin);

                        // Area filter; the body of this branch is missing.
                        if(area > 60.0){








    size_t NUMS_ANCHORes=all_bbox_rects.size();

    std::cout<<"alll: "<

    if(NUMS_ANCHORes==0)return 0;

    // labelX / lnum: presumably the distinct labels seen and their count; the
    // code that fills them is incomplete in this text.
    std::vector labelX;

    size_t lnum=0;

    size_t N=NUMS_ANCHORes;



    {   size_t la=all_bbox_rects[N-1].label;

        bool fl=true;

        for(size_t j=0;j


            if(la == labelX[j])












    // One bucket of boxes and one bucket of scores per label.
    std::vector > bboxes(lnum);

    std::vector >scores(lnum);   


    for (size_t i = 0; i < NUMS_ANCHORes; i++)


        size_t j=0;



            if(all_bbox_rects[i].label == labelX[j])


                std::vector& rect_=bboxes[j];

                std::vector& temp_=scores[j];








    std::vector& picked=boxes;

    std::vector score;   

    //size_t type=0;

    for(size_t i=0;i<(size_t)bboxes.size();i++)


        std::vector& s=scores[i];

        if(s.empty()) continue;


        // global sort inplace

        std::vector& b=bboxes[i];

        qsort_descent(b, s);

        // apply nms

        std::vector pick;

        std::vector p;

        nms_sorted_bboxes(bboxes[i], pick,p, nms_threshold);


        for (size_t j = 0; j < (size_t)p.size(); j++)






size_t num_detected = picked.size();

    for (size_t i = 0; i < num_detected; i++)


const BBoxRect& r = picked[i];

        float score_ = score[i];

        //std::cout<<"class: "<< coco_classes[r.label] <<" "<< r.label <<"; score "<


    std::cout<<"detected:<< num_detected<<----------------------- "<

    std::cout<<"-------------------------------- "<

    //global nms

    //std::cout<<"glabal nms: "<

    //std::cout<<"picked: "<

    //#qsort_descent(all_bbox_rects, all_bbox_scores);

    //std::vector picked_gnms;

    //std::vector p_gnms;

    //nms_sorted_bboxes(all_bbox_rects, picked_gnms,p_gnms, nms_threshold);

    //num_detected = picked_gnms.size();

    //for (size_t i = 0; i < num_detected; i++)


// const BBoxRect& r = picked_gnms[i];

    //  float score_ = score[i];

    //  std::cout<<"class: "<< coco_classes[r.label] <<" "<< r.label <<"; score "<


    //std::cout<<"detected: "<

    // top_blob is bound but not used in the visible code.
    Mat& top_blob =top_blobs[0];

// Note: this path returns 1, while forward() returns 0 at its end.
return 1;


// Decodes YOLOv3 output blobs read channel by channel (Mat::channel /
// channel_range), keeps candidates that pass the confidence and box-area
// thresholds, buckets them into 80 per-label lists, and runs qsort_descent +
// nms_sorted_bboxes on each non-empty bucket. `boxes` is aliased as `picked`
// and is the detection output.
// Returns -1 when a blob's channel count is not 3 * (4 + 1 + num_class) or
// when scale[b] * w / scale[b] * h is not 416 x 416; returns 0 at the end.
// NOTE(review): this text is missing braces, template arguments and several
// statements (for example the lines that append to all_bbox_rects /
// all_bbox_scores), so it does not compile as-is. The notes below describe
// only what is visible.
int Yolov3DetectionOutput::forward(const std::vector& bottom_blobs, std::vector& top_blobs,std::vector&boxes) const


    // Hard-coded detection parameters.
    int num_class = 80;

    int NUMS_ANCHOR = 3;

    float confidence_threshold=0.6;

    float nms_threshold = 0.6f;

    int scale[3]={32,16,8};

    // NOTE(review): every entry is 8, so the anchor index looked up below is
    // 8 for every blob and anchor slot, i.e. always the last pair (373,326).
    // forward_nhwc() uses a different table -- confirm which is intended.
    int m_[9]={8,8,8,8,8,8,8,8,8};

    // Nine (width, height) anchor pairs, flattened.
    int anchors[18] = {10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326};

    int num_mask=3* NUMS_ANCHOR;

    int len_biases=3* NUMS_ANCHOR*2;

    Mat anchors_scale(3,scale);

    Mat mask(num_mask,m_);

    Mat biases(len_biases,anchors);

    std::vector all_bbox_rects;

    std::vector all_bbox_scores;

    for (size_t b = 0; b < bottom_blobs.size(); b++)


        const Mat& bottom_top_blobs = bottom_blobs[b];

        int w = bottom_top_blobs.w;

        int h = bottom_top_blobs.h;

        int channels = bottom_top_blobs.c;

        //printf("%d %d %d\n", w, h, channels);

        const int channels_per_box = channels / NUMS_ANCHOR;

        // anchor coord + box score + num_class

        if (channels_per_box != 4 + 1 + num_class)

            return -1;

        int mask_offset = b * NUMS_ANCHOR;

        //int net_w= anchors_scale[b];

        int net_w = (int)((reinterpret_cast(anchors_scale.data))[b] * w);       

        int net_h = (int)((reinterpret_cast(anchors_scale.data))[b] * h);

        if((net_w!=net_h) || (net_w!=416)){

            std::cerr<<"check Data Input"<

            return -1;


        //printf("%d %d\n", net_w, net_h);

        //printf("%d %d %d\n", w, h, channels);

// NOTE(review): no `opt` is declared in this function's visible parameter
// list or body.
#pragma omp parallel for num_threads(opt.num_threads)

        for (int pp = 0; pp < NUMS_ANCHOR; pp++)


            // First channel belonging to anchor slot pp.
            int p = pp * channels_per_box;

            int biases_index = (reinterpret_cast(mask.data))[pp + mask_offset];

            //printf("%d\n", biases_index);

            const float bias_w = (reinterpret_cast(biases.data))[biases_index * 2];

            const float bias_h = (reinterpret_cast(biases.data))[biases_index * 2+1] ;

            //printf("%f %f\n", bias_w, bias_h);

            // Channel order within an anchor slot: x, y, w, h, box score,
            // then num_class class-score channels.
            const float* xptr = bottom_top_blobs.channel(p);

            const float* yptr = bottom_top_blobs.channel(p + 1);

            const float* wptr = bottom_top_blobs.channel(p + 2);

            const float* hptr = bottom_top_blobs.channel(p + 3);

            const float* box_score_ptr = bottom_top_blobs.channel(p + 4);

            // softmax class scores

            Mat scores = bottom_top_blobs.channel_range(p + 5, num_class);

            //softmax->forward_inplace(scores, opt);

            // k enumerates grid cells; i is the row and j the column.
            // NOTE(review): the loop reads xptr[0], yptr[0], ... every
            // iteration and no pointer advance is visible; presumably the
            // increments are among the missing lines -- confirm.
            for (int k = 0; k < h*w; k++)


                    int i=k/w;

                    int j=k%w;

                    float bbox_w = exp(wptr[0]) * bias_w ;

                    float bbox_h = exp(hptr[0]) * bias_h ;

                    float bbox_cx = (j + sigmoid(xptr[0]))*416/w;

                    //float bbox_cx = (j + sigmoid(xptr[0]))*416/w;

                    float bbox_cy = (i + sigmoid(yptr[0]))*416/h;

                    //float bbox_w = pow(2,wptr[0]) * bias_w / 416;

                    //float bbox_h = pow(2,hptr[0]) * bias_h / 416;

                    float bbox_xmin = bbox_cx - bbox_w * 0.5f;

                    float bbox_ymin = bbox_cy - bbox_h * 0.5f;

                    float bbox_xmax = bbox_cx + bbox_w * 0.5f;

                    float bbox_ymax = bbox_cy + bbox_h * 0.5f;





                    // box score

                    float box_score = sigmoid(box_score_ptr[0]);

                    // find class index with max class score

                    int class_index = 0;

                    float class_score = 0.0f;

                    for (int q = 0; q < num_class; q++)


                        float score = sigmoid(scores.channel(q).row(i)[j]);

                        if (score > class_score)


                            class_index = q;

                            class_score = score;

                            //printf( "%d %f %f\n", class_index, box_score, class_score);



                    float confidence = box_score * class_score;

                    if (confidence >= confidence_threshold)


                        BBoxRect c = { bbox_xmin, bbox_ymin, bbox_xmax, bbox_ymax, class_index };

                        float area=(bbox_xmax-bbox_xmin)*(bbox_ymax-bbox_ymin);

                        // Area filter (forward_nhwc() uses 60.0); the body of
                        // this branch is missing.
                        if(area > 200.0){













    std::cout<<"alll: "<

    // One bucket of boxes and one bucket of scores per label, 80 of each.
    std::vector > bboxes(80);

    std::vector >scores(80);

    int NUMS_ANCHORes=all_bbox_rects.size();

    // label[i] == i for i in 0..79.
    std::vector label(80);


    for (int i = 0; i < 80; ++i)


        label[i] = i;


    //std::vector rect;

    //std::vector temp;






    for (int i = 0; i < NUMS_ANCHORes; i++)


        int j=0;



            if(all_bbox_rects[i].label == label[j])


                std::vector& rect_=bboxes[j];

                std::vector& temp_=scores[j];













            //int l=all_bbox_rects[i].label;








    std::vector& picked=boxes;

    std::vector score;   

    //int type=0;

    for(int i=0;i<(int)bboxes.size();i++)


        std::vector& s=scores[i];

        if(s.empty()) continue;


        // global sort inplace

        std::vector& b=bboxes[i];

        qsort_descent(b, s);

        // apply nms

        std::vector pick;

        std::vector p;

        nms_sorted_bboxes(bboxes[i], pick,p, nms_threshold);



        for (int j = 0; j < (int)p.size(); j++)







    std::cout<<"picked: "<

int num_detected = picked.size();

    for (int i = 0; i < num_detected; i++)


const BBoxRect& r = picked[i];

        float score_ = score[i];

        // NOTE(review): class_index above ranges over 0..79 and coco_classes
        // has 80 entries, so `r.label+1` names the following class and reads
        // past the end for label 79 -- looks like an off-by-one; confirm.
        std::cout<<"class: "<< coco_classes[r.label+1] <<" "<< r.label <<"; score "<


    std::cout<<"detected: "<

    //global nms

    //std::cout<<"glabal nms: "<

    //qsort_descent(all_bbox_rects, all_bbox_scores);

    //std::vector picked_gnms;

    //std::vector p_gnms;

    //nms_sorted_bboxes(all_bbox_rects, picked_gnms,p_gnms, nms_threshold);

    //num_detected = picked_gnms.size();

//for (int i = 0; i < num_detected; i++)


// const BBoxRect& r = picked_gnms[i];

    //  float score_ = score[i];

    //  std::cout<<"class: "<< coco_classes[r.label-1] <<" "<< r.label <<"; score "<


    //std::cout<<"detected: "<

    //Mat& top_blob =top_blobs[0];


    //float *p = reinterpret_cast(top_blob.data);

    /*float *p;

    for (int i = 0; i < num_detected; i++)


        const BBoxRect& r = picked_gnms[i];

        float score_ = score[i];








    //Mat& top_blob = top_blobs[0];

    //top_blob.create(6, num_detected,1,4U);


/*if (top_blob.empty())return -100;

for (int i = 0; i < num_detected; i++)


const BBoxRect& r = bbox_rects[i];

float score = bbox_scores[i];

float* outptr = top_blob.row(i);

outptr[0] = r.label + 1;// +1 for prepend background class

outptr[1] = score;

outptr[2] = (r.xmin)*416;

outptr[3] = (r.ymin)*416;

outptr[4] = (r.xmax)*416;

outptr[5] = (r.ymax)*416;

        std::cout<<"class "<

} */

// top_blobs is not written anywhere in the visible, non-commented code.
return 0;


// Opens the binary file `name`, allocates a float buffer sized from the
// stream position, and wraps it in a Mat built from the three entries of
// `shape` (passed in reverse order: shape[2], shape[1], shape[0]).
// Returns -1 when `shape` does not have exactly 3 entries, otherwise 0.
// NOTE(review): braces and several statements are missing from this text.
// Nothing visible reads the file into `p` or appends `blob` to bottom_blobs.
int inline Yolov3DetectionOutput::readData2Blobs(std::string name,std::vector& shape,std::vector& bottom_blobs){

    if(shape.size()!=3){return -1;}

    std::ifstream ifs(name.c_str(),std::ios::binary);


    // NOTE(review): no seek to the end of the stream is visible before
    // tellg(); on a freshly opened stream this would yield 0. Presumably a
    // seekg(0, std::ios::end) line is missing -- confirm. There is also no
    // check that the file opened successfully.
    int length=ifs.tellg();


    // len2 is not used in the visible code.
    int len2=52*52*255*4;


    int len=length/sizeof(float);

    // NOTE(review): raw new[] with no matching delete[] visible; ownership
    // of `p` after this function returns is unclear -- confirm.
    float *p=new float[len];




    Mat blob(shape[2],shape[1],shape[0],p,4u);




    return 0;


// Opens the binary file `name`, allocates one float buffer sized from the
// stream position, and wraps three regions of it in Mats with dimensions
// (255,13,13), (255,26,26) and (255,52,52). Returns 0.
// NOTE(review): braces and several statements are missing from this text.
// Nothing visible reads the file into `p` or appends the blobs to
// bottom_blobs.
int inline Yolov3DetectionOutput::readData_Blobs(std::string name,std::vector& bottom_blobs){

    std::ifstream ifs(name.c_str(),std::ios::binary);


    // NOTE(review): no seek to the end of the stream is visible before
    // tellg(); presumably a seekg(0, std::ios::end) line is missing --
    // confirm. There is also no check that the file opened successfully.
    int length=ifs.tellg();



    int len=length/sizeof(float);

    // NOTE(review): raw new[] with no matching delete[] visible; ownership
    // of `p` after this function returns is unclear -- confirm.
    float *p=new float[len];





    // Element counts of the first and second blob.
    int len_blob=13*13*255;

    int len_blob2=26*26*255;

    Mat blob(255,13,13,p,4u);

    Mat blob2(255,26,26,p+len_blob,4u);

    // NOTE(review): blob2 starts at p+len_blob and spans len_blob2 floats, so
    // the data after it begins at p+len_blob+len_blob2. Starting blob3 at
    // p+len_blob2 makes it overlap blob2's region -- looks like a wrong
    // offset if the three blobs are stored back to back; confirm.
    Mat blob3(255,52,52,p+len_blob2,4u);






    return 0;


// Driver routine called from ::main. It sets up blob shapes and containers,
// creates a Yolov3DetectionOutput, loads a test image with OpenCV and prints
// the integer corners of a detected box. Always returns 0 in the visible code.
// NOTE(review): many statements are missing from this text (the calls that
// load blobs and run forward(), the loop over boxes, the drawing code), so
// the notes below cover only what is visible.
int Main(int argc, char **argv) {

    std::cout<<"usage: detect implem "<

    std::vector bottom_blobs;

    std::vector top_blobs;

    // Three blob shapes: 255 paired with 13x13, 26x26 and 52x52.
    int a[3]={255,13,13};

    int b[3]={255,26,26};

    int c[3]={255,52,52};

    std::vector shape(a,a+3);

    std::vector shape2(b,b+3);

    std::vector shape3(c,c+3);

    std::vector bottom_blobs_tiny;

    std::vector bottom_blobs_caffe;

    std::vector top_blobs_caffe;

    std::vector bottom_blobs_darknet;

    std::vector top_blobs_darknet;













    std::cout<<"darknetElement0: ";



    // NOTE(review): allocated with new; no matching delete is visible.
    Yolov3DetectionOutput* y= new Yolov3DetectionOutput();

    std::vector boxes2;std::vector boxes3;




    //const Mat& bottom_top_blobs = bottom_blobs[0];

    //const float* xptr = bottom_top_blobs.channel(0);





   // Hard-coded absolute path to the test image.
   std::string imgFilename="/home/jefff/MACE/DOG_416.jpg";

    cv::Mat img = cv::imread(imgFilename);

    std::vector&boxes= boxes2;

    int num=boxes.size();


        // NOTE(review): as written `num` equals boxes.size(), so boxes[num]
        // is one past the last element. Presumably a loop header that
        // decrements `num` first (e.g. `while (num--)`) is missing -- confirm.
        int left=ceil(boxes[num].xmin);

        int top=ceil(boxes[num].ymin);

        int right=ceil(boxes[num].xmax);

        int bot=ceil(boxes[num].ymax);

        printf("left = %d,right = %d,top = %d,bot = %d\n",left,right,top,bot);


        // Two ASCII digits of the label (48 is '0').
        // NOTE(review): the array has exactly two chars and no terminating
        // '\0', yet it is converted to std::string as a C string on the next
        // line, which reads past the array. The initialisers are also int
        // expressions narrowed to char inside braces -- confirm and fix.
        char l[]={boxes[num].label/10+48,boxes[num].label%10+48};

        std::string ll =l;









    return 0;   



}// namespace mace

// Process entry point: forwards the command line to mace::yolov3::Main and
// uses its return value as the exit status instead of discarding it.
int main(int argc, char **argv) { return mace::yolov3::Main(argc, argv); }


#ifndef __DETECTION_H

#define __DETECTION_H


#include "mat.h"

#include "allocator.h"



namespace mace{

namespace yolov3{

// Axis-aligned bounding box with a class label.
// (xmin, ymin) and (xmax, ymax) are opposite corners; label is the class index
// assigned by the detection code.
struct BBoxRect
{
    float xmin;
    float ymin;
    float xmax;
    float ymax;
    int label;
};


// YOLOv3 detection post-processing: declares the two forward passes and the
// static helpers that read raw blob files.
// NOTE(review): the class braces, access specifiers and any constructor /
// destructor declarations are missing from this text; Main() constructs this
// class with `new Yolov3DetectionOutput()`. Restore them before compiling.
class Yolov3DetectionOutput





// Declared here; no definition is visible in this file.
int load_param();

// Reads one blob whose dimensions are given by the 3-entry `shape`.
// Returns -1 when shape.size() != 3, otherwise 0.
static int readData2Blobs(std::string name,std::vector& shape,std::vector& bottom_blobs);

// Reads a file wrapped as three blobs with hard-coded dimensions. Returns 0.
static int readData_Blobs(std::string name,std::vector& bottom_blobs);

//virtual int load_param(const ParamDict& pd);

//int forward(const std::vector& bottom_blobs, std::vector& top_blobs,const Option& opt) const;

// Decodes blobs accessed channel by channel; detections are returned through
// `boxes`. Returns -1 on unexpected blob dimensions, otherwise 0.
int forward(const std::vector& bottom_blobs, std::vector& top_blobs,std::vector& boxes) const;

// Decodes blobs accessed in channel-interleaved order; detections are
// returned through `boxes`. Returns 0 when nothing was collected, else 1.
int forward_nhwc(const std::vector& bottom_blobs, std::vector& top_blobs,std::vector& boxes) const;


//int num_class;

//int num_box;

//float confidence_threshold;

//float nms_threshold;

//float class_score;

//Mat biases;

    //Mat mask;

    //Mat anchors_scale;

    // Not referenced by any code visible in this file.
    int mask_group_num;

//ncnn::Layer* softmax;


// Runtime options: light mode flag, thread count and two allocator pointers.
// NOTE(review): the class braces, access specifiers and the constructor
// declaration (the "default option" comment below has nothing under it) are
// missing from this text. Restore them before compiling.
class Option



// default option



// light mode

// intermediate blob will be recycled when enabled

// enabled by default

bool lightmode;

// thread count

// default value is the one returned by get_cpu_count()
// NOTE(review): in the implementation part of this file the assignment
// `num_threads = get_cpu_count();` is commented out, so the documented
// default does not appear to be applied -- confirm. set_default_option()
// rejects values <= 0.

int num_threads;

// blob memory allocator

Allocator* blob_allocator;

// workspace memory allocator

Allocator* workspace_allocator;


// Returns a read-only reference to the default Option kept by the
// implementation file.
const Option& get_default_option();

// Replaces the default Option with `opt`. Returns 0 on success; returns -1
// and leaves the default unchanged when opt.num_threads <= 0.
int set_default_option(const Option& opt);


} // namespace mace


