首先分享两个模型可视化工具。第一个是 Netscope:打开下面的网页,将模型文件拖到左侧的空白区域即可。
http://ethereon.github.io/netscope/#/editor
第二个是 Netron,它支持各种主流框架的模型结构可视化,GitHub 链接如下:
https://github.com/lutzroeder/Netron
YOLOv3 在性能上做过优化,用来做人脸检测和其它目标识别效果非常好。YOLOv3 的训练在 Caffe 等框架中都有支持,但在部署时需要自己编写推理(检测输出)部分。这部分代码在网上的资料较少,这里给出一个框架,目标是将 YOLOv3 移植到嵌入式 C++ 平台。下面直接给出检测部分的代码,希望能帮到大家。
// specific language governing permissions and limitations under the License.
#include
#include "detection.h"
#include
#include
#include
#include
#include
#include "cpu.h"
namespace mace {
namespace yolov3{
// Table of 80 class-name strings.
// NOTE(review): the names match the COCO label set and the table is
// presumably indexed by the zero-based class index produced by the
// detector - confirm against the callers.
std::string coco_classes[] = {
    "person", "bicycle", "car", "motorcycle", "airplane",
    "bus", "train", "truck", "boat", "traffic light",
    "fire hydrant", "stop sign", "parking meter", "bench", "bird",
    "cat", "dog", "horse", "sheep", "cow",
    "elephant", "bear", "zebra", "giraffe", "backpack",
    "umbrella", "handbag", "tie", "suitcase", "frisbee",
    "skis", "snowboard", "sports ball", "kite", "baseball bat",
    "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle",
    "wine glass", "cup", "fork", "knife", "spoon",
    "bowl", "banana", "apple", "sandwich", "orange",
    "broccoli", "carrot", "hot dog", "pizza", "donut",
    "cake", "chair", "couch", "potted plant", "bed",
    "dining table", "toilet", "tv", "laptop", "mouse",
    "remote", "keyboard", "cell phone", "microwave", "oven",
    "toaster", "sink", "refrigerator", "book", "clock",
    "vase", "scissors", "teddy bear", "hair drier", "toothbrush"
};
// Table of 80 integer ids, the same length as coco_classes.
// NOTE(review): the values look like the original, non-contiguous COCO
// category ids (some numbers in 1..90 are skipped), so entry i would be the
// dataset id of class index i - confirm before relying on it.
int coco_ids[] = {
     1,  2,  3,  4,  5,  6,  7,  8,  9, 10,
    11, 13, 14, 15, 16, 17, 18, 19, 20, 21,
    22, 23, 24, 25, 27, 28, 31, 32, 33, 34,
    35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
    46, 47, 48, 49, 50, 51, 52, 53, 54, 55,
    56, 57, 58, 59, 60, 61, 62, 63, 64, 65,
    67, 70, 72, 73, 74, 75, 76, 77, 78, 79,
    80, 81, 82, 84, 85, 86, 87, 88, 89, 90
};
// Default-constructs the option set:
//   - light mode enabled,
//   - 4 worker threads (hard-coded; the get_cpu_count() query is disabled,
//     see the commented-out line in the body),
//   - no blob allocator and no workspace allocator (null pointers).
Option::Option()
    : lightmode(true),
      num_threads(4),
      blob_allocator(0),
      workspace_allocator(0)
{
    // num_threads = get_cpu_count();
}
// File-local default options, default-constructed by Option::Option() and
// replaced only through set_default_option().
static Option g_default_option;

// Returns a read-only reference to the current default options.
const Option& get_default_option() {
    return g_default_option;
}
// Replaces the file-local default options with a copy of `opt`.
//
// Returns 0 on success. If opt.num_threads is zero or negative the defaults
// are left untouched, a diagnostic is written to stderr, and -1 is returned.
int set_default_option(const Option& opt) {
    const bool thread_count_ok = opt.num_threads > 0;
    if (thread_count_ok) {
        g_default_option = opt;
        return 0;
    }

    fprintf(stderr, "invalid option num_threads %d\n", opt.num_threads);
    return -1;
}
// Constructs a detection-output object.
//
// mask_group_num is the only live data member in the class declaration; it is
// zero-initialized here so that reading it before it is assigned is well
// defined instead of yielding an indeterminate value.
Yolov3DetectionOutput::Yolov3DetectionOutput()
    : mask_group_num(0)
{
}
// Destructor. Performs no work: the only cleanup statement is disabled
// (the `softmax` member it refers to is commented out in the class
// declaration as well).
Yolov3DetectionOutput::~Yolov3DetectionOutput() {
    // delete softmax;
}
// NOTE(review): namespace-scope variable with external linkage. Nothing in
// the intact part of this file uses it as an int; the `label` referenced
// inside forward() appears to be a local container that shadows it.
// Presumably a leftover - confirm there are no external users before removing.
int label;
// Returns the area of the overlap between rectangles `a` and `b`, or 0 when
// they are separated along the x axis or the y axis.
//
// Rectangles that merely touch along an edge are not treated as separated;
// the overlap extent along that axis is then zero, so the area is still 0.
static inline float intersection_area(const BBoxRect& a, const BBoxRect& b)
{
    const bool apart_in_x = a.xmin > b.xmax || a.xmax < b.xmin;
    const bool apart_in_y = a.ymin > b.ymax || a.ymax < b.ymin;
    if (apart_in_x || apart_in_y)
    {
        return 0.f;
    }

    // Overlap extent along each axis: smaller far edge minus larger near edge.
    const float overlap_w = std::min(a.xmax, b.xmax) - std::max(a.xmin, b.xmin);
    const float overlap_h = std::min(a.ymax, b.ymax) - std::max(a.ymin, b.ymin);

    return overlap_w * overlap_h;
}
template
static void qsort_descent_inplace(std::vector
{
int i = left;
int j = right;
int middle =(left + right) / 2;
float f = scores[middle];
while (i < j)
{
while(scores[i]>f){
i++;
}
while(scores[j] if(left==middle)break; j--; } if (i { // swap std::swap(datas[i], datas[j]); std::swap(scores[i], scores[j]); i++; j--; } if(i==j) { i++; j--; } } if (left < j) qsort_descent_inplace(datas, scores, left, j); if (i < right) qsort_descent_inplace(datas, scores, i, right); } template static void qsort_descent(std::vector { if (datas.empty() || scores.empty()) return; int left=0; int right=datas.size()-1; qsort_descent_inplace(datas, scores, left,right); } static void nms_sorted_bboxes(const std::vector { picked.clear(); const int n = bboxes.size(); std::vector for (int i = 0; i < n; i++) { const BBoxRect& r = bboxes[i]; float width = r.xmax - r.xmin; float height = r.ymax - r.ymin; areas[i] = width * height; } picked.push_back(0); picked_boxes.push_back(bboxes[0]); for (int i = 1; i < n; i++) { const BBoxRect& a = bboxes[i]; int keep = 1; for (int j = 0; j < (int)picked.size(); j++) { const BBoxRect& b = bboxes[picked[j]]; // intersection over union float ratio=areas[i]/areas[picked[j]]; ratio=(ratio>1)?ratio:(1.0/(float)ratio); //ratio=sqrt(ratio); float inter_area = intersection_area(a, b)*ratio; float union_area = areas[i] + areas[picked[j]] - inter_area; // float IoU = inter_area / union_area if (inter_area / union_area > nms_threshold) { keep = 0; break; } } if (keep) { picked.push_back(i); picked_boxes.push_back(a); } } } static inline float sigmoid(float x) { return 1.f / (1.f + exp(-x)); } int Yolov3DetectionOutput::forward_nhwc(const std::vector { size_t num_class = 80; size_t NUMS_ANCHOR = 3; float confidence_threshold=0.6; float nms_threshold = 0.45f; size_t scale[3]={32,16,8}; size_t m_[9]={6,7,8,4,5,6,1,2,3}; size_t anchors[18] = {10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326}; size_t num_mask=3* NUMS_ANCHOR; size_t len_biases=3* NUMS_ANCHOR*2; Mat anchors_scale(3,scale); Mat mask(num_mask,m_); Mat biases(len_biases,anchors); std::vector std::vector for (size_t b = 0; b < bottom_blobs.size(); b++) { const Mat& 
bottom_top_blobs = bottom_blobs[b]; size_t w = bottom_top_blobs.w; size_t h = bottom_top_blobs.h; size_t channels = bottom_top_blobs.c; const size_t channels_per_box = channels / NUMS_ANCHOR; size_t mask_offset = b * NUMS_ANCHOR; size_t net_w = (size_t)((reinterpret_cast size_t net_h = (size_t)((reinterpret_cast std::cout<<"w,h,c"< std::cout<<"blobs w,h,channels"< if((net_w!=net_h) || (net_w!=416)){ std::cerr<<"check Data Input"< //return -1; } float *p; #pragma omp parallel for num_threads(opt.num_threads) for (size_t k = 0; k < h*w; k++) { size_t i=k/w; size_t j=k%w; for (size_t pp = 0; pp < NUMS_ANCHOR; pp++) { size_t biases_index = mask[pp+NUMS_ANCHOR*b]; const float bias_w = (reinterpret_cast const float bias_h = (reinterpret_cast long bias=k*channels+pp*channels_per_box; p= reinterpret_cast float bbox_w = sigmoid(*(p+2))* bias_w ; float bbox_h = sigmoid(*(p+3)) * bias_h ; float bbox_cx = (j + sigmoid(*(p)))*416/w; float bbox_cy = (i + sigmoid(*(p+1)))*416/h; //float bbox_w = pow(2,*(p+2)) * bias_w; //float bbox_h = pow(2,*(p+3)) * bias_h; float bbox_xmin = bbox_cx - bbox_w * 0.5f; float bbox_ymin = bbox_cy - bbox_h * 0.5f; float bbox_xmax = bbox_cx + bbox_w * 0.5f; float bbox_ymax = bbox_cy + bbox_h * 0.5f; bbox_xmin=bbox_xmin>0?bbox_xmin:0; bbox_ymin=bbox_ymin>0?bbox_ymin:0; bbox_xmax=bbox_xmax>416?416:bbox_xmax; bbox_ymax=bbox_ymax>416?416:bbox_ymax; float box_score = sigmoid(*(p+4)); size_t class_index = 0; float class_score = 0.0f; for (size_t q = 0; q < num_class; q++) { float score = sigmoid(*(p+5+q)); if (score > class_score) { class_score=score; class_index = q; //printf( "%d %f %f\n", class_index, box_score, class_score); } } float confidence = box_score * class_score; if (confidence >= confidence_threshold) { BBoxRect c = { bbox_xmin, bbox_ymin, bbox_xmax, bbox_ymax, class_index }; float area=(bbox_xmax-bbox_xmin)*(bbox_ymax-bbox_ymin); if(area > 60.0){ all_bbox_rects.push_back(c); all_bbox_scores.push_back(confidence); } } } } } size_t 
NUMS_ANCHORes=all_bbox_rects.size(); std::cout<<"alll: "< if(NUMS_ANCHORes==0)return 0; std::vector size_t lnum=0; size_t N=NUMS_ANCHORes; if(N>0){labelX.push_back(all_bbox_rects[N-1].label);lnum++;} while(--N) { size_t la=all_bbox_rects[N-1].label; bool fl=true; for(size_t j=0;j { if(la == labelX[j]) { fl=false; break; } } if(fl){ labelX.push_back(la); lnum++; } } std::cout<<"labels:"< std::vector std::vector //label.push_back(all_bbox_rects[0].label); for (size_t i = 0; i < NUMS_ANCHORes; i++) { size_t j=0; for(;(size_t)j { if(all_bbox_rects[i].label == labelX[j]) { std::vector std::vector //printf("%d::%f\n",j,all_bbox_scores[i]); rect_.push_back(all_bbox_rects[i]); temp_.push_back(all_bbox_scores[i]); break; } } } std::vector std::vector //size_t type=0; for(size_t i=0;i<(size_t)bboxes.size();i++) { std::vector if(s.empty()) continue; //type++; // global sort inplace std::vector qsort_descent(b, s); // apply nms std::vector std::vector nms_sorted_bboxes(bboxes[i], pick,p, nms_threshold); picked.insert(picked.begin(),pick.begin(),pick.end()); for (size_t j = 0; j < (size_t)p.size(); j++) { score.push_back(s[p[j]]); } } //printf("%d\n",type); size_t num_detected = picked.size(); for (size_t i = 0; i < num_detected; i++) { const BBoxRect& r = picked[i]; float score_ = score[i]; //std::cout<<"class: "<< coco_classes[r.label] <<" "<< r.label <<"; score "< } std::cout<<"detected:<< num_detected<<----------------------- "< std::cout<<"-------------------------------- "< //global nms //std::cout<<"glabal nms: "< //std::cout<<"picked: "< //#qsort_descent(all_bbox_rects, all_bbox_scores); //std::vector //std::vector //nms_sorted_bboxes(all_bbox_rects, picked_gnms,p_gnms, nms_threshold); //num_detected = picked_gnms.size(); //for (size_t i = 0; i < num_detected; i++) //{ // const BBoxRect& r = picked_gnms[i]; // float score_ = score[i]; // std::cout<<"class: "<< coco_classes[r.label] <<" "<< r.label <<"; score "< //} //std::cout<<"detected: "< Mat& top_blob =top_blobs[0]; 
return 1; } int Yolov3DetectionOutput::forward(const std::vector { int num_class = 80; int NUMS_ANCHOR = 3; float confidence_threshold=0.6; float nms_threshold = 0.6f; int scale[3]={32,16,8}; int m_[9]={8,8,8,8,8,8,8,8,8}; int anchors[18] = {10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326}; int num_mask=3* NUMS_ANCHOR; int len_biases=3* NUMS_ANCHOR*2; Mat anchors_scale(3,scale); Mat mask(num_mask,m_); Mat biases(len_biases,anchors); std::vector std::vector for (size_t b = 0; b < bottom_blobs.size(); b++) { const Mat& bottom_top_blobs = bottom_blobs[b]; int w = bottom_top_blobs.w; int h = bottom_top_blobs.h; int channels = bottom_top_blobs.c; //printf("%d %d %d\n", w, h, channels); const int channels_per_box = channels / NUMS_ANCHOR; // anchor coord + box score + num_class if (channels_per_box != 4 + 1 + num_class) return -1; int mask_offset = b * NUMS_ANCHOR; //int net_w= anchors_scale[b]; int net_w = (int)((reinterpret_cast int net_h = (int)((reinterpret_cast if((net_w!=net_h) || (net_w!=416)){ std::cerr<<"check Data Input"< return -1; } //printf("%d %d\n", net_w, net_h); //printf("%d %d %d\n", w, h, channels); #pragma omp parallel for num_threads(opt.num_threads) for (int pp = 0; pp < NUMS_ANCHOR; pp++) { int p = pp * channels_per_box; int biases_index = (reinterpret_cast //printf("%d\n", biases_index); const float bias_w = (reinterpret_cast const float bias_h = (reinterpret_cast //printf("%f %f\n", bias_w, bias_h); const float* xptr = bottom_top_blobs.channel(p); const float* yptr = bottom_top_blobs.channel(p + 1); const float* wptr = bottom_top_blobs.channel(p + 2); const float* hptr = bottom_top_blobs.channel(p + 3); const float* box_score_ptr = bottom_top_blobs.channel(p + 4); // softmax class scores Mat scores = bottom_top_blobs.channel_range(p + 5, num_class); //softmax->forward_inplace(scores, opt); for (int k = 0; k < h*w; k++) { int i=k/w; int j=k%w; float bbox_w = exp(wptr[0]) * bias_w ; float bbox_h = exp(hptr[0]) * bias_h ; float 
bbox_cx = (j + sigmoid(xptr[0]))*416/w; //float bbox_cx = (j + sigmoid(xptr[0]))*416/w; float bbox_cy = (i + sigmoid(yptr[0]))*416/h; //float bbox_w = pow(2,wptr[0]) * bias_w / 416; //float bbox_h = pow(2,hptr[0]) * bias_h / 416; float bbox_xmin = bbox_cx - bbox_w * 0.5f; float bbox_ymin = bbox_cy - bbox_h * 0.5f; float bbox_xmax = bbox_cx + bbox_w * 0.5f; float bbox_ymax = bbox_cy + bbox_h * 0.5f; bbox_xmin=bbox_xmin>0?bbox_xmin:0; bbox_ymin=bbox_ymin>0?bbox_ymin:0; bbox_xmax=bbox_xmax>416?416:bbox_xmax; bbox_ymax=bbox_ymax>416?416:bbox_ymax; // box score float box_score = sigmoid(box_score_ptr[0]); // find class index with max class score int class_index = 0; float class_score = 0.0f; for (int q = 0; q < num_class; q++) { float score = sigmoid(scores.channel(q).row(i)[j]); if (score > class_score) { class_index = q; class_score = score; //printf( "%d %f %f\n", class_index, box_score, class_score); } } float confidence = box_score * class_score; if (confidence >= confidence_threshold) { BBoxRect c = { bbox_xmin, bbox_ymin, bbox_xmax, bbox_ymax, class_index }; float area=(bbox_xmax-bbox_xmin)*(bbox_ymax-bbox_ymin); if(area > 200.0){ all_bbox_rects.push_back(c); all_bbox_scores.push_back(confidence); } } xptr++; yptr++; wptr++; hptr++; box_score_ptr++; } } } std::cout<<"alll: "< std::vector std::vector int NUMS_ANCHORes=all_bbox_rects.size(); std::vector //label.insert(label.begin,coco_classes,coco_classes+80); for (int i = 0; i < 80; ++i) { label[i] = i; } //std::vector //std::vector //label.push_back(all_bbox_rects[0].label); //rect.push_back(all_bbox_rects[0]); //temp.push_back(all_bbox_scores[0]); //bboxes.push_back(rect); //scores.push_back(temp); for (int i = 0; i < NUMS_ANCHORes; i++) { int j=0; for(;j { if(all_bbox_rects[i].label == label[j]) { std::vector std::vector //printf("%d::%f\n",j,all_bbox_scores[i]); rect_.push_back(all_bbox_rects[i]); temp_.push_back(all_bbox_scores[i]); break; } }/* if(j==(int)label.size()) { //rect.pop_back(); //temp.pop_back(); 
//printf("%d::%d::%d\n",label.size(),all_bbox_rects[i].label,i); //int l=all_bbox_rects[i].label; label.push_back(all_bbox_rects[i].labels); //rect.push_back(all_bbox_rects[i].label); //temp.push_back(all_bbox_scores[i]); //bboxes.push_back(rect); //scores.push_back(temp); }*/ } std::vector std::vector //int type=0; for(int i=0;i<(int)bboxes.size();i++) { std::vector if(s.empty()) continue; //type++; // global sort inplace std::vector qsort_descent(b, s); // apply nms std::vector std::vector nms_sorted_bboxes(bboxes[i], pick,p, nms_threshold); picked.insert(picked.begin(),pick.begin(),pick.end()); for (int j = 0; j < (int)p.size(); j++) { score.push_back(s[p[j]]); } } //printf("%d\n",type); std::cout<<"picked: "< int num_detected = picked.size(); for (int i = 0; i < num_detected; i++) { const BBoxRect& r = picked[i]; float score_ = score[i]; std::cout<<"class: "<< coco_classes[r.label+1] <<" "<< r.label <<"; score "< } std::cout<<"detected: "< //global nms //std::cout<<"glabal nms: "< //qsort_descent(all_bbox_rects, all_bbox_scores); //std::vector //std::vector //nms_sorted_bboxes(all_bbox_rects, picked_gnms,p_gnms, nms_threshold); //num_detected = picked_gnms.size(); //for (int i = 0; i < num_detected; i++) //{ // const BBoxRect& r = picked_gnms[i]; // float score_ = score[i]; // std::cout<<"class: "<< coco_classes[r.label-1] <<" "<< r.label <<"; score "< //} //std::cout<<"detected: "< //Mat& top_blob =top_blobs[0]; //top_blob.reshape(6,num_detected,4u); //float *p = reinterpret_cast /*float *p; for (int i = 0; i < num_detected; i++) { const BBoxRect& r = picked_gnms[i]; float score_ = score[i]; *(p++)=r.xmin; *(p++)=r.xmax; *(p++)=r.ymin; *(p++)=r.ymax; *(p++)=r.label+1; *(p++)=score[i]; }*/ //Mat& top_blob = top_blobs[0]; //top_blob.create(6, num_detected,1,4U); //std::cout<<"success"< /*if (top_blob.empty())return -100; for (int i = 0; i < num_detected; i++) { const BBoxRect& r = bbox_rects[i]; float score = bbox_scores[i]; float* outptr = top_blob.row(i); 
outptr[0] = r.label + 1;// +1 for prepend background class outptr[1] = score; outptr[2] = (r.xmin)*416; outptr[3] = (r.ymin)*416; outptr[4] = (r.xmax)*416; outptr[5] = (r.ymax)*416; std::cout<<"class "< } */ return 0; } int inline Yolov3DetectionOutput::readData2Blobs(std::string name,std::vector if(shape.size()!=3){return -1;} std::ifstream ifs(name.c_str(),std::ios::binary); ifs.seekg(0,std::ios::end); int length=ifs.tellg(); ifs.seekg(0,std::ios::beg); int len2=52*52*255*4; printf("%d::MAX%d\n",length,len2); int len=length/sizeof(float); float *p=new float[len]; if(ifs.is_open()){ ifs.read(reinterpret_cast } Mat blob(shape[2],shape[1],shape[0],p,4u); bottom_blobs.push_back(blob); ifs.close(); blob.~Mat(); return 0; } int inline Yolov3DetectionOutput::readData_Blobs(std::string name,std::vector std::ifstream ifs(name.c_str(),std::ios::binary); ifs.seekg(0,std::ios::end); int length=ifs.tellg(); ifs.seekg(0,std::ios::beg); int len=length/sizeof(float); float *p=new float[len]; if(ifs.is_open()){ ifs.read(reinterpret_cast } //printf("out:%f",*p); int len_blob=13*13*255; int len_blob2=26*26*255; Mat blob(255,13,13,p,4u); Mat blob2(255,26,26,p+len_blob,4u); Mat blob3(255,52,52,p+len_blob2,4u); bottom_blobs.push_back(blob); bottom_blobs.push_back(blob2); bottom_blobs.push_back(blob3); ifs.close(); blob.~Mat(); return 0; } std::cout<<"usage: detect implem "< std::vector std::vector int a[3]={255,13,13}; int b[3]={255,26,26}; int c[3]={255,52,52}; std::vector std::vector std::vector std::vector std::vector std::vector std::vector std::vector Yolov3DetectionOutput::readData2Blobs("1216/model_out_layer16_conv",shape,bottom_blobs_tiny); Yolov3DetectionOutput::readData2Blobs("1216/model_out_layer23_conv",shape2,bottom_blobs_tiny); std::cout<<"Blobs"< Yolov3DetectionOutput::readData2Blobs("/home/jefff/MACE/YOLOV3/model_out_layer82_conv",shape,bottom_blobs); Yolov3DetectionOutput::readData2Blobs("/home/jefff/MACE/YOLOV3/model_out_layer94_conv",shape2,bottom_blobs); 
Yolov3DetectionOutput::readData2Blobs("/home/jefff/MACE/YOLOV3/model_out_layer106_conv",shape3,bottom_blobs); std::cout<<"Blobs"< Yolov3DetectionOutput::readData2Blobs("/home/jefff/MACE/13x13",shape,bottom_blobs_caffe); Yolov3DetectionOutput::readData2Blobs("/home/jefff/MACE/26x26",shape2,bottom_blobs_caffe); //Yolov3DetectionOutput::readData2Blobs("/home/jefff/MACE/52x52",shape3,bottom_blobs_caffe); Yolov3DetectionOutput::readData_Blobs("v3/416X416darknetout",bottom_blobs_darknet); std::cout<<"darknetElement0: "; std::cout< Yolov3DetectionOutput* y= new Yolov3DetectionOutput(); std::vector //y->forward(bottom_blobs_tiny,top_blobs); y->forward(bottom_blobs,top_blobs,boxes2); //y->forward_nhwc(bottom_blobs,top_blobs,boxes2); //const Mat& bottom_top_blobs = bottom_blobs[0]; //const float* xptr = bottom_top_blobs.channel(0); //y->forward(bottom_blobs_caffe,top_blobs_caffe); //y->forward_nhwc(bottom_blobs_caffe,top_blobs_caffe,boxes3); //y->forward(bottom_blobs_darknet,top_blobs_darknet); std::string imgFilename="/home/jefff/MACE/DOG_416.jpg"; cv::Mat img = cv::imread(imgFilename); std::vector int num=boxes.size(); while(num--){ int left=ceil(boxes[num].xmin); int top=ceil(boxes[num].ymin); int right=ceil(boxes[num].xmax); int bot=ceil(boxes[num].ymax); printf("left = %d,right = %d,top = %d,bot = %d\n",left,right,top,bot); cv::rectangle(img,cv::Point(left,top),cv::Point(right,bot),cv::Scalar(0,0,255),3,8,0); char l[]={boxes[num].label/10+48,boxes[num].label%10+48}; std::string ll =l; cv::putText(img,ll,cv::Point(right,bot),cv::FONT_HERSHEY_PLAIN,1.0,cv::Scalar(0,0,0),1,1); } cv::namedWindow("show",CV_WINDOW_AUTOSIZE); cv::imshow("show",img); cv::waitKey(0); //free_detections(dets,nboxes); //free_image(im); //free_image(sized); return 0; } } }// namespace mace int main(int argc, char **argv) { mace::yolov3::Main(argc, argv);} 头文件 #ifndef __DETECTION_H #define __DETECTION_H #include #include "mat.h" #include "allocator.h" #include #include namespace mace{ namespace 
yolov3{ struct BBoxRect { float xmin; float ymin; float xmax; float ymax; int label; }; class Yolov3DetectionOutput { public: Yolov3DetectionOutput(); ~Yolov3DetectionOutput(); int load_param(); static int readData2Blobs(std::string name,std::vector static int readData_Blobs(std::string name,std::vector //virtual int load_param(const ParamDict& pd); //int forward(const std::vector int forward(const std::vector int forward_nhwc(const std::vector public: //int num_class; //int num_box; //float confidence_threshold; //float nms_threshold; //float class_score; //Mat biases; //Mat mask; //Mat anchors_scale; int mask_group_num; //ncnn::Layer* softmax; }; class Option { public: // default option Option(); public: // light mode // intermediate blob will be recycled when enabled // enabled by default bool lightmode; // thread count // default value is the one returned by get_cpu_count() int num_threads; // blob memory allocator Allocator* blob_allocator; // workspace memory allocator Allocator* workspace_allocator; }; const Option& get_default_option(); int set_default_option(const Option& opt); } } // namespace mace #endif // LAYER_YOLODETECTIONOUTPUT_H
int Main(int argc, char **argv) {