R-FCN源代码解读

本文对目标检测算法R-FCN源代码中psroi_pooling_layer.cu的前向(forward)过程进行详细解读,相关示意图可参考 http://blog.csdn.net/shadow_guo/article/details/51767036 。下面给出带注释的代码:


// ------------------------------------------------------------------
// R-FCN
// Copyright (c) 2016 Microsoft
// Licensed under The MIT License [see r-fcn/LICENSE for details]
// Written by Yi Li
// ------------------------------------------------------------------

#include <cfloat>

#include "caffe/rfcn_layers.hpp"
#include "caffe/util/gpu_util.cuh"

using std::max;
using std::min;

namespace caffe {

  /**
   * Forward pass of position-sensitive ROI average pooling (R-FCN).
   *
   * Each loop iteration computes one output element. The flat `index` is
   * decoded as (n, ctop, ph, pw): ROI n, output channel ctop, pooled row ph,
   * pooled column pw. The value written is the average of ONE input channel
   * over the bin (ph, pw) of ROI n; that channel is
   * c = (ctop * group_size + ph) * group_size + pw.
   *
   * @param nthreads        upper bound of the loop index, i.e. the number of
   *                        output elements to compute
   * @param bottom_data     input feature map, addressed here as
   *                        ((batch * channels + c) * height + h) * width + w
   * @param spatial_scale   factor that maps ROI coordinates onto the feature
   *                        map (presumably 1/16 for a total stride of 16;
   *                        confirm against the layer configuration)
   * @param channels        channel count of bottom_data; the indexing below
   *                        assumes at least output_dim * group_size^2 channels
   * @param height          feature map height (rows)
   * @param width           feature map width (columns)
   * @param pooled_height   number of bins per ROI along the vertical axis
   * @param pooled_width    number of bins per ROI along the horizontal axis
   * @param bottom_rois     ROIs, 5 values each: [batch_index, x1, y1, x2, y2]
   * @param output_dim      number of output channels per ROI
   * @param group_size      side length of the position-sensitive grid; the
   *                        bin indices (ph, pw) are used directly as grid
   *                        cell indices, so this is presumably equal to
   *                        pooled_height / pooled_width (confirm in setup)
   * @param top_data        output, nthreads elements in (n, ctop, ph, pw) order
   * @param mapping_channel output, the input channel c used for each output
   *                        element
   *
   * Launch layout: one-dimensional; the index range is driven by the
   * CUDA_KERNEL_LOOP macro, which is defined in the Caffe headers.
   */
  template <typename Dtype>
  __global__ void PSROIPoolingForward(
    const int nthreads,
    const Dtype* bottom_data,
    const Dtype spatial_scale,
    const int channels,
    const int height,
    const int width,
    const int pooled_height,
    const int pooled_width,
    const Dtype* bottom_rois,
    const int output_dim,
    const int group_size,
    Dtype* top_data,
    int* mapping_channel) {
    CUDA_KERNEL_LOOP(index, nthreads) {
      // The output is in order (n, ctop, ph, pw)
      int pw = index % pooled_width;
      int ph = (index / pooled_width) % pooled_height;
      int ctop = (index / pooled_width / pooled_height) % output_dim;
      int n = index / pooled_width / pooled_height / output_dim;

      // Use a local pointer instead of advancing the kernel argument itself:
      // if the loop macro runs this body more than once in a thread, a `+=`
      // on `bottom_rois` would accumulate offsets across iterations and read
      // the wrong ROI.
      const Dtype* offset_bottom_rois = bottom_rois + n * 5;
      int roi_batch_ind = offset_bottom_rois[0];

      // ROI extent on the feature map, as the half-open interval [start, end).
      // Coordinates are rounded first; the end corner is made exclusive (+1)
      // before scaling.
      Dtype roi_start_w =
        static_cast<Dtype>(round(offset_bottom_rois[1])) * spatial_scale;
      Dtype roi_start_h =
        static_cast<Dtype>(round(offset_bottom_rois[2])) * spatial_scale;
      Dtype roi_end_w =
        static_cast<Dtype>(round(offset_bottom_rois[3]) + 1.) * spatial_scale;
      Dtype roi_end_h =
        static_cast<Dtype>(round(offset_bottom_rois[4]) + 1.) * spatial_scale;

      // Clamp degenerate ROIs to a minimum extent of 0.1 so that the bin size
      // below is never zero.
      Dtype roi_width = max(roi_end_w - roi_start_w, static_cast<Dtype>(0.1));
      Dtype roi_height = max(roi_end_h - roi_start_h, static_cast<Dtype>(0.1));

      // Size of one bin on the feature map.
      Dtype bin_size_h = roi_height / static_cast<Dtype>(pooled_height);
      Dtype bin_size_w = roi_width / static_cast<Dtype>(pooled_width);

      // Integer pixel range covered by bin (ph, pw): floor the start, ceil the
      // end, so that partially covered pixels are included.
      int hstart = floor(static_cast<Dtype>(ph) * bin_size_h
                          + roi_start_h);
      int wstart = floor(static_cast<Dtype>(pw) * bin_size_w
                          + roi_start_w);
      int hend = ceil(static_cast<Dtype>(ph + 1) * bin_size_h
                        + roi_start_h);
      int wend = ceil(static_cast<Dtype>(pw + 1) * bin_size_w
                        + roi_start_w);

      // Clip the bin to the feature map boundaries.
      hstart = min(max(hstart, 0), height);
      hend = min(max(hend, 0), height);
      wstart = min(max(wstart, 0), width);
      wend = min(max(wend, 0), width);
      bool is_empty = (hend <= hstart) || (wend <= wstart);

      // Position-sensitive channel selection: the grid cell (gh, gw) is the
      // bin itself, and the input channel is
      //   c = (ctop * group_size + gh) * group_size + gw
      // e.g. ctop = 1, (gh, gw) = (1, 1), group_size = 7  ->  c = 1*49 + 1*7 + 1.
      int gw = pw;
      int gh = ph;
      int c = (ctop * group_size + gh) * group_size + gw;

      // Start of channel c of image roi_batch_ind. As with the ROI pointer, a
      // local pointer keeps `bottom_data` itself unchanged between iterations.
      const Dtype* offset_bottom_data =
        bottom_data + (roi_batch_ind * channels + c) * height * width;

      // Sum this single channel over the bin.
      Dtype out_sum = 0;
      for (int h = hstart; h < hend; ++h) {
        for (int w = wstart; w < wend; ++w) {
          int bottom_index = h * width + w;
          out_sum += offset_bottom_data[bottom_index];
        }
      }

      // Average over the clipped bin area; an empty bin yields 0.
      Dtype bin_area = (hend - hstart) * (wend - wstart);
      top_data[index] = is_empty ? 0. : out_sum / bin_area;
      // Record which input channel produced this output element.
      mapping_channel[index] = c;
    }
  }

  /**
   * GPU forward pass of the PSROIPooling layer.
   *
   * Reads the feature map from bottom[0] and the ROIs from bottom[1], then
   * launches PSROIPoolingForward with one loop index per element of top[0].
   * Before the launch, top[0] is filled with 0 and mapping_channel_ with -1.
   *
   * @param bottom  bottom[0]: input feature map; bottom[1]: ROIs
   * @param top     top[0]: pooled output, written on the GPU
   */
  template <typename Dtype>
  void PSROIPoolingLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
    const Dtype* bottom_data = bottom[0]->gpu_data();
    const Dtype* bottom_rois = bottom[1]->gpu_data();
    Dtype* top_data = top[0]->mutable_gpu_data();
    int* mapping_channel_ptr = mapping_channel_.mutable_gpu_data();
    int count = top[0]->count();
    // Initialise both outputs before the kernel overwrites them.
    caffe_gpu_set(count, Dtype(0), top_data);
    caffe_gpu_set(count, -1, mapping_channel_ptr);
    // Launch the forward kernel using Caffe's usual grid/block macros, passing
    // the element count and the layer parameters the kernel needs.
    // NOLINT_NEXT_LINE(whitespace/operators)
    PSROIPoolingForward<Dtype><<<CAFFE_GET_BLOCKS(count),
                                 CAFFE_CUDA_NUM_THREADS>>>(
      count, bottom_data, spatial_scale_, channels_, height_, width_, pooled_height_,
      pooled_width_, bottom_rois, output_dim_, group_size_, top_data, mapping_channel_ptr);
    // Report any launch error.
    CUDA_POST_KERNEL_CHECK;
  }
}

笔者已有好几年没写C++,也没有系统研究过CUDA,因此部分解读只是个人推测,如有错误,欢迎指正。

你可能感兴趣的:(目标检测,R-FCN,caffe源码,PS)