ROIPoolingLayer源码解析

ROIPoolingLayer源码解析

标签(空格分隔): 物体检测 faster-rcnn 源码


// ------------------------------------------------------------------
// Fast R-CNN
// Copyright (c) 2015 Microsoft
// Licensed under The MIT License [see fast-rcnn/LICENSE for details]
// Written by Ross Girshick
// ------------------------------------------------------------------

#include <algorithm>
#include <cfloat>
#include <cmath>

#include "caffe/fast_rcnn_layers.hpp"

using std::max;
using std::min;
using std::floor;
using std::ceil;
/*

ROIPoolingLayer的作用是将之前产生的Region Proposals池化(pool)成固定大小的feature map

在faster-rcnn中有两组数据流向到ROIPoolingLayer

bottom[0]一组数据:是共享卷积层最后一层输出的feature map,大小为(n,c,h,w);

bottom[1]另一组数据是:生成的region proposal,包含所属batch的索引以及对应的坐标,是一个二维数组,大小为(num_rois,5)
每一行的意思为(batch的索引,x1,y1,x2,y2),即(batch的索引,start_w,start_h,end_w,end_h);注意坐标是相对于原图像的坐标,使用时要乘以spatial_scale_映射到feature map上


ROIPoolingLayer输出为一系列的feature map,大小为(num_rois,channel_,pooled_h,pooled_w)

num_rois:产生Region Proposals的个数;
channel_: bottom[0]的通道数;
pooled_w,pooled_h:输出的feature的大小

*/

namespace caffe {
/*
LayerSetUp函数从prototxt文件中读取相关的参数

spatial_scale_:指定bottom[0]的feature map相对原图像的尺度

pooled_w,pooled_h:输出的feature的大小


*/
template <typename Dtype>
void ROIPoolingLayer::LayerSetUp(const vector*>& bottom,
      const vector*>& top) {
  ROIPoolingParameter roi_pool_param = this->layer_param_.roi_pooling_param();
  CHECK_GT(roi_pool_param.pooled_h(), 0)
      << "pooled_h must be > 0";
  CHECK_GT(roi_pool_param.pooled_w(), 0)
      << "pooled_w must be > 0";
  pooled_height_ = roi_pool_param.pooled_h();
  pooled_width_ = roi_pool_param.pooled_w();
  spatial_scale_ = roi_pool_param.spatial_scale();
  LOG(INFO) << "Spatial scale: " << spatial_scale_;
}
/*
Reshape函数确定输出数据top[0]的大小,所占的内存大小
(num_rois,channel_,pooled_w,pooled_h)

*/

template <typename Dtype>
void ROIPoolingLayer::Reshape(const vector*>& bottom,
      const vector*>& top) {
  channels_ = bottom[0]->channels();
  height_ = bottom[0]->height();
  width_ = bottom[0]->width();
  top[0]->Reshape(bottom[1]->num(), channels_, pooled_height_,
      pooled_width_);
  max_idx_.Reshape(bottom[1]->num(), channels_, pooled_height_,
      pooled_width_);
}

template <typename Dtype>
void ROIPoolingLayer::Forward_cpu(const vector*>& bottom,
      const vector*>& top) {
  const Dtype* bottom_data = bottom[0]->cpu_data();
  const Dtype* bottom_rois = bottom[1]->cpu_data();
  // Number of ROIs
  int num_rois = bottom[1]->num();   //rois的个数
  int batch_size = bottom[0]->num();  //batch_size
  int top_count = top[0]->count();
  Dtype* top_data = top[0]->mutable_cpu_data();
  caffe_set(top_count, Dtype(-FLT_MAX), top_data);
  int* argmax_data = max_idx_.mutable_cpu_data();
  caffe_set(top_count, -1, argmax_data);  //argmax_data赋予无限小的数据

  // For each ROI R = [batch_index x1 y1 x2 y2]: max pool over R
  for (int n = 0; n < num_rois; ++n) {
    int roi_batch_ind = bottom_rois[0];//获取当前roi对应于batch的索引

    //------将roi的坐标按照缩放比例映射到bottom[0]的尺度---------------//
    int roi_start_w = round(bottom_rois[1] * spatial_scale_);  
    int roi_start_h = round(bottom_rois[2] * spatial_scale_);
    int roi_end_w = round(bottom_rois[3] * spatial_scale_);
    int roi_end_h = round(bottom_rois[4] * spatial_scale_);
    //-----------------------------------------------------------//

    CHECK_GE(roi_batch_ind, 0);
    CHECK_LT(roi_batch_ind, batch_size);

    int roi_height = max(roi_end_h - roi_start_h + 1, 1);
    int roi_width = max(roi_end_w - roi_start_w + 1, 1);
    //求取pooling的pooling kernel的窗口大小----------------//
    const Dtype bin_size_h = static_cast(roi_height)
                             / static_cast(pooled_height_);
    const Dtype bin_size_w = static_cast(roi_width)
                             / static_cast(pooled_width_);
    //--------------------------------------------------------//

    //获取对应batch的feature maps: 大小为(channel_,w,h)
    const Dtype* batch_data = bottom_data + bottom[0]->offset(roi_batch_ind);

    for (int c = 0; c < channels_; ++c) {
      for (int ph = 0; ph < pooled_height_; ++ph) {
        for (int pw = 0; pw < pooled_width_; ++pw) {
          // Compute pooling region for this output unit:
          //  start (included) = floor(ph * roi_height / pooled_height_)
          //  end (excluded) = ceil((ph + 1) * roi_height / pooled_height_)

          //根据roi放缩后的坐标找到在feature map上找到对应的位置(hstart,wstart,hend,wend)
          int hstart = static_cast<int>(floor(static_cast(ph)
                                              * bin_size_h));
          int wstart = static_cast<int>(floor(static_cast(pw)
                                              * bin_size_w));
          int hend = static_cast<int>(ceil(static_cast(ph + 1)
                                           * bin_size_h));
          int wend = static_cast<int>(ceil(static_cast(pw + 1)
                                           * bin_size_w));
          //当然该矩形框必须要在feature map的范围之内,不能超过了feature map的大小
          hstart = min(max(hstart + roi_start_h, 0), height_);
          hend = min(max(hend + roi_start_h, 0), height_);
          wstart = min(max(wstart + roi_start_w, 0), width_);
          wend = min(max(wend + roi_start_w, 0), width_);

          bool is_empty = (hend <= hstart) || (wend <= wstart);


          //然后 找到 top[0]上feature map对应的点
          const int pool_index = ph * pooled_width_ + pw;

          if (is_empty) {
            top_data[pool_index] = 0;
            argmax_data[pool_index] = -1;
          }

          for (int h = hstart; h < hend; ++h) {
            for (int w = wstart; w < wend; ++w) {
              //在bottom[0]的featuer map上对应的点,该pooling是maxpool
              const int index = h * width_ + w;
              if (batch_data[index] > top_data[pool_index]) {
                top_data[pool_index] = batch_data[index];
                argmax_data[pool_index] = index;
              }
            }
          }
        }
      }
      // Increment all data pointers by one channel
      batch_data += bottom[0]->offset(0, 1);
      top_data += top[0]->offset(0, 1);
      argmax_data += max_idx_.offset(0, 1);
    }
    // Increment ROI data pointer
    bottom_rois += bottom[1]->offset(1);
  }
}

template <typename Dtype>
void ROIPoolingLayer::Backward_cpu(const vector*>& top,
      const vector<bool>& propagate_down, const vector*>& bottom) {
  NOT_IMPLEMENTED;
}


// When built with CPU_ONLY, invoke STUB_GPU for this layer.
// NOTE(review): the macro is defined outside this file; presumably it supplies
// placeholder GPU forward/backward methods -- confirm against its definition.
#ifdef CPU_ONLY
STUB_GPU(ROIPoolingLayer);
#endif

// NOTE(review): both macros are defined outside this file. Presumably
// INSTANTIATE_CLASS emits the explicit template instantiations and
// REGISTER_LAYER_CLASS registers the layer under the type name "ROIPooling"
// -- confirm against their definitions.
INSTANTIATE_CLASS(ROIPoolingLayer);
REGISTER_LAYER_CLASS(ROIPooling);

}  // namespace caffe

你可能感兴趣的:(深度学习与计算机视觉,论文笔记,源码)