本文对目标检测算法 R-FCN 源代码中 psroi_pooling_layer.cu 的前向(forward)过程进行逐行解读;相关示意图的理解可参考 http://blog.csdn.net/shadow_guo/article/details/51767036 。代码及注释如下:
// ------------------------------------------------------------------
// R-FCN
// Copyright (c) 2016 Microsoft
// Licensed under The MIT License [see r-fcn/LICENSE for details]
// Written by Yi Li
// ------------------------------------------------------------------
#include <algorithm>

#include "caffe/rfcn_layers.hpp"
#include "caffe/util/gpu_util.cuh"
using std::max;
using std::min;
namespace caffe {
// Position-sensitive ROI average pooling, forward pass.
//
// Each loop iteration produces one element of the output, whose flat index
// is laid out as (n, ctop, ph, pw): ROI n, output channel ctop, bin row ph,
// bin column pw. The value is the average of one input channel over the
// part of the feature map covered by bin (ph, pw) of ROI n.
//
// Parameters:
//   nthreads       number of output elements to compute
//   bottom_data    input feature map, indexed here as
//                  ((batch * channels + c) * height + h) * width + w
//   spatial_scale  factor the ROI coordinates are multiplied by to obtain
//                  feature-map coordinates (the original annotation cites a
//                  total stride of 16, i.e. presumably 1/16 -- confirm
//                  against the layer configuration)
//   channels       number of input channels; the original annotation gives
//                  k*k*(C+1) for the classification branch
//   height, width  spatial size (rows, columns) of the input feature map
//   pooled_height, pooled_width
//                  number of bins per ROI along each axis (k; 7 according
//                  to the original annotation)
//   bottom_rois    ROIs, 5 values each: [batch_ind, x1, y1, x2, y2]
//   output_dim     number of output channels per bin (the original
//                  annotation cites 21 for class scores and 8 for box
//                  regression)
//   group_size     number of position-sensitive groups per axis (k)
//   top_data       output buffer, one value per index in [0, nthreads)
//   mapping_channel
//                  output buffer receiving, for every output index, the
//                  input channel that was pooled (presumably consumed by
//                  the backward pass -- not visible here)
template <typename Dtype>
__global__ void PSROIPoolingForward(
    const int nthreads,
    const Dtype* bottom_data,
    const Dtype spatial_scale,
    const int channels,
    const int height,
    const int width,
    const int pooled_height,
    const int pooled_width,
    const Dtype* bottom_rois,
    const int output_dim,
    const int group_size,
    Dtype* top_data,
    int* mapping_channel) {
  CUDA_KERNEL_LOOP(index, nthreads) {
    // Decompose the flat output index; the output is in order
    // (n, ctop, ph, pw).
    const int pw = index % pooled_width;
    const int ph = (index / pooled_width) % pooled_height;
    const int ctop = (index / pooled_width / pooled_height) % output_dim;
    const int n = index / pooled_width / pooled_height / output_dim;

    // Use a per-iteration local pointer instead of advancing the kernel
    // parameter: CUDA_KERNEL_LOOP (defined outside this file) may run
    // several iterations in one thread, and an in-place `+=` on the
    // parameter would then accumulate offsets from earlier iterations.
    const Dtype* offset_bottom_rois = bottom_rois + n * 5;
    const int roi_batch_ind = offset_bottom_rois[0];

    // ROI extent in feature-map coordinates; [start, end) interval for
    // spatial sampling (hence the +1 on the end coordinates).
    const Dtype roi_start_w =
        static_cast<Dtype>(round(offset_bottom_rois[1])) * spatial_scale;
    const Dtype roi_start_h =
        static_cast<Dtype>(round(offset_bottom_rois[2])) * spatial_scale;
    const Dtype roi_end_w =
        static_cast<Dtype>(round(offset_bottom_rois[3]) + 1.) * spatial_scale;
    const Dtype roi_end_h =
        static_cast<Dtype>(round(offset_bottom_rois[4]) + 1.) * spatial_scale;

    // Clamp the ROI size to at least 0.1 so the bin size is never zero.
    const Dtype roi_width =
        max(roi_end_w - roi_start_w, static_cast<Dtype>(0.1));
    const Dtype roi_height =
        max(roi_end_h - roi_start_h, static_cast<Dtype>(0.1));

    // Size of one bin on the feature map.
    const Dtype bin_size_h = roi_height / static_cast<Dtype>(pooled_height);
    const Dtype bin_size_w = roi_width / static_cast<Dtype>(pooled_width);

    // Feature-map range covered by bin (ph, pw): floor for the start and
    // ceil for the end.
    int hstart = floor(static_cast<Dtype>(ph) * bin_size_h + roi_start_h);
    int wstart = floor(static_cast<Dtype>(pw) * bin_size_w + roi_start_w);
    int hend = ceil(static_cast<Dtype>(ph + 1) * bin_size_h + roi_start_h);
    int wend = ceil(static_cast<Dtype>(pw + 1) * bin_size_w + roi_start_w);

    // Clip the bin to the input boundaries.
    hstart = min(max(hstart, 0), height);
    hend = min(max(hend, 0), height);
    wstart = min(max(wstart, 0), width);
    wend = min(max(wend, 0), width);
    const bool is_empty = (hend <= hstart) || (wend <= wstart);

    // Position-sensitive channel selection: output channel ctop at bin
    // (gh, gw) reads input channel (ctop * group_size + gh) * group_size + gw.
    // Example with group_size = 7: ctop = 1, (gh, gw) = (1, 1) gives
    // c = 1 * 49 + 1 * 7 + 1 = 57.
    const int gw = pw;
    const int gh = ph;
    const int c = (ctop * group_size + gh) * group_size + gw;

    // Start of channel c of image roi_batch_ind. This is pointer
    // arithmetic on a local copy, for the same reason as offset_bottom_rois.
    const Dtype* offset_bottom_data =
        bottom_data + (roi_batch_ind * channels + c) * height * width;

    // Sum the selected channel over the bin.
    Dtype out_sum = 0;
    for (int h = hstart; h < hend; ++h) {
      for (int w = wstart; w < wend; ++w) {
        const int bottom_index = h * width + w;
        out_sum += offset_bottom_data[bottom_index];
      }
    }

    // Number of feature-map cells in the (clipped) bin.
    const Dtype bin_area = (hend - hstart) * (wend - wstart);
    // An empty bin yields 0; otherwise the average over the bin.
    top_data[index] = is_empty ? Dtype(0) : out_sum / bin_area;
    // Record which input channel produced this output element.
    mapping_channel[index] = c;
  }
}
// GPU forward pass of the position-sensitive ROI pooling layer.
//
// bottom[0] supplies the input feature map and bottom[1] the ROIs; the
// pooled result is written to top[0]. Both top[0] and the layer's
// mapping_channel_ buffer are reset (to 0 and -1 respectively) before the
// kernel runs, then PSROIPoolingForward is launched with one work item per
// element of top[0].
template <typename Dtype>
void PSROIPoolingLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const Dtype* bottom_data = bottom[0]->gpu_data();
  const Dtype* bottom_rois = bottom[1]->gpu_data();
  Dtype* top_data = top[0]->mutable_gpu_data();
  int* mapping_channel_ptr = mapping_channel_.mutable_gpu_data();
  const int count = top[0]->count();
  // Reset the output to 0 and the channel mapping to -1.
  caffe_gpu_set(count, Dtype(0), top_data);
  caffe_gpu_set(count, -1, mapping_channel_ptr);
  // Launch the forward kernel with `count` work items, passing the layer
  // geometry and the data pointers. The grid and block sizes come from the
  // Caffe helpers CAFFE_GET_BLOCKS / CAFFE_CUDA_NUM_THREADS (defined
  // outside this file).
  // NOLINT_NEXT_LINE(whitespace/operators)
  PSROIPoolingForward<Dtype><<<CAFFE_GET_BLOCKS(count),
      CAFFE_CUDA_NUM_THREADS>>>(
      count, bottom_data, spatial_scale_, channels_, height_, width_,
      pooled_height_, pooled_width_, bottom_rois, output_dim_, group_size_,
      top_data, mapping_channel_ptr);
  // Check for errors raised by the kernel launch.
  CUDA_POST_KERNEL_CHECK;
}
}