SPP:Spatial Pyramid Pooling in Deep Convolutional Networks for Visual Recognition(阅读)

SPP-Net已经是很早之前读过的,没有记录,现在整理检测系列的这几篇,这里简单再过一遍。

论文中最重要的部分是SPM,SPM的详细阅读见博客。

先看一下整体sppnet_poster,poster在何恺明大神的主页上有链接。

SPP:Spatial Pyramid Pooling in Deep Convolutional Networks for Visual Recognition(阅读)_第1张图片

SPP:Spatial Pyramid Pooling in Deep Convolutional Networks for Visual Recognition(阅读)_第2张图片

SPP:Spatial Pyramid Pooling in Deep Convolutional Networks for Visual Recognition(阅读)_第3张图片

SPP:Spatial Pyramid Pooling in Deep Convolutional Networks for Visual Recognition(阅读)_第4张图片

SPP:Spatial Pyramid Pooling in Deep Convolutional Networks for Visual Recognition(阅读)_第5张图片

SPP:Spatial Pyramid Pooling in Deep Convolutional Networks for Visual Recognition(阅读)_第6张图片

Detection的SPP-net和RCNN流程基本一样,只是在Conv5和fc6中间加入了SPP_layer层,目的是把任意大小的特征图池化成固定长度的特征向量,这样全连接层就不受输入图像尺寸的限制。理论上输入尺寸可以任意,但实际中输入尺寸不应与训练尺度偏差太大,否则学到的特征差距会很大。

SPP:Spatial Pyramid Pooling in Deep Convolutional Networks for Visual Recognition(阅读)_第7张图片

速度很快的一个原因是:卷积层只对整张图像做一次前向传播,约2000个候选框(region proposals)直接在共享的特征图上取特征,而不必像R-CNN那样对每一个region单独做一次前向传播。

重点:

SPP:Spatial Pyramid Pooling in Deep Convolutional Networks for Visual Recognition(阅读)_第8张图片

这是一个三层的金字塔,stride和sizeX的大小由金字塔的层数和特征图边长确定。比如conv5输出的特征图边长为13时,pool3x3中stride=向下取整(13/3)=4,sizeX=向上取整(13/3)=5。

=========================================================================================

spp_layer.cpp实现:

#include <algorithm>
#include <cmath>
#include <vector>

#include "caffe/layer.hpp"
#include "caffe/layers/concat_layer.hpp"
#include "caffe/layers/flatten_layer.hpp"
#include "caffe/layers/pooling_layer.hpp"
#include "caffe/layers/split_layer.hpp"
#include "caffe/layers/spp_layer.hpp"
namespace caffe {

using std::min;
using std::max;

template 
//1、输入参数pyramid_level:表示金字塔的第几层。我们将对这一层,进行划分为2^n个图片块。金字塔从第0层开始算起,0层就是一整张图片
//第1层就是把图片划分为2*2个块,第2层把图片划分为4*4个块,以此类推……,也就是说我们块的大小就是[w/(2^n),h/(2^n)]
//2、参数bottom_w、bottom_h是我们要输入这一层网络的特征图的大小
//3、参数spp_param是设置我们要进行池化的方法,比如最大池化、均值池化、概率池化……
LayerParameter SPPLayer::GetPoolingParam(const int pyramid_level,
      const int bottom_h, const int bottom_w, const SPPParameter spp_param)
{
  LayerParameter pooling_param;
  int num_bins = pow(2, pyramid_level);//计算可以划分多少个刻度,最后我们图片块的个数就是num_bins*num_bins
   //计算垂直方向上可以划分多少个刻度,不足的用pad补齐。然后我们最后每个图片块的大小就是(kernel_w,kernel_h)
  int kernel_h = ceil(bottom_h / static_cast(num_bins));//向上取整。采用pad补齐,pad的像素都是0
  int remainder_h = kernel_h * num_bins - bottom_h;
  int pad_h = (remainder_h + 1) / 2;//上下两边分摊pad
//计算水平方向的刻度大小,不足的用pad补齐
  int kernel_w = ceil(bottom_w / static_cast(num_bins));
  int remainder_w = kernel_w * num_bins - bottom_w;
  int pad_w = (remainder_w + 1) / 2;

  
  pooling_param.mutable_pooling_param()->set_pad_h(pad_h);
  pooling_param.mutable_pooling_param()->set_pad_w(pad_w);
  pooling_param.mutable_pooling_param()->set_kernel_h(kernel_h);
  pooling_param.mutable_pooling_param()->set_kernel_w(kernel_w);
  pooling_param.mutable_pooling_param()->set_stride_h(kernel_h);
  pooling_param.mutable_pooling_param()->set_stride_w(kernel_w);

  switch (spp_param.pool()) {
  case SPPParameter_PoolMethod_MAX://窗口最大池化
    pooling_param.mutable_pooling_param()->set_pool(
        PoolingParameter_PoolMethod_MAX);
    break;
  case SPPParameter_PoolMethod_AVE://平均池化
    pooling_param.mutable_pooling_param()->set_pool(
        PoolingParameter_PoolMethod_AVE);
    break;
  case SPPParameter_PoolMethod_STOCHASTIC://随机概率池化
    pooling_param.mutable_pooling_param()->set_pool(
        PoolingParameter_PoolMethod_STOCHASTIC);
    break;
  default:
    LOG(FATAL) << "Unknown pooling method.";
  }

  return pooling_param;
}

template 
//这个函数是为了获取我们本层网络的输入特征图、输出相关参数,然后设置相关变量,比如输入特征图的图片的大小、个数
void SPPLayer::LayerSetUp(const vector*>& bottom,
      const vector*>& top) {
  SPPParameter spp_param = this->layer_param_.spp_param();

  num_ = bottom[0]->num();//batch size 大小
  channels_ = bottom[0]->channels();//特征图个数
  bottom_h_ = bottom[0]->height();//特征图宽高
  bottom_w_ = bottom[0]->width();
  reshaped_first_time_ = false;
  CHECK_GT(bottom_h_, 0) << "Input dimensions cannot be zero.";
  CHECK_GT(bottom_w_, 0) << "Input dimensions cannot be zero.";

  pyramid_height_ = spp_param.pyramid_height();//金子塔有多少层
  split_top_vec_.clear();//清空相关数据
  pooling_bottom_vecs_.clear();
  pooling_layers_.clear();
  pooling_top_vecs_.clear();
  pooling_outputs_.clear();
  flatten_layers_.clear();
  flatten_top_vecs_.clear();
  flatten_outputs_.clear();
  concat_bottom_vec_.clear();
  //如果金字塔只有一层,那么我们其实是对一整张图片进行pooling,也就是文献所提到的:global pooling
  if (pyramid_height_ == 1) {
    // pooling layer setup
    LayerParameter pooling_param = GetPoolingParam(0, bottom_h_, bottom_w_,spp_param);
    pooling_layers_.push_back(shared_ptr > (new PoolingLayer(pooling_param)));
    pooling_layers_[0]->SetUp(bottom, top);
    return;
  }
  //这个将用于保存金子塔每一层
  for (int i = 0; i < pyramid_height_; i++) {
    split_top_vec_.push_back(new Blob());
  }

  // split layer setup
  LayerParameter split_param;
  split_layer_.reset(new SplitLayer(split_param));
  split_layer_->SetUp(bottom, split_top_vec_);

  for (int i = 0; i < pyramid_height_; i++) {
    // pooling layer input holders setup
    pooling_bottom_vecs_.push_back(new vector*>);
    pooling_bottom_vecs_[i]->push_back(split_top_vec_[i]);
    pooling_outputs_.push_back(new Blob());
    pooling_top_vecs_.push_back(new vector*>);
    pooling_top_vecs_[i]->push_back(pooling_outputs_[i]);

    // 获取金字塔每一层相关参数
    LayerParameter pooling_param = GetPoolingParam(i, bottom_h_, bottom_w_, spp_param);

    pooling_layers_.push_back(shared_ptr > (new PoolingLayer(pooling_param)));
    pooling_layers_[i]->SetUp(*pooling_bottom_vecs_[i], *pooling_top_vecs_[i]);

    //每一层金字塔输出向量
    flatten_outputs_.push_back(new Blob());
    flatten_top_vecs_.push_back(new vector*>);
    flatten_top_vecs_[i]->push_back(flatten_outputs_[i]);

    // flatten layer setup
    LayerParameter flatten_param;
    flatten_layers_.push_back(new FlattenLayer(flatten_param));
    flatten_layers_[i]->SetUp(*pooling_top_vecs_[i], *flatten_top_vecs_[i]);

    // concat layer input holders setup
    concat_bottom_vec_.push_back(flatten_outputs_[i]);
  }

  // 把所有金字塔层的输出,串联成一个特征向量
  LayerParameter concat_param;
  concat_layer_.reset(new ConcatLayer(concat_param));
  concat_layer_->SetUp(concat_bottom_vec_, top);
}


================================================================================================================================

感谢:

http://blog.csdn.net/hjimce/article/details/50187655

你可能感兴趣的:(论文阅读)