Caffe中新建Layer--改写Faster-RCNN的proposal layer

目录

    • 目录
      • 第一步、在caffe.proto中添加LayerParameter相关参数
      • 第二步、实现layer的cpp、hpp文件
      • 第三步、测试
      • 编译CPU模式遇到问题
      • 原文链接:http://blog.csdn.net/u011956147/article/details/60762463


在caffe中新建layer主要遵从以下几步:

  1. 在caffe.proto中添加LayerParameter相关参数
  2. 编写实现layer函数,包括hpp、cpp和cu文件
  3. 有时还需要修改layer_factory
  4. 调试

由于项目需要,这里主要是把Faster RCNN 中的proposal layer换成C++的,只需要cpu版本,主要是参考pvanet,按自己的要求做了相应的修改,在这里记录下学习原始proposal layer的编写过程。


第一步、在caffe.proto中添加LayerParameter相关参数

在文件caffe/src/caffe/proto/caffe.proto中

// NOTE
// Update the next available ID when you add a new LayerParameter field.
//
// LayerParameter next available layer-specific ID: 147 (last added: recurrent_param)
// Excerpt of Caffe's LayerParameter message: only the first few stock fields
// and the Faster R-CNN specific parameter fields at the end are shown.
message LayerParameter {
  optional string name = 1; // the layer name
  optional string type = 2; // the layer type
  repeated string bottom = 3; // the name of each bottom blob
  repeated string top = 4; // the name of each top blob
  // (the remaining stock fields are elided in this excerpt)
  .
  .
  .
  // Parameter fields that come with the py-faster-rcnn code base.
  optional ROIPoolingParameter roi_pooling_param = 8266711;
  optional SmoothL1LossParameter smooth_l1_loss_param = 8266712;
  // Newly added field carrying the settings of the C++ proposal layer.
  optional ProposalParameter proposal_param = 8266713;
}

在这个文件中会出现V0LayerParameter和V1LayerParameter,这两个参数是在旧版caffe中的,不用管,上面出现的ROIPoolingParameter和SmoothL1LossParameter是基于py-faster-rcnn代码来的,添加自己的ProposalParameter即可。
参考其他层的实现,然后就需要定义ProposalParameter,仿照python代码的写法,定义feat_stride等参数

// Message that stores parameters used by ProposalLayer
message ProposalParameter {
  // Multiplier that turns a feature-map cell index (w, h) into the x / y
  // offset added to every anchor at that cell.
  optional uint32 feat_stride = 1 [default = 16];
  // Side length of the base box from which the anchors are generated.
  optional uint32 base_size = 2 [default = 16];
  // Minimum box width/height; the layer multiplies it by the scale values read
  // from the image-info input, and boxes below it get a score of 0.
  optional uint32 min_size = 3 [default = 16];
  // Anchor aspect ratios and scales; one anchor is generated per
  // (ratio, scale) pair. Neither list has a default, so both must be given.
  repeated float ratio = 4;
  repeated float scale = 5;
  // Number of highest-scoring proposals handed to NMS.
  optional uint32 pre_nms_topn = 6 [default = 6000];
  // Maximum number of proposals kept after NMS.
  optional uint32 post_nms_topn = 7 [default = 300];
  // A box whose IoU with an already kept box exceeds this value is suppressed.
  optional float nms_thresh = 8 [default = 0.7];
}

像ConvolutionParameter其中还会有下面这些参数,在这里先都不需要用到

  // Example of an extra option that messages such as ConvolutionParameter
  // carry; ProposalParameter does not need it.
  // NOTE(review): presumably selects the compute backend - confirm in caffe.proto.
  enum Engine {
    DEFAULT = 0;
    CAFFE = 1;
    CUDNN = 2;
  }

到这里,caffe.proto就基本完成了。

第二步、实现layer的cpp、hpp文件

写完caffe.proto后,需要加入具体layer,与其他的layer类似,在caffe/src/caffe/layers中建立一个proposal_layer.cpp

#include "caffe/fast_rcnn_layers.hpp"
#include "caffe/util/nms.hpp"

// Adds 0.5 and truncates to int, i.e. rounds half up for non-negative x.
// The expansion names Dtype, so it only works where a type called Dtype is in
// scope (inside the Dtype-templated functions of this file).
#define ROUND(x) ((int)((x) + (Dtype)0.5))

using std::max;
using std::min;

namespace caffe {

/**
 * Applies predicted regression deltas to a box in place, clips the result to
 * the image, and reports whether the clipped box is still large enough.
 *
 * @param box        in/out corner coordinates (x1, y1, x2, y2)
 * @param dx         center shift along x, as a fraction of the box width
 * @param dy         center shift along y, as a fraction of the box height
 * @param d_log_w    log of the width scale factor
 * @param d_log_h    log of the height scale factor
 * @param img_W      image width; x coordinates are clamped to [0, img_W - 1]
 * @param img_H      image height; y coordinates are clamped to [0, img_H - 1]
 * @param min_box_W  minimum accepted width of the clipped box
 * @param min_box_H  minimum accepted height of the clipped box
 * @return 1 if the clipped box reaches both minimum extents, otherwise 0
 */
template <typename Dtype>
static
int transform_box(Dtype box[],
                  const Dtype dx, const Dtype dy,
                  const Dtype d_log_w, const Dtype d_log_h,
                  const Dtype img_W, const Dtype img_H,
                  const Dtype min_box_W, const Dtype min_box_H)
{
  const Dtype zero = static_cast<Dtype>(0);
  const Dtype one = static_cast<Dtype>(1);
  const Dtype half = static_cast<Dtype>(0.5);

  // Extents and center of the incoming box (inclusive pixel coordinates).
  const Dtype old_w = box[2] - box[0] + one;
  const Dtype old_h = box[3] - box[1] + one;
  const Dtype old_cx = box[0] + half * old_w;
  const Dtype old_cy = box[1] + half * old_h;

  // Shift the center by the predicted fraction of the old extents.
  const Dtype new_cx = dx * old_w + old_cx;
  const Dtype new_cy = dy * old_h + old_cy;

  // Scale the extents by the exponentiated log-deltas.
  const Dtype new_w = exp(d_log_w) * old_w;
  const Dtype new_h = exp(d_log_h) * old_h;

  // Corners of the regressed box, then clamped into the image.
  const Dtype max_x = img_W - one;
  const Dtype max_y = img_H - one;
  box[0] = std::max(zero, std::min(new_cx - half * new_w, max_x));
  box[1] = std::max(zero, std::min(new_cy - half * new_h, max_y));
  box[2] = std::max(zero, std::min(new_cx + half * new_w, max_x));
  box[3] = std::max(zero, std::min(new_cy + half * new_h, max_y));

  // Size test on the clipped box.
  const bool wide_enough = (box[2] - box[0] + one) >= min_box_W;
  const bool tall_enough = (box[3] - box[1] + one) >= min_box_H;
  return (wide_enough && tall_enough) ? 1 : 0;
}

/**
 * Partial quicksort of 5-value records (x1, y1, x2, y2, score) in descending
 * score order, in place.
 *
 * The record at `start` is the pivot. The left partition is always sorted
 * recursively; the right partition is only visited when it begins before
 * `num_top`, so records beyond the first `num_top` may remain unordered.
 *
 * @param list_cpu  flat array of records, 5 values each
 * @param start     index of the first record of the range (inclusive)
 * @param end       index of the last record of the range (inclusive)
 * @param num_top   number of leading records that must end up ordered
 */
template <typename Dtype>
static
void sort_box(Dtype list_cpu[], const int start, const int end,
              const int num_top)
{
  const int kFields = 5;  // values per record
  const int kScore = 4;   // position of the score inside a record

  const Dtype pivot = list_cpu[start * kFields + kScore];
  int lo = start + 1;
  int hi = end;

  while (lo <= hi) {
    // Skip records that already sit on the correct side of the pivot.
    while (lo <= end && list_cpu[lo * kFields + kScore] >= pivot) {
      ++lo;
    }
    while (hi > start && list_cpu[hi * kFields + kScore] <= pivot) {
      --hi;
    }
    if (lo > hi) {
      break;
    }

    // Exchange the two misplaced records field by field.
    Dtype* const rec_lo = list_cpu + lo * kFields;
    Dtype* const rec_hi = list_cpu + hi * kFields;
    for (int f = 0; f < kFields; ++f) {
      const Dtype held = rec_lo[f];
      rec_lo[f] = rec_hi[f];
      rec_hi[f] = held;
    }
    ++lo;
    --hi;
  }

  // Move the pivot record to its final position.
  if (hi > start) {
    Dtype* const rec_pivot = list_cpu + start * kFields;
    Dtype* const rec_hi = list_cpu + hi * kFields;
    for (int f = 0; f < kFields; ++f) {
      const Dtype held = rec_pivot[f];
      rec_pivot[f] = rec_hi[f];
      rec_hi[f] = held;
    }
  }

  if (start < hi - 1) {
    sort_box(list_cpu, start, hi - 1, num_top);
  }
  const int right_begin = hi + 1;
  if (right_begin < num_top && right_begin < end) {
    sort_box(list_cpu, right_begin, end, num_top);
  }
}

/**
 * Fills `anchors` with one (x1, y1, x2, y2) box per (ratio, scale) pair, all
 * centered on the base box. Boxes are written ratio-major: every scale of the
 * first ratio, then every scale of the second ratio, and so on.
 *
 * @param base_size   side length of the square base box
 * @param ratios      aspect ratios, `num_ratios` entries
 * @param scales      scale factors, `num_scales` entries
 * @param num_ratios  number of entries in `ratios`
 * @param num_scales  number of entries in `scales`
 * @param anchors     output, num_ratios * num_scales * 4 values
 */
template <typename Dtype>
static
void generate_anchors(int base_size,
                      const Dtype ratios[],
                      const Dtype scales[],
                      const int num_ratios,
                      const int num_scales,
                      Dtype anchors[])
{
  // Area and center coordinate of the base box.
  const Dtype area = (Dtype)(base_size * base_size);
  const Dtype mid = (Dtype)0.5 * (base_size - (Dtype)1);

  int slot = 0;  // offset of the next anchor inside `anchors`
  for (int r = 0; r < num_ratios; ++r) {
    // Rounded width/height for this aspect ratio.
    const Dtype w_for_ratio = (Dtype)ROUND(sqrt(area / ratios[r]));
    const Dtype h_for_ratio = (Dtype)ROUND(w_for_ratio * ratios[r]);

    for (int s = 0; s < num_scales; ++s, slot += 4) {
      // Half extents of the scaled box, measured from the center.
      const Dtype half_w = (Dtype)0.5 * (w_for_ratio * scales[s] - (Dtype)1);
      const Dtype half_h = (Dtype)0.5 * (h_for_ratio * scales[s] - (Dtype)1);

      anchors[slot + 0] = mid - half_w;
      anchors[slot + 1] = mid - half_h;
      anchors[slot + 2] = mid + half_w;
      anchors[slot + 3] = mid + half_h;
    }
  }
}

/**
 * Builds one (x1, y1, x2, y2, score) proposal per anchor and feature-map cell.
 *
 * Every anchor is shifted to its cell, regressed and clipped by
 * transform_box(), and scored with the matching input score; the score is
 * multiplied by transform_box()'s 0/1 result, so boxes that end up smaller
 * than the minimum size get a score of 0.
 *
 * @param bottom4d     scores, indexed as [anchor][h][w]
 * @param d_anchor4d   box deltas, indexed as [anchor * 4 + component][h][w]
 * @param anchors      base anchors, 4 values each
 * @param proposals    output, 5 values per proposal, ordered h, w, anchor
 * @param num_anchors  number of base anchors
 * @param bottom_H     feature-map height
 * @param bottom_W     feature-map width
 * @param img_H        image height used for clipping
 * @param img_W        image width used for clipping
 * @param min_box_H    minimum accepted box height
 * @param min_box_W    minimum accepted box width
 * @param feat_stride  multiplier from cell index to anchor offset
 */
template <typename Dtype>
static
void enumerate_proposals_cpu(const Dtype bottom4d[],
                             const Dtype d_anchor4d[],
                             const Dtype anchors[],
                             Dtype proposals[],
                             const int num_anchors,
                             const int bottom_H, const int bottom_W,
                             const Dtype img_H, const Dtype img_W,
                             const Dtype min_box_H, const Dtype min_box_W,
                             const int feat_stride)
{
  const int plane = bottom_H * bottom_W;  // elements per channel
  Dtype* out = proposals;

  for (int row = 0; row < bottom_H; ++row) {
    const Dtype shift_y = row * feat_stride;
    for (int col = 0; col < bottom_W; ++col) {
      const Dtype shift_x = col * feat_stride;
      const int cell = row * bottom_W + col;
      const Dtype* const deltas = d_anchor4d + cell;
      const Dtype* const scores = bottom4d + cell;

      for (int a = 0; a < num_anchors; ++a, out += 5) {
        const int first = a * 4;
        const Dtype dx = deltas[(first + 0) * plane];
        const Dtype dy = deltas[(first + 1) * plane];
        const Dtype d_log_w = deltas[(first + 2) * plane];
        const Dtype d_log_h = deltas[(first + 3) * plane];

        // Anchor moved to the current cell.
        out[0] = shift_x + anchors[first + 0];
        out[1] = shift_y + anchors[first + 1];
        out[2] = shift_x + anchors[first + 2];
        out[3] = shift_y + anchors[first + 3];

        // Regress and clip in place; a 0 result zeroes the score of a box
        // that is too small after clipping.
        const int keep = transform_box(out,
                                       dx, dy, d_log_w, d_log_h,
                                       img_W, img_H, min_box_W, min_box_H);
        out[4] = keep * scores[a * plane];
      }
    }
  }
}

/**
 * Copies the selected proposals into the RoI output.
 *
 * Each output RoI is (item_index, x1, y1, x2, y2); the proposal's score goes
 * to `roi_scores` when that pointer is non-null.
 *
 * @param num_rois     number of RoIs to write
 * @param item_index   value stored in the first slot of every RoI
 * @param proposals    source proposals, 5 values each (x1, y1, x2, y2, score)
 * @param roi_indices  indices into `proposals`, `num_rois` entries
 * @param rois         output, 5 values per RoI
 * @param roi_scores   optional output, one score per RoI; may be null
 */
template <typename Dtype>
static
void retrieve_rois_cpu(const int num_rois,
                       const int item_index,
                       const Dtype proposals[],
                       const int roi_indices[],
                       Dtype rois[],
                       Dtype roi_scores[])
{
  for (int r = 0; r < num_rois; ++r) {
    const Dtype* const src = proposals + roi_indices[r] * 5;
    Dtype* const dst = rois + r * 5;

    dst[0] = item_index;
    for (int c = 0; c < 4; ++c) {
      dst[c + 1] = src[c];
    }

    if (roi_scores) {
      roi_scores[r] = src[4];
    }
  }
}

template <typename Dtype>
void ProposalLayer::LayerSetUp(const vector*>& bottom,
      const vector*>& top) {

  ProposalParameter param = this->layer_param_.proposal_param();
  //参考proposal_layer.py
  //forward的cfg参数
  base_size_ = param.base_size();
  feat_stride_ = param.feat_stride();  
  pre_nms_topn_ = param.pre_nms_topn();  //使用nms前RPN proposals最大数量
  post_nms_topn_ = param.post_nms_topn();//使用nms后RPN proposals数量
  nms_thresh_ = param.nms_thresh();      //阈值 0.7
  min_size_ = param.min_size();          //proposal的H W都要大于16
  //实现generate_anchors
  vector ratios(param.ratio_size());
  for (int i = 0; i < param.ratio_size(); ++i) {
    ratios[i] = param.ratio(i);
  }
  vector scales(param.scale_size());
  for (int i = 0; i < param.scale_size(); ++i) {
    scales[i] = param.scale(i);
  }

  vector<int> anchors_shape(2);
  anchors_shape[0] = ratios.size() * scales.size();
  anchors_shape[1] = 4;
  anchors_.Reshape(anchors_shape);
  generate_anchors(base_size_, &ratios[0], &scales[0],
                   ratios.size(), scales.size(),
                   anchors_.mutable_cpu_data());

  vector<int> roi_indices_shape(1);
  roi_indices_shape[0] = post_nms_topn_;
  roi_indices_.Reshape(roi_indices_shape);
  //输出Top[0],是 R个 regions of interest, each is a 5-tuple (n, x1, y1, x2, y2) ,
  //其中n 代表batch index; x1, y1, x2, y2表示矩形的4个点的坐标。
  //输出Top[1]为每个proposal的得分,即是一个物体的可能性。
  // rois blob : holds R regions of interest, each is a 5 - tuple
  // (n, x1, y1, x2, y2) specifying an image batch index n and a
  // rectangle(x1, y1, x2, y2)
  vector<int> top_shape(2);
  top_shape[0] = bottom[0]->shape(0) * post_nms_topn_;
  top_shape[1] = 5;
  top[0]->Reshape(top_shape);

  // scores blob : holds scores for R regions of interest
  if (top.size() > 1) {
    top_shape.pop_back();
    top[1]->Reshape(top_shape);
  }
}

template <typename Dtype>
void ProposalLayer::Forward_cpu(const vector*>& bottom,
                                       const vector*>& top)
{
  CHECK_EQ(bottom[0]->shape(0), 1) << "Only single item batches are supported";

  const Dtype* p_bottom_item = bottom[0]->cpu_data();
  const Dtype* p_d_anchor_item = bottom[1]->cpu_data();
  const Dtype* p_img_info_cpu = bottom[2]->cpu_data();
  Dtype* p_roi_item = top[0]->mutable_cpu_data();
  Dtype* p_score_item = (top.size() > 1) ? top[1]->mutable_cpu_data() : NULL;

  vector<int> proposals_shape(2);
  vector<int> top_shape(2);
  proposals_shape[0] = 0;
  proposals_shape[1] = 5;
  top_shape[0] = 0;
  top_shape[1] = 5;

  for (int n = 0; n < bottom[0]->shape(0); ++n) {
    // bottom shape: (2 x num_anchors) x H x W
    const int bottom_H = bottom[0]->height();
    const int bottom_W = bottom[0]->width();
    // input image height & width
    const Dtype img_H = p_img_info_cpu[0];
    const Dtype img_W = p_img_info_cpu[1];
    // scale factor for height & width
    const Dtype scale_H = p_img_info_cpu[2];
    const Dtype scale_W = p_img_info_cpu[3];
    // minimum box width & height
    const Dtype min_box_H = min_size_ * scale_H;
    const Dtype min_box_W = min_size_ * scale_W;
    // number of all proposals = num_anchors * H * W
    const int num_proposals = anchors_.shape(0) * bottom_H * bottom_W;
    // number of top-n proposals before NMS
    const int pre_nms_topn = std::min(num_proposals,  pre_nms_topn_);
    // number of final RoIs
    int num_rois = 0;

    // enumerate all proposals
    //   num_proposals = num_anchors * H * W
    //   (x1, y1, x2, y2, score) for each proposal
    // NOTE: for bottom, only foreground scores are passed
    proposals_shape[0] = num_proposals;
    proposals_.Reshape(proposals_shape);
    enumerate_proposals_cpu(   
        p_bottom_item + num_proposals,  p_d_anchor_item,
        anchors_.cpu_data(),  proposals_.mutable_cpu_data(),  anchors_.shape(0),
        bottom_H,  bottom_W,  img_H,  img_W,  min_box_H,  min_box_W,
        feat_stride_);  //得到所有proposals,就是python中的

    sort_box(proposals_.mutable_cpu_data(), 0, num_proposals - 1, pre_nms_topn_);
    //读取数据的时候使用cpu_data
    //修改数据的时候使用mutable_cpu_data
    nms_cpu(pre_nms_topn,  proposals_.cpu_data(),
            roi_indices_.mutable_cpu_data(),  &num_rois,
            0,  nms_thresh_,  post_nms_topn_);

    retrieve_rois_cpu(
        num_rois,  n,  proposals_.cpu_data(),  roi_indices_.cpu_data(),
        p_roi_item,  p_score_item);

    top_shape[0] += num_rois;
  }

  top[0]->Reshape(top_shape);
  if (top.size() > 1) {
    top_shape.pop_back();
    top[1]->Reshape(top_shape);
  }
}


// CPU-only builds: STUB_GPU emits out-of-class definitions for the layer's GPU
// entry points (the compiler note quoted later in this article shows it
// defining Backward_gpu), so the header may only declare them.
#ifdef CPU_ONLY
STUB_GPU(ProposalLayer);
#endif

// NOTE(review): presumably these Caffe macros instantiate the class for the
// supported Dtypes and register it under the type string "Proposal" - confirm
// against Caffe's common.hpp and layer_factory.hpp.
INSTANTIATE_CLASS(ProposalLayer);
REGISTER_LAYER_CLASS(Proposal);

}  // namespace caffe

补充完成fast_rcnn_layers.hpp

namespace caffe {

/* ROIPoolingLayer - Region of Interest Pooling Layer
*/
template <typename Dtype>
class ROIPoolingLayer : public Layer {
//...
};

template <typename Dtype>
class SmoothL1LossLayer : public LossLayer {
//...
};

template <typename Dtype>
class ProposalLayer : public Layer {
 public:
  explicit ProposalLayer(const LayerParameter& param)
      : Layer(param) {}
  virtual void LayerSetUp(const vector*>& bottom,
      const vector*>& top);
  virtual void Reshape(const vector*>& bottom,
      const vector*>& top) {
    //LOG(FATAL) << "Reshaping happens during the call to forward.";
  }

  virtual inline const char* type() const { return "ProposalLayer"; }

 protected:
  virtual void Forward_cpu(const vector*>& bottom,
      const vector*>& top);
  virtual void Forward_gpu(const vector*>& bottom,
      const vector*>& top);
  virtual void Backward_cpu(const vector*>& top,
      const vector<bool>& propagate_down, const vector*>& bottom) {}
  virtual void Backward_gpu(const vector*>& top,
      const vector<bool>& propagate_down, const vector*>& bottom) {}

  int base_size_;
  int feat_stride_;
  int pre_nms_topn_;
  int post_nms_topn_;
  Dtype nms_thresh_;
  int min_size_;
  Blob anchors_;
  Blob proposals_;
  Blob<int> roi_indices_;
  Blob<int> nms_mask_;
};

}  // namespace caffe

#endif  // CAFFE_FAST_RCNN_LAYERS_HPP_

同时在实现proposal layer的过程中,有一步很关键,就是非极大值抑制(nms),在原来的python版本中见lib/nms文件夹,这里,我们还需要添加nms相关hpp和cpp文件。
在caffe/src/caffe/util中添加nms.cpp文件:

#include "caffe/util/nms.hpp"

using std::max;
using std::min;

namespace caffe {

/**
 * Intersection-over-union of two boxes given as (x1, y1, x2, y2) with
 * inclusive pixel coordinates (width = x2 - x1 + 1).
 *
 * @param A  first box; only A[0..3] are read
 * @param B  second box; only B[0..3] are read
 * @return   intersection area divided by union area; 0 for disjoint boxes
 */
template <typename Dtype>
static
Dtype iou(const Dtype A[], const Dtype B[])
{
  // Disjoint boxes: no overlap along x or along y.
  if (A[0] > B[2] || A[1] > B[3] || A[2] < B[0] || A[3] < B[1]) {
    return 0;
  }

  // overlapped region (= box)
  const Dtype x1 = std::max(A[0],  B[0]);
  const Dtype y1 = std::max(A[1],  B[1]);
  const Dtype x2 = std::min(A[2],  B[2]);
  const Dtype y2 = std::min(A[3],  B[3]);

  // intersection area
  const Dtype width = std::max((Dtype)0,  x2 - x1 + (Dtype)1);
  const Dtype height = std::max((Dtype)0,  y2 - y1 + (Dtype)1);
  const Dtype area = width * height;

  // area of A, B
  const Dtype A_area = (A[2] - A[0] + (Dtype)1) * (A[3] - A[1] + (Dtype)1);
  const Dtype B_area = (B[2] - B[0] + (Dtype)1) * (B[3] - B[1] + (Dtype)1);

  // IoU
  return area / (A_area + B_area - area);
}

// Explicit instantiations. An explicit instantiation must not repeat the
// storage-class specifier `static`; the linkage comes from the template's
// declaration above.
template float iou(const float A[], const float B[]);
template double iou(const double A[], const double B[]);

/**
 * Greedy non-maximum suppression on the CPU.
 *
 * Boxes are visited in the order given. Every box that has not been
 * suppressed is kept, and it suppresses each later box whose IoU with it
 * exceeds `nms_thresh`. The scan stops once `max_num_out` boxes are kept.
 *
 * @param num_boxes    number of input boxes
 * @param boxes        flat array with 5 values per box; the first four are
 *                     passed to iou() as the box corners
 * @param index_out    receives `base_index + i` for every kept box i
 * @param num_out      receives the number of kept boxes
 * @param base_index   offset added to every stored index
 * @param nms_thresh   IoU threshold above which a box is suppressed
 * @param max_num_out  number of kept boxes at which the scan stops
 */
template <typename Dtype>
void nms_cpu(const int num_boxes,
             const Dtype boxes[],
             int index_out[],
             int* const num_out,
             const int base_index,
             const Dtype nms_thresh, const int max_num_out)
{
  // One flag per box, all initially 0 (not suppressed).
  std::vector<char> suppressed(num_boxes, 0);
  int kept = 0;

  for (int cur = 0; cur < num_boxes; ++cur) {
    if (suppressed[cur]) {
      continue;
    }

    index_out[kept] = base_index + cur;
    ++kept;
    if (kept == max_num_out) {
      break;
    }

    // Suppress every later box that overlaps the kept one too much.
    const Dtype* const kept_box = boxes + cur * 5;
    for (int other = cur + 1; other < num_boxes; ++other) {
      if (suppressed[other]) {
        continue;
      }
      if (iou(kept_box, &boxes[other * 5]) > nms_thresh) {
        suppressed[other] = 1;
      }
    }
  }

  *num_out = kept;
}

// Explicit instantiations for the two Dtypes.
template
void nms_cpu(const int num_boxes,
             const float boxes[],
             int index_out[],
             int* const num_out,
             const int base_index,
             const float nms_thresh, const int max_num_out);
template
void nms_cpu(const int num_boxes,
             const double boxes[],
             int index_out[],
             int* const num_out,
             const int base_index,
             const double nms_thresh, const int max_num_out);

}  // namespace caffe

在caffe/include/caffe/util中添加nms.hpp文件

// Declarations of the non-maximum suppression helpers.
// The guard name avoids the reserved form "underscore + uppercase letter".
#ifndef CAFFE_UTIL_NMS_HPP_
#define CAFFE_UTIL_NMS_HPP_

#include <vector>

#include "caffe/blob.hpp"

namespace caffe {

/**
 * Greedy non-maximum suppression on the CPU.
 *
 * @param num_boxes    number of input boxes
 * @param boxes        flat array with 5 values per box
 * @param index_out    receives base_index + i for every kept box i
 * @param num_out      receives the number of kept boxes
 * @param base_index   offset added to every stored index
 * @param nms_thresh   IoU threshold above which a box is suppressed
 * @param max_num_out  number of kept boxes at which the scan stops
 */
template <typename Dtype>
void nms_cpu(const int num_boxes,
             const Dtype boxes[],
             int index_out[],
             int* const num_out,
             const int base_index,
             const Dtype nms_thresh,
             const int max_num_out);

// GPU counterpart. NOTE(review): only declared here; no definition appears in
// this article, which covers the CPU path only.
template <typename Dtype>
void nms_gpu(const int num_boxes,
             const Dtype boxes_gpu[],
             Blob<int>* const p_mask,
             int index_out_cpu[],
             int* const num_out,
             const int base_index,
             const Dtype nms_thresh,
             const int max_num_out);

}  // namespace caffe

#endif  // CAFFE_UTIL_NMS_HPP_

上面这些代码的注解不是很详细,具体解读等有时间了再来补充。


第三步、测试

在caffe中其实是需要写test部分代码的,这里没涉及到,上面的每一步,做完后就可以make一下,根据提示的消息再来调代码。这里给出一个用matlab测试的例子,是参考的其他博客,本人没有试过,仅供参考,出处:原文链接
利用MATLAB调试
Caffe的MATLAB接口可是个好东西,用MATLAB调试可以很方便地看到各种数据的形式以及结果。
我们这边需要调试自己实现的激活函数,shlu_layer。
第一步便是编写一个测试网络,具体如下:

# Minimal test network: a 1*1*100*100 input fed through a single layer.
name: "SHLUTEST"
input: "data"
input_dim: 1
input_dim: 1
input_dim: 100
input_dim: 100

# Required when testing the backward pass; otherwise the propagated gradients are all 0
force_backward: true

layer {
  name: "shlu1"
  type: "Shlu" # must match the name you defined for the layer; mind the case
  bottom: "data"
  top: "shlu1"
  }

编写的这个网络实现的便是输入数据维数1*1*100*100,通过shlu_layer。
接下来,打开Matlab

# Start MATLAB from the Caffe directory (the script below adds ./matlab to the path)
cd caffe
matlab

编写代码如下:

% Smoke test of the "shlu1" layer through Caffe's MATLAB interface:
% one forward pass on random data, then one backward pass with a chosen
% top gradient. MATLAB comments start with '%' ('#' is not a comment marker).
addpath ./matlab
model = './shlu_test.prototxt';
caffe.set_mode_cpu();
% Use GPU mode when testing the GPU code:
% caffe.set_mode_gpu();
% caffe.set_device(gpu_id);
net = caffe.Net(model, 'test');
% Fill the 'data' blob with normally distributed random numbers of its own
% shape (1*1*100*100)
net.blobs('data').set_data(randn(net.blobs('data').shape));
% Forward pass
net.forward_prefilled();
% Check whether "res" is the expected result
res = net.blobs('shlu1').get_data();
% Backward pass
% top_diff is the gradient you choose yourself; its shape must match the blob.
% (Named top_diff rather than diff so it does not resolve to MATLAB's builtin
% diff function.)
top_diff = ones(net.blobs('shlu1').shape);
net.blobs('shlu1').set_diff(top_diff);
net.backward_prefilled();
% Check whether "data_diff" is the expected result
data_diff = net.blobs('data').get_diff();

分别在cpu模式与gpu模式下都调试一遍,保证没有错误,再进行自己所需要的网络的整体配置。


编译CPU模式遇到问题

src/caffe/layers/proposal_layer.cpp:321:10: error: redefinition of ‘void caffe::ProposalLayer::Backward_gpu(const std::vector*>&, const std::vector&, const std::vector*>&)’
 STUB_GPU(ProposalLayer);
          ^
./include/caffe/util/device_alternate.hpp:17:6: note: in definition of macro ‘STUB_GPU’
 void classname::Backward_gpu(const vector*>& top, \
      ^
In file included from src/caffe/layers/proposal_layer.cpp:1:0:
./include/caffe/fast_rcnn_layers.hpp:122:16: note: ‘virtual void caffe::ProposalLayer::Backward_gpu(const std::vector*>&, const std::vector&, const std::vector*>&)’ previously declared here
   virtual void Backward_gpu(const vector*>& top,
                ^
Makefile:575: recipe for target '.build_release/src/caffe/layers/proposal_layer.o' failed
make: *** [.build_release/src/caffe/layers/proposal_layer.o] Error 1
make: *** Waiting for unfinished jobs....

解决方案:
将./include/caffe/fast_rcnn_layers.hpp的Backward_gpu代码

 virtual void Backward_gpu(const vector*>& top,
      const vector& propagate_down, const vector*>& bottom){}

换成

 virtual void Backward_gpu(const vector*>& top,
      const vector& propagate_down, const vector*>& bottom);

原因是:
caffe::ProposalLayer::Backward_gpu被定义了两次:一次是./include/caffe/fast_rcnn_layers.hpp中带空函数体{}的类内定义,另一次是./include/caffe/util/device_alternate.hpp中的STUB_GPU宏在CPU_ONLY模式下展开生成的定义,因此编译器报重定义错误。把头文件中的函数体去掉、只保留声明即可。


原文链接:http://blog.csdn.net/u011956147/article/details/60762463

你可能感兴趣的:(Deep,Learning,Python,Object,Detection,Caffe)