由于项目需要,这里把 Faster R-CNN 中的 proposal layer 换成 C++ 实现(只需要 CPU 版本)。实现主要参考 pvanet,并按自己的需求做了相应修改;在这里记录下学习原始 proposal layer 及其编写的过程。
// Update the next available ID when you add a new LayerParameter field.
//
// LayerParameter next available layer-specific ID: 147 (last added: recurrent_param)
//
// NOTE(review): this is an excerpt. Only the first four fields and the three
// Faster R-CNN additions are listed; the fields in between are not shown here.
message LayerParameter {
  optional string name = 1; // the layer name
  optional string type = 2; // the layer type
  repeated string bottom = 3; // the name of each bottom blob
  repeated string top = 4; // the name of each top blob

  // ... other LayerParameter fields omitted from this excerpt ...

  // Layer-specific parameters for the Faster R-CNN layers. Their field IDs are
  // far above the "next available" counter noted in the header comment.
  optional ROIPoolingParameter roi_pooling_param = 8266711;
  optional SmoothL1LossParameter smooth_l1_loss_param = 8266712;
  optional ProposalParameter proposal_param = 8266713;
}
// Message that stores parameters used by ProposalLayer
message ProposalParameter {
  // Multiplier that maps a feature-map cell (w, h) to image coordinates.
  optional uint32 feat_stride = 1 [default = 16];
  // Side length of the base box from which the anchors are generated.
  optional uint32 base_size = 2 [default = 16];
  // Minimum proposal width/height; the layer multiplies it by the image scale
  // factors before comparing against a box.
  optional uint32 min_size = 3 [default = 16];
  // Anchor aspect ratios (no default: must be listed in the prototxt).
  repeated float ratio = 4;
  // Anchor scales (no default: must be listed in the prototxt).
  repeated float scale = 5;
  // Maximum number of proposals passed to NMS.
  optional uint32 pre_nms_topn = 6 [default = 6000];
  // Maximum number of proposals kept after NMS.
  optional uint32 post_nms_topn = 7 [default = 300];
  // IoU threshold above which NMS suppresses a lower-ranked box.
  optional float nms_thresh = 8 [default = 0.7];
}
// NOTE(review): truncated fragment. The enclosing message and the closing
// brace of this enum are not part of the listing, and it is unclear which
// parameter message it belongs to -- confirm against caffe.proto.
enum Engine {
CAFFE = 1;
CUDNN = 2;
#include "caffe/fast_rcnn_layers.hpp"
#include "caffe/util/nms.hpp"
// Converts x to int by adding 0.5 and truncating with an (int) cast. Because
// the cast truncates toward zero, this is round-to-nearest only for
// non-negative x. The expansion names `Dtype`, so the macro can only be used
// where a type called Dtype is in scope (e.g. inside the templates below).
#define ROUND(x) ((int)((x) + (Dtype)0.5))
using std::max;
using std::min;
namespace caffe {
/**
 * @brief Applies a bounding-box regression delta to a box in place, clips the
 *        result to the image, and reports whether it is still large enough.
 *
 * @param box        In/out (x1, y1, x2, y2); overwritten with the new box.
 * @param dx, dy     Center shift, expressed as a fraction of the box width/height.
 * @param d_log_w    Log-space width scale: new width  = exp(d_log_w) * width.
 * @param d_log_h    Log-space height scale: new height = exp(d_log_h) * height.
 * @param img_W      Image width;  x coordinates are clipped to [0, img_W - 1].
 * @param img_H      Image height; y coordinates are clipped to [0, img_H - 1].
 * @param min_box_W  Minimum accepted width of the clipped box.
 * @param min_box_H  Minimum accepted height of the clipped box.
 * @return 1 if the clipped box is at least min_box_W x min_box_H, otherwise 0.
 */
template <typename Dtype>
int transform_box(Dtype box[],
                  const Dtype dx, const Dtype dy,
                  const Dtype d_log_w, const Dtype d_log_h,
                  const Dtype img_W, const Dtype img_H,
                  const Dtype min_box_W, const Dtype min_box_H)
{
  const Dtype one = static_cast<Dtype>(1);
  const Dtype half = static_cast<Dtype>(0.5);
  const Dtype zero = static_cast<Dtype>(0);

  // width & height of box (inclusive pixel coordinates, hence the +1)
  const Dtype w = box[2] - box[0] + one;
  const Dtype h = box[3] - box[1] + one;
  // center location of box
  const Dtype ctr_x = box[0] + half * w;
  const Dtype ctr_y = box[1] + half * h;

  // new center location according to the deltas (dx, dy)
  const Dtype pred_ctr_x = dx * w + ctr_x;
  const Dtype pred_ctr_y = dy * h + ctr_y;
  // new width & height according to the deltas d(log w), d(log h)
  const Dtype pred_w = exp(d_log_w) * w;
  const Dtype pred_h = exp(d_log_h) * h;

  // upper-left and lower-right corners of the predicted box
  box[0] = pred_ctr_x - half * pred_w;
  box[1] = pred_ctr_y - half * pred_h;
  box[2] = pred_ctr_x + half * pred_w;
  box[3] = pred_ctr_y + half * pred_h;

  // clip every corner so that it lies within the image region
  box[0] = std::max(zero, std::min(box[0], img_W - one));
  box[1] = std::max(zero, std::min(box[1], img_H - one));
  box[2] = std::max(zero, std::min(box[2], img_W - one));
  box[3] = std::max(zero, std::min(box[3], img_H - one));

  // size of the clipped box
  const Dtype box_w = box[2] - box[0] + one;
  const Dtype box_h = box[3] - box[1] + one;

  // 1 only when both dimensions reach the threshold
  return (box_w >= min_box_W) * (box_h >= min_box_H);
}
/**
 * @brief In-place quicksort of 5-float records by their 5th element (the
 *        score), in descending order.
 *
 * Each record occupies list_cpu[i * 5 .. i * 5 + 4]; whole records are moved
 * together. The right-hand partition is only recursed into while it starts
 * below index num_top, so entries at or beyond num_top may stay unsorted.
 *
 * @param list_cpu  Array of records, 5 values each.
 * @param start     Index of the first record in the range (inclusive).
 * @param end       Index of the last record in the range (inclusive).
 * @param num_top   Number of leading records that must end up fully sorted.
 */
template <typename Dtype>
void sort_box(Dtype list_cpu[], const int start, const int end,
              const int num_top)
{
  // Nothing to order for an empty or single-record range; this also avoids
  // reading a pivot from an empty range.
  if (start >= end) {
    return;
  }

  const Dtype pivot_score = list_cpu[start * 5 + 4];
  int left = start + 1, right = end;
  Dtype temp[5];

  while (left <= right) {
    // skip records that are already on the correct side of the pivot
    while (left <= end && list_cpu[left * 5 + 4] >= pivot_score) ++left;
    while (right > start && list_cpu[right * 5 + 4] <= pivot_score) --right;
    if (left <= right) {
      // swap records `left` and `right`
      for (int i = 0; i < 5; ++i) {
        temp[i] = list_cpu[left * 5 + i];
        list_cpu[left * 5 + i] = list_cpu[right * 5 + i];
        list_cpu[right * 5 + i] = temp[i];
      }
      ++left;
      --right;
    }
  }

  // move the pivot record into its final position
  if (right > start) {
    for (int i = 0; i < 5; ++i) {
      temp[i] = list_cpu[start * 5 + i];
      list_cpu[start * 5 + i] = list_cpu[right * 5 + i];
      list_cpu[right * 5 + i] = temp[i];
    }
  }

  if (start < right - 1) {
    sort_box(list_cpu, start, right - 1, num_top);
  }
  // the lower-scored partition matters only if it overlaps the top num_top
  if (right + 1 < num_top && right + 1 < end) {
    sort_box(list_cpu, right + 1, end, num_top);
  }
}
/**
 * @brief Fills `anchors` with num_ratios * num_scales reference boxes centred
 *        on a base_size x base_size square.
 *
 * Output layout: 4 values (x1, y1, x2, y2) per anchor, ratio-major, i.e. the
 * anchor for (ratio i, scale j) starts at anchors[(i * num_scales + j) * 4].
 *
 * @param base_size   Side length of the base square.
 * @param ratios      num_ratios height/width ratios (must be non-zero).
 * @param scales      num_scales multipliers applied to the ratio-adjusted box.
 * @param num_ratios  Number of entries in `ratios`.
 * @param num_scales  Number of entries in `scales`.
 * @param anchors     Output buffer with room for num_ratios * num_scales * 4 values.
 */
template <typename Dtype>
void generate_anchors(int base_size,
                      const Dtype ratios[],
                      const Dtype scales[],
                      const int num_ratios,
                      const int num_scales,
                      Dtype anchors[])
{
  const Dtype half = static_cast<Dtype>(0.5);
  const Dtype one = static_cast<Dtype>(1);

  // base box's area & center location
  const Dtype base_area = static_cast<Dtype>(base_size * base_size);
  const Dtype center = half * (base_size - one);

  // enumerate all transformed boxes
  Dtype* p_anchors = anchors;
  for (int i = 0; i < num_ratios; ++i) {
    // Width & height for this ratio, rounded by adding 0.5 and truncating
    // to int (written out here instead of relying on the ROUND macro).
    const Dtype ratio_w = static_cast<Dtype>(
        static_cast<int>(sqrt(base_area / ratios[i]) + half));
    const Dtype ratio_h = static_cast<Dtype>(
        static_cast<int>(ratio_w * ratios[i] + half));

    for (int j = 0; j < num_scales; ++j) {
      // half-extent of the box for this scale
      const Dtype scale_w = half * (ratio_w * scales[j] - one);
      const Dtype scale_h = half * (ratio_h * scales[j] - one);

      // (x1, y1, x2, y2) for transformed box
      p_anchors[0] = center - scale_w;
      p_anchors[1] = center - scale_h;
      p_anchors[2] = center + scale_w;
      p_anchors[3] = center + scale_h;
      p_anchors += 4;
    } // endfor j
  } // endfor i
}
/**
 * @brief Builds one (x1, y1, x2, y2, score) proposal per anchor per
 *        feature-map cell.
 *
 * For cell (h, w) and anchor k the anchor is shifted by
 * (w * feat_stride, h * feat_stride), refined with transform_box using the
 * four deltas read from d_anchor4d, and stored together with its score.
 * Proposals are written in (h, w, k) order, 5 values each.
 *
 * @param bottom4d     Scores, read at [k * H * W + h * W + w].
 * @param d_anchor4d   Box deltas, read at [(k * 4 + c) * H * W + h * W + w]
 *                     for c = 0..3 (dx, dy, d_log_w, d_log_h).
 * @param anchors      num_anchors reference boxes, 4 values each.
 * @param proposals    Output buffer, num_anchors * bottom_H * bottom_W * 5 values.
 * @param num_anchors  Number of anchors per cell.
 * @param bottom_H, bottom_W    Feature-map height and width.
 * @param img_H, img_W          Image size used for clipping.
 * @param min_box_H, min_box_W  Minimum accepted box size.
 * @param feat_stride  Multiplier from feature-map cell index to image coordinates.
 */
template <typename Dtype>
void enumerate_proposals_cpu(const Dtype bottom4d[],
                             const Dtype d_anchor4d[],
                             const Dtype anchors[],
                             Dtype proposals[],
                             const int num_anchors,
                             const int bottom_H, const int bottom_W,
                             const Dtype img_H, const Dtype img_W,
                             const Dtype min_box_H, const Dtype min_box_W,
                             const int feat_stride)
{
  Dtype* p_proposal = proposals;
  const int bottom_area = bottom_H * bottom_W;

  for (int h = 0; h < bottom_H; ++h) {
    for (int w = 0; w < bottom_W; ++w) {
      // image-space offset of this feature-map cell
      const Dtype x = w * feat_stride;
      const Dtype y = h * feat_stride;
      // first delta / first score belonging to this cell
      const Dtype* p_box = d_anchor4d + h * bottom_W + w;
      const Dtype* p_score = bottom4d + h * bottom_W + w;

      for (int k = 0; k < num_anchors; ++k) {
        const Dtype dx = p_box[(k * 4 + 0) * bottom_area];
        const Dtype dy = p_box[(k * 4 + 1) * bottom_area];
        const Dtype d_log_w = p_box[(k * 4 + 2) * bottom_area];
        const Dtype d_log_h = p_box[(k * 4 + 3) * bottom_area];

        // anchor k shifted to this cell
        p_proposal[0] = x + anchors[k * 4 + 0];
        p_proposal[1] = y + anchors[k * 4 + 1];
        p_proposal[2] = x + anchors[k * 4 + 2];
        p_proposal[3] = y + anchors[k * 4 + 3];

        // transform_box refines and clips the box in place and returns 0 when
        // the clipped box is below the minimum size, which zeroes the score
        // of such proposals.
        p_proposal[4]
            = transform_box(p_proposal,
                            dx, dy, d_log_w, d_log_h,
                            img_W, img_H, min_box_W, min_box_H)
              * p_score[k * bottom_area];
        p_proposal += 5;
      } // endfor k
    } // endfor w
  } // endfor h
}
/**
 * @brief Copies the selected proposals into the RoI output layout.
 *
 * For each i in [0, num_rois) the proposal at index roi_indices[i] is written
 * to rois as (item_index, x1, y1, x2, y2); its score goes to roi_scores[i]
 * when roi_scores is non-null.
 *
 * @param num_rois     Number of RoIs to copy.
 * @param item_index   Value stored in the first column of every RoI.
 * @param proposals    Source records, 5 values each (x1, y1, x2, y2, score).
 * @param roi_indices  num_rois indices into `proposals`.
 * @param rois         Output, num_rois * 5 values.
 * @param roi_scores   Optional output of num_rois scores; may be null.
 */
template <typename Dtype>
void retrieve_rois_cpu(const int num_rois,
                       const int item_index,
                       const Dtype proposals[],
                       const int roi_indices[],
                       Dtype rois[],
                       Dtype roi_scores[])
{
  for (int i = 0; i < num_rois; ++i) {
    const Dtype* const src = proposals + roi_indices[i] * 5;
    Dtype* const dst = rois + i * 5;

    dst[0] = static_cast<Dtype>(item_index);
    dst[1] = src[0];
    dst[2] = src[1];
    dst[3] = src[2];
    dst[4] = src[3];

    if (roi_scores) {
      roi_scores[i] = src[4];
    }
  }
}
template <typename Dtype>
void ProposalLayer::LayerSetUp(const vector *>& bottom,
const vector *>& top) {
ProposalParameter param = this->layer_param_.proposal_param();
base_size_ = param.base_size();
feat_stride_ = param.feat_stride();
pre_nms_topn_ = param.pre_nms_topn(); //使用nms前RPN proposals最大数量
post_nms_topn_ = param.post_nms_topn();//使用nms后RPN proposals数量
nms_thresh_ = param.nms_thresh(); //阈值 0.7
min_size_ = param.min_size(); //proposal的H W都要大于16
vector ratios(param.ratio_size());
for (int i = 0; i < param.ratio_size(); ++i) {
ratios[i] = param.ratio(i);
vector scales(param.scale_size());
for (int i = 0; i < param.scale_size(); ++i) {
scales[i] = param.scale(i);
vector<int> anchors_shape(2);
anchors_shape[0] = ratios.size() * scales.size();
anchors_shape[1] = 4;
generate_anchors(base_size_, &ratios[0], &scales[0],
ratios.size(), scales.size(),
vector<int> roi_indices_shape(1);
roi_indices_shape[0] = post_nms_topn_;
//输出Top[0],是 R个 regions of interest, each is a 5-tuple (n, x1, y1, x2, y2) ,
//其中n 代表batch index; x1, y1, x2, y2表示矩形的4个点的坐标。
// rois blob : holds R regions of interest, each is a 5 - tuple
// (n, x1, y1, x2, y2) specifying an image batch index n and a
// rectangle(x1, y1, x2, y2)
vector<int> top_shape(2);
top_shape[0] = bottom[0]->shape(0) * post_nms_topn_;
top_shape[1] = 5;
// scores blob : holds scores for R regions of interest
if (top.size() > 1) {
template <typename Dtype>
void ProposalLayer::Forward_cpu(const vector *>& bottom,
const vector *>& top)
CHECK_EQ(bottom[0]->shape(0), 1) << "Only single item batches are supported";
const Dtype* p_bottom_item = bottom[0]->cpu_data();
const Dtype* p_d_anchor_item = bottom[1]->cpu_data();
const Dtype* p_img_info_cpu = bottom[2]->cpu_data();
Dtype* p_roi_item = top[0]->mutable_cpu_data();
Dtype* p_score_item = (top.size() > 1) ? top[1]->mutable_cpu_data() : NULL;
vector<int> proposals_shape(2);
vector<int> top_shape(2);
proposals_shape[0] = 0;
proposals_shape[1] = 5;
top_shape[0] = 0;
top_shape[1] = 5;
for (int n = 0; n < bottom[0]->shape(0); ++n) {
// bottom shape: (2 x num_anchors) x H x W
const int bottom_H = bottom[0]->height();
const int bottom_W = bottom[0]->width();
// input image height & width
const Dtype img_H = p_img_info_cpu[0];
const Dtype img_W = p_img_info_cpu[1];
// scale factor for height & width
const Dtype scale_H = p_img_info_cpu[2];
const Dtype scale_W = p_img_info_cpu[3];
// minimum box width & height
const Dtype min_box_H = min_size_ * scale_H;
const Dtype min_box_W = min_size_ * scale_W;
// number of all proposals = num_anchors * H * W
const int num_proposals = anchors_.shape(0) * bottom_H * bottom_W;
// number of top-n proposals before NMS
const int pre_nms_topn = std::min(num_proposals, pre_nms_topn_);
// number of final RoIs
int num_rois = 0;
// enumerate all proposals
// num_proposals = num_anchors * H * W
// (x1, y1, x2, y2, score) for each proposal
// NOTE: for bottom, only foreground scores are passed
proposals_shape[0] = num_proposals;
p_bottom_item + num_proposals, p_d_anchor_item,
anchors_.cpu_data(), proposals_.mutable_cpu_data(), anchors_.shape(0),
bottom_H, bottom_W, img_H, img_W, min_box_H, min_box_W,
feat_stride_); //得到所有proposals,就是python中的
sort_box(proposals_.mutable_cpu_data(), 0, num_proposals - 1, pre_nms_topn_);
nms_cpu(pre_nms_topn, proposals_.cpu_data(),
roi_indices_.mutable_cpu_data(), &num_rois,
0, nms_thresh_, post_nms_topn_);
num_rois, n, proposals_.cpu_data(), roi_indices_.cpu_data(),
p_roi_item, p_score_item);
top_shape[0] += num_rois;
if (top.size() > 1) {
#ifdef CPU_ONLY
} // namespace caffe
namespace caffe {
/* ROIPoolingLayer - Region of Interest Pooling Layer
template <typename Dtype>
class ROIPoolingLayer : public Layer {
template <typename Dtype>
class SmoothL1LossLayer : public LossLayer {
template <typename Dtype>
class ProposalLayer : public Layer {
explicit ProposalLayer(const LayerParameter& param)
: Layer(param) {}
virtual void LayerSetUp(const vector *>& bottom,
const vector *>& top);
virtual void Reshape(const vector *>& bottom,
const vector *>& top) {
//LOG(FATAL) << "Reshaping happens during the call to forward.";
virtual inline const char* type() const { return "ProposalLayer"; }
virtual void Forward_cpu(const vector *>& bottom,
const vector *>& top);
virtual void Forward_gpu(const vector *>& bottom,
const vector *>& top);
virtual void Backward_cpu(const vector *>& top,
const vector<bool>& propagate_down, const vector *>& bottom) {}
virtual void Backward_gpu(const vector *>& top,
const vector<bool>& propagate_down, const vector *>& bottom) {}
int base_size_;
int feat_stride_;
int pre_nms_topn_;
int post_nms_topn_;
Dtype nms_thresh_;
int min_size_;
Blob anchors_;
Blob proposals_;
Blob<int> roi_indices_;
Blob<int> nms_mask_;
} // namespace caffe
同时,在实现 proposal layer 的过程中,有一步很关键,就是非极大值抑制(NMS)。原来的 Python 版本实现见 lib/nms 文件夹;这里我们还需要添加 NMS 相关的 hpp 和 cpp 文件。
#include "caffe/util/nms.hpp"
using std::max;
using std::min;
namespace caffe {
/**
 * @brief Intersection-over-union of two axis-aligned boxes.
 *
 * Boxes are (x1, y1, x2, y2) with inclusive coordinates, so a box spans
 * (x2 - x1 + 1) by (y2 - y1 + 1).
 *
 * @param A  First box; only A[0..3] are read.
 * @param B  Second box; only B[0..3] are read.
 * @return 0 when the boxes do not overlap, otherwise
 *         intersection / (area(A) + area(B) - intersection).
 */
template <typename Dtype>
Dtype iou(const Dtype A[], const Dtype B[])
{
  // disjoint boxes: one lies entirely to one side of the other
  if (A[0] > B[2] || A[1] > B[3] || A[2] < B[0] || A[3] < B[1]) {
    return 0;
  }

  const Dtype one = static_cast<Dtype>(1);
  const Dtype zero = static_cast<Dtype>(0);

  // overlapped region (= box)
  const Dtype x1 = std::max(A[0], B[0]);
  const Dtype y1 = std::max(A[1], B[1]);
  const Dtype x2 = std::min(A[2], B[2]);
  const Dtype y2 = std::min(A[3], B[3]);

  // intersection area
  const Dtype width = std::max(zero, x2 - x1 + one);
  const Dtype height = std::max(zero, y2 - y1 + one);
  const Dtype area = width * height;

  // area of A, B
  const Dtype A_area = (A[2] - A[0] + one) * (A[3] - A[1] + one);
  const Dtype B_area = (B[2] - B[0] + one) * (B[3] - B[1] + one);

  // IoU
  return area / (A_area + B_area - area);
}

// Explicit instantiations. A storage-class specifier such as `static` is not
// permitted in an explicit instantiation, so none is used here.
template float iou(const float A[], const float B[]);
template double iou(const double A[], const double B[]);
template <typename Dtype>
void nms_cpu(const int num_boxes,
const Dtype boxes[],
int index_out[],
int* const num_out,
const int base_index,
const Dtype nms_thresh, const int max_num_out)
int count = 0;
std::vector<char> is_dead(num_boxes);
for (int i = 0; i < num_boxes; ++i) {
is_dead[i] = 0;
for (int i = 0; i < num_boxes; ++i) {
if (is_dead[i]) {
index_out[count++] = base_index + i;
if (count == max_num_out) {
for (int j = i + 1; j < num_boxes; ++j) {
if (!is_dead[j] && iou(&boxes[i * 5], &boxes[j * 5]) > nms_thresh) {
is_dead[j] = 1;
*num_out = count;
void nms_cpu(const int num_boxes,
const float boxes[],
int index_out[],
int* const num_out,
const int base_index,
const float nms_thresh, const int max_num_out);
void nms_cpu(const int num_boxes,
const double boxes[],
int index_out[],
int* const num_out,
const int base_index,
const double nms_thresh, const int max_num_out);
} // namespace caffe
#include "caffe/blob.hpp"
namespace caffe {
// CPU non-maximum suppression over `num_boxes` boxes.
//   boxes        input boxes
//   index_out    receives the indices of the kept boxes, offset by base_index
//   num_out      receives the number of kept boxes
//   nms_thresh   overlap threshold used for suppression
//   max_num_out  upper bound on the number of kept boxes
// Declared as a template on the box element type; the definition and its
// explicit instantiations live in the matching .cpp file.
template <typename Dtype>
void nms_cpu(const int num_boxes,
             const Dtype boxes[],
             int index_out[],
             int* const num_out,
             const int base_index,
             const Dtype nms_thresh,
             const int max_num_out);
// GPU counterpart of nms_cpu, declared as a template on the box element type.
// NOTE(review): no definition is shown in this listing. Judging by the
// parameter names, boxes_gpu is device memory, index_out_cpu is host memory
// and p_mask is scratch storage -- confirm against the implementation.
template <typename Dtype>
void nms_gpu(const int num_boxes,
             const Dtype boxes_gpu[],
             Blob<int>* const p_mask,
             int index_out_cpu[],
             int* const num_out,
             const int base_index,
             const Dtype nms_thresh,
             const int max_num_out);
} // namespace caffe
name: "SHLUTEST"
input: "data"
input_dim: 1
input_dim: 1
input_dim: 100
input_dim: 100
# Required when testing the backward pass; otherwise the gradients
# propagated back are all zero.
force_backward: true
layer {
  name: "shlu1"
  type: "Shlu"  # must match the type name you defined earlier; it is case-sensitive
  bottom: "data"
  top: "shlu1"
}
% Smoke test for the custom layer through the Caffe MATLAB interface.
% NOTE(review): the listing used '#' for comments, which MATLAB does not
% accept, and the calls described by the comments (mode selection, filling
% the input, forward/backward) were missing. They are restored here with the
% standard matcaffe API -- confirm against the original script.
cd caffe
addpath ./matlab
model = './shlu_test.prototxt';
% Use GPU mode when testing the GPU code.
caffe.set_mode_cpu();
% caffe.set_mode_gpu();
net = caffe.Net(model, 'test');
% Generate normally distributed random numbers of size 1*1*100*100 and
% store them in the blob of the 'data' layer.
net.blobs('data').set_data(randn(net.blobs('data').shape));
% Forward pass.
net.forward_prefilled();
% Check that "res" is the expected result.
res = net.blobs('shlu1').get_data();
% Backward pass.
% diff is a gradient value you choose yourself; its dimensions must match
% those of the 'shlu1' blob. All ones is used here as a placeholder.
diff = ones(size(res));
net.blobs('shlu1').set_diff(diff);
net.backward_prefilled();
% Check that "data_diff" is the expected result.
data_diff = net.blobs('data').get_diff();
src/caffe/layers/proposal_layer.cpp:321:10: error: redefinition of ‘void caffe::ProposalLayer::Backward_gpu(const std::vector*>&, const std::vector&, const std::vector*>&)’
./include/caffe/util/device_alternate.hpp:17:6: note: in definition of macro ‘STUB_GPU’
void classname::Backward_gpu(const vector*>& top, \
In file included from src/caffe/layers/proposal_layer.cpp:1:0:
./include/caffe/fast_rcnn_layers.hpp:122:16: note: ‘virtual void caffe::ProposalLayer::Backward_gpu(const std::vector*>&, const std::vector&, const std::vector*>&)’ previously declared here
virtual void Backward_gpu(const vector*>& top,
Makefile:575: recipe for target '.build_release/src/caffe/layers/proposal_layer.o' failed
make: *** [.build_release/src/caffe/layers/proposal_layer.o] Error 1
make: *** Waiting for unfinished jobs....
// Variant with an empty inline body.
// NOTE(review): presumably this is the header declaration that the compiler
// note "previously declared here" above points at -- confirm.
// (Template arguments were lost from this listing.)
virtual void Backward_gpu(const vector*>& top,
const vector& propagate_down, const vector*>& bottom){}
// Variant declared without a body.
// NOTE(review): presumably the replacement that resolves the redefinition
// error above, leaving the definition to STUB_GPU -- confirm.
virtual void Backward_gpu(const vector*>& top,
const vector& propagate_down, const vector*>& bottom);