本方法参照纯C++版的Faster-Rcnn(通过caffe自定义RPN层实现) 进行设置更改,略有改动。
核心思想:通过添加自定义层(RPN层)代替python层,实现c++版的Faster R-CNN
(1) 添加自定义层 rpn_layer.hpp ,把它放在 caffe/include/caffe/layers/ 目录下
#ifndef CAFFE_RPN_LAYER_HPP_
#define CAFFE_RPN_LAYER_HPP_
#include
#include "caffe/blob.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"
//#include"opencv2/opencv.hpp"
// Function-like max/min helpers.
// The whole expansion is wrapped in parentheses so that the macros can be used
// inside larger expressions (e.g. `mymax(a, b) + 1`) without the surrounding
// operators binding to only one branch of the conditional.
// NOTE: arguments are still evaluated more than once; do not pass expressions
// with side effects.
#define mymax(a,b) (((a)>(b))?(a):(b))
#define mymin(a,b) (((a)>(b))?(b):(a))
namespace caffe {
/**
* @brief implement RPN layer for faster rcnn
*/
template <typename Dtype>
class RPNLayer : public Layer {
public:
explicit RPNLayer(const LayerParameter& param)
: Layer(param) {
m_score_.reset(new Blob());
m_box_.reset(new Blob());
local_anchors_.reset(new Blob());
}
virtual void LayerSetUp(const vector *>& bottom,
const vector *>& top);
virtual void Reshape(const vector *>& bottom,
const vector *>& top){}
virtual inline const char* type() const { return "RPN"; }
struct abox{
Dtype batch_ind;
Dtype x1;
Dtype y1;
Dtype x2;
Dtype y2;
Dtype score;
bool operator <(const abox&tmp) const{
return score < tmp.score;
}
};
protected:
virtual void Forward_cpu(const vector *>& bottom,
const vector *>& top);
//virtual void Forward_gpu(const vector*>& bottom,
//const vector*>& top);
virtual void Backward_cpu(const vector *>& top,
const vector<bool>& propagate_down, const vector *>& bottom){};
int feat_stride_;
int base_size_;
int min_size_;
int pre_nms_topN_;
int post_nms_topN_;
float nms_thresh_;
vector<int> anchor_scales_;
vector<float> ratios_;
vector<vector<float> > gen_anchors_;
int *anchors_;
int anchors_nums_;
int src_height_;
int src_width_;
float src_scale_;
int map_width_;
int map_height_;
shared_ptr > m_score_;
shared_ptr > m_box_;
shared_ptr >local_anchors_;
void generate_anchors();
vector<vector<float> > ratio_enum(vector<float>);
vector<float> whctrs(vector<float>);
vector<float> mkanchor(float w,float h,float x_ctr,float y_ctr);
vector<vector<float> > scale_enum(vector<float>);
//cv::Mat proposal_local_anchor(int width, int height);
void proposal_local_anchor();
void bbox_tranform_inv();
cv::Mat bbox_tranform_inv(cv::Mat local_anchors, cv::Mat boxs_delta);
void nms(std::vector &input_boxes, float nms_thresh);
void filter_boxs(cv::Mat& pre_box, cv::Mat& score, vector & aboxes);
void filter_boxs(vector & aboxes);
};
} // namespace caffe
#endif // CAFFE_RPN_LAYER_HPP_
(2)然后是源文件 rpn_layer.cpp, 放在 caffe/src/caffe/layers/ 目录下
#include
#include
#include "caffe/layers/rpn_layer.hpp"
#include "caffe/util/math_functions.hpp"
#include
// Debug switch; not read anywhere in the code shown here.
int debug = 0;
// Hard-coded table of 9 anchors, 4 values per row.
// Only referenced by the commented-out memcpy in LayerSetUp; the active code
// path builds the anchors with generate_anchors() instead.
int tmp[9][4] = {
{ -83, -39, 100, 56 },
{ -175, -87, 192, 104 },
{ -359, -183, 376, 200 },
{ -55, -55, 72, 72 },
{ -119, -119, 136, 136 },
{ -247, -247, 264, 264 },
{ -35, -79, 52, 96 },
{ -79, -167, 96, 184 },
{ -167, -343, 184, 360 }
};
namespace caffe {
template <typename Dtype>
void RPNLayer::LayerSetUp(
const vector *>& bottom, const vector *>& top) {
anchor_scales_.clear();
ratios_.clear();
feat_stride_ = this->layer_param_.rpn_param().feat_stride();
base_size_ = this->layer_param_.rpn_param().basesize();
min_size_ = this->layer_param_.rpn_param().boxminsize();
pre_nms_topN_ = this->layer_param_.rpn_param().per_nms_topn();
post_nms_topN_ = this->layer_param_.rpn_param().post_nms_topn();
nms_thresh_ = this->layer_param_.rpn_param().nms_thresh();
int scales_num = this->layer_param_.rpn_param().scale_size();
for (int i = 0; i < scales_num; ++i)
{
anchor_scales_.push_back(this->layer_param_.rpn_param().scale(i));
}
int ratios_num = this->layer_param_.rpn_param().ratio_size();
for (int i = 0; i < ratios_num; ++i)
{
ratios_.push_back(this->layer_param_.rpn_param().ratio(i));
}
//anchors_nums_ = 9;
//anchors_ = new int[anchors_nums_ * 4];
//memcpy(anchors_, tmp, 9 * 4 * sizeof(int));
generate_anchors();
anchors_nums_ = gen_anchors_.size();
anchors_ = new int[anchors_nums_ * 4];
for (int i = 0; ifor (int j = 0; j4+j] = gen_anchors_[i][j];
}
}
top[0]->Reshape(1, 5, 1, 1);
if (top.size() > 1)
{
top[1]->Reshape(1, 1, 1, 1);
}
}
template <typename Dtype>
void RPNLayer::generate_anchors(){
//generate base anchor
vector<float> base_anchor;
base_anchor.push_back(0);
base_anchor.push_back(0);
base_anchor.push_back(base_size_ - 1);
base_anchor.push_back(base_size_ - 1);
//enum ratio anchors
vector<vector<float> >ratio_anchors = ratio_enum(base_anchor);
for (int i = 0; i < ratio_anchors.size(); ++i)
{
vector<vector<float> > tmp = scale_enum(ratio_anchors[i]);
gen_anchors_.insert(gen_anchors_.end(), tmp.begin(), tmp.end());
}
}
/**
 * @brief Builds one anchor per entry of anchor_scales_ around the centre of
 *        the given anchor.
 *
 * @param anchor box as (x1, y1, x2, y2).
 * @return one (x1, y1, x2, y2) anchor per scale, with width and height
 *         multiplied by that scale and the centre unchanged.
 */
template <typename Dtype>
vector<vector<float> > RPNLayer<Dtype>::scale_enum(vector<float> anchor) {
  vector<vector<float> > result;
  // whctrs returns (w, h, x_ctr, y_ctr).
  vector<float> reform_anchor = whctrs(anchor);
  const float w = reform_anchor[0];
  const float h = reform_anchor[1];
  const float x_ctr = reform_anchor[2];
  const float y_ctr = reform_anchor[3];
  for (size_t i = 0; i < anchor_scales_.size(); ++i) {
    const float ws = w * anchor_scales_[i];
    const float hs = h * anchor_scales_[i];
    result.push_back(mkanchor(ws, hs, x_ctr, y_ctr));
  }
  return result;
}
/**
 * @brief Builds one anchor per entry of ratios_ around the centre of the
 *        given anchor.
 *
 * @param anchor box as (x1, y1, x2, y2).
 * @return one (x1, y1, x2, y2) anchor per ratio; for each ratio r the width is
 *         round(sqrt(area / r)) and the height is round(width * r).
 */
template <typename Dtype>
vector<vector<float> > RPNLayer<Dtype>::ratio_enum(vector<float> anchor) {
  vector<vector<float> > result;
  // whctrs returns (w, h, x_ctr, y_ctr).
  vector<float> reform_anchor = whctrs(anchor);
  const float x_ctr = reform_anchor[2];
  const float y_ctr = reform_anchor[3];
  const float size = reform_anchor[0] * reform_anchor[1];
  for (size_t i = 0; i < ratios_.size(); ++i) {
    const float size_ratios = size / ratios_[i];
    const float ws = round(sqrt(size_ratios));
    const float hs = round(ws * ratios_[i]);
    result.push_back(mkanchor(ws, hs, x_ctr, y_ctr));
  }
  return result;
}
/**
 * @brief Converts a (width, height, centre) description into corner form.
 *
 * @return (x1, y1, x2, y2) with x1/x2 = x_ctr -/+ 0.5 * (w - 1) and
 *         y1/y2 = y_ctr -/+ 0.5 * (h - 1).
 */
template <typename Dtype>
vector<float> RPNLayer<Dtype>::mkanchor(float w, float h, float x_ctr, float y_ctr) {
  const double half_w = 0.5 * (w - 1);
  const double half_h = 0.5 * (h - 1);
  vector<float> corners;
  corners.push_back(x_ctr - half_w);
  corners.push_back(y_ctr - half_h);
  corners.push_back(x_ctr + half_w);
  corners.push_back(y_ctr + half_h);
  return corners;
}
/**
 * @brief Converts a corner-form anchor into width/height/centre form.
 *
 * @param anchor box as (x1, y1, x2, y2).
 * @return (w, h, x_ctr, y_ctr) where w = x2 - x1 + 1, h = y2 - y1 + 1 and the
 *         centre is the midpoint of the two corners.
 */
template <typename Dtype>
vector<float> RPNLayer<Dtype>::whctrs(vector<float> anchor) {
  vector<float> result;
  result.push_back(anchor[2] - anchor[0] + 1);    // w
  result.push_back(anchor[3] - anchor[1] + 1);    // h
  result.push_back((anchor[2] + anchor[0]) / 2);  // x centre
  result.push_back((anchor[3] + anchor[1]) / 2);  // y centre
  return result;
}
/*template
cv::Mat RPNLayer::proposal_local_anchor(int width, int height)
{
Blob shift;
cv::Mat shitf_x(height, width, CV_32SC1);
cv::Mat shitf_y(height, width, CV_32SC1);
for (size_t i = 0; i < width; i++)
{
for (size_t j = 0; j < height; j++)
{
shitf_x.at(j, i) = i * feat_stride_;
shitf_y.at(j, i) = j * feat_stride_;
}
}
shift.Reshape(anchors_nums_, width*height, 4, 1);
float *p = shift.mutable_cpu_diff(), *a = shift.mutable_cpu_data();
for (int i = 0; i < height*width; i++)
{
for (int j = 0; j < anchors_nums_; j++)
{
size_t num = i * 4 + j * 4 * height*width;
p[num + 0] = -shitf_x.at(i / shitf_x.cols, i % shitf_x.cols);
p[num + 2] = -shitf_x.at(i / shitf_x.cols, i % shitf_x.cols);
p[num + 1] = -shitf_y.at(i / shitf_y.cols, i % shitf_y.cols);
p[num + 3] = -shitf_y.at(i / shitf_y.cols, i % shitf_y.cols);
a[num + 0] = anchors_[j * 4 + 0];
a[num + 1] = anchors_[j * 4 + 1];
a[num + 2] = anchors_[j * 4 + 2];
a[num + 3] = anchors_[j * 4 + 3];
}
}
shift.Update();
cv::Mat loacl_anchors(anchors_nums_ * height*width, 4, CV_32FC1);
size_t num = 0;
for (int i = 0; i < height; ++i)
{
for (int j = 0; j < width; ++j)
{
for (int c = 0; c < anchors_nums_; ++c)
{
for (int k = 0; k < 4; ++k)
{
loacl_anchors.at((i*width + j)*anchors_nums_+c, k)= shift.data_at(c, i*width + j, k, 0);
}
}
}
}
return loacl_anchors;
}*/
template <typename Dtype>
void RPNLayer::proposal_local_anchor(){
int length = mymax(map_width_, map_height_);
int step = map_width_*map_height_;
int *map_m = new int[length];
for (int i = 0; i < length; ++i)
{
map_m[i] = i*feat_stride_;
}
Dtype *shift_x = new Dtype[step];
Dtype *shift_y = new Dtype[step];
for (int i = 0; i < map_height_; ++i)
{
for (int j = 0; j < map_width_; ++j)
{
shift_x[i*map_width_ + j] = map_m[j];
shift_y[i*map_width_ + j] = map_m[i];
}
}
local_anchors_->Reshape(1, anchors_nums_ * 4, map_height_, map_width_);
Dtype *a = local_anchors_->mutable_cpu_data();
for (int i = 0; i < anchors_nums_; ++i)
{
caffe_set(step, Dtype(anchors_[i * 4 + 0]), a + (i * 4 + 0) *step);
caffe_set(step, Dtype(anchors_[i * 4 + 1]), a + (i * 4 + 1) *step);
caffe_set(step, Dtype(anchors_[i * 4 + 2]), a + (i * 4 + 2) *step);
caffe_set(step, Dtype(anchors_[i * 4 + 3]), a + (i * 4 + 3) *step);
caffe_axpy(step, Dtype(1), shift_x, a + (i * 4 + 0)*step);
caffe_axpy(step, Dtype(1), shift_x, a + (i * 4 + 2)*step);
caffe_axpy(step, Dtype(1), shift_y, a + (i * 4 + 1)*step);
caffe_axpy(step, Dtype(1), shift_y, a + (i * 4 + 3)*step);
}
}
template<typename Dtype>
void RPNLayer::filter_boxs(cv::Mat& pre_box, cv::Mat& score, vector & aboxes)
{
float localMinSize=min_size_*src_scale_;
aboxes.clear();
for (int i = 0; i < pre_box.rows; i++)
{
int widths = pre_box.at<float>(i, 2) - pre_box.at<float>(i, 0) + 1;
int heights = pre_box.at<float>(i, 3) - pre_box.at<float>(i, 1) + 1;
if (widths >= localMinSize || heights >= localMinSize)
{
abox tmp;
tmp.x1 = pre_box.at<float>(i, 0);
tmp.y1 = pre_box.at<float>(i, 1);
tmp.x2 = pre_box.at<float>(i, 2);
tmp.y2 = pre_box.at<float>(i, 3);
tmp.score = score.at<float>(i, 0);
aboxes.push_back(tmp);
}
}
}
template<typename Dtype>
void RPNLayer::filter_boxs(vector & aboxes)
{
float localMinSize = min_size_*src_scale_;
aboxes.clear();
int map_width = m_box_->width();
int map_height = m_box_->height();
int map_channel = m_box_->channels();
const Dtype *box = m_box_->cpu_data();
const Dtype *score = m_score_->cpu_data();
int step = 4 * map_height*map_width;
int one_step = map_height*map_width;
int offset_w, offset_h, offset_x, offset_y, offset_s;
for (int h = 0; h < map_height; ++h)
{
for (int w = 0; w < map_width; ++w)
{
offset_x = h*map_width + w;
offset_y = offset_x + one_step;
offset_w = offset_y + one_step;
offset_h = offset_w + one_step;
offset_s = one_step*anchors_nums_+h*map_width + w;
for (int c = 0; c < map_channel / 4; ++c)
{
Dtype width = box[offset_w], height = box[offset_h];
if (width < localMinSize || height < localMinSize)
{
}
else
{
abox tmp;
tmp.batch_ind = 0;
tmp.x1 = box[offset_x] - 0.5*width;
tmp.y1 = box[offset_y] - 0.5*height;
tmp.x2 = box[offset_x] + 0.5*width;
tmp.y2 = box[offset_y] + 0.5*height;
tmp.x1 = mymin(mymax(tmp.x1, 0), src_width_);
tmp.y1 = mymin(mymax(tmp.y1, 0), src_height_);
tmp.x2 = mymin(mymax(tmp.x2, 0), src_width_);
tmp.y2 = mymin(mymax(tmp.y2, 0), src_height_);
tmp.score = score[offset_s];
aboxes.push_back(tmp);
}
offset_x += step;
offset_y += step;
offset_w += step;
offset_h += step;
offset_s += one_step;
}
}
}
}
template<typename Dtype>
void RPNLayer::bbox_tranform_inv(){
int channel = m_box_->channels();
int height = m_box_->height();
int width = m_box_->width();
int step = height*width;
Dtype * a = m_box_->mutable_cpu_data();
Dtype * b = local_anchors_->mutable_cpu_data();
for (int i = 0; i < channel / 4; ++i)
{
caffe_axpy(2*step, Dtype(-1), b + (i * 4 + 0)*step, b + (i * 4 + 2)*step);
caffe_add_scalar(2 * step, Dtype(1), b + (i * 4 + 2)*step);
caffe_axpy(2*step, Dtype(0.5), b + (i * 4 + 2)*step, b + (i * 4 + 0)*step);
caffe_mul(2 * step, b + (i * 4 + 2)*step, a + (i * 4 + 0)*step, a + (i * 4 + 0)*step);
caffe_add(2 * step, b + (i * 4 + 0)*step, a + (i * 4 + 0)*step, a + (i * 4 + 0)*step);
caffe_exp(2*step, a + (i * 4 + 2)*step, a + (i * 4 + 2)*step);
caffe_mul(2 * step, b + (i * 4 + 2)*step, a + (i * 4 + 2)*step, a + (i * 4 + 2)*step);
}
}
template<typename Dtype>
void RPNLayer::nms(std::vector &input_boxes, float nms_thresh){
std::vector<float>vArea(input_boxes.size());
for (int i = 0; i < input_boxes.size(); ++i)
{
vArea[i] = (input_boxes.at(i).x2 - input_boxes.at(i).x1 + 1)
* (input_boxes.at(i).y2 - input_boxes.at(i).y1 + 1);
}
for (int i = 0; i < input_boxes.size(); ++i)
{
for (int j = i + 1; j < input_boxes.size();)
{
float xx1 = std::max(input_boxes[i].x1, input_boxes[j].x1);
float yy1 = std::max(input_boxes[i].y1, input_boxes[j].y1);
float xx2 = std::min(input_boxes[i].x2, input_boxes[j].x2);
float yy2 = std::min(input_boxes[i].y2, input_boxes[j].y2);
float w = std::max(float(0), xx2 - xx1 + 1);
float h = std::max(float(0), yy2 - yy1 + 1);
float inter = w * h;
float ovr = inter / (vArea[i] + vArea[j] - inter);
if (ovr >= nms_thresh)
{
input_boxes.erase(input_boxes.begin() + j);
vArea.erase(vArea.begin() + j);
}
else
{
j++;
}
}
}
}
template <typename Dtype>
void RPNLayer::Forward_cpu(
const vector *>& bottom,
const vector *>& top) {
map_width_ = bottom[1]->width();
map_height_ = bottom[1]->height();
//int channels = bottom[1]->channels();
//get boxs_delta,向右。
m_box_->CopyFrom(*(bottom[1]), false, true);
/*cv::Mat boxs_delta(height*width*anchors_nums_, 4, CV_32FC1);
for (int i = 0; i < height; ++i)
{
for (int j = 0; j < width; ++j)
{
for (int k = 0; k < anchors_nums_; ++k)
{
for (int c = 0; c < 4; ++c)
{
boxs_delta.at((i*width + j)*anchors_nums_ + k, c) = bottom[1]->data_at(0, k*4 + c, i, j);
}
}
}
}*/
//get sores 向右,前面anchors_nums_个位bg的得分,后面anchors_nums_为fg得分,我们需要的是后面的。
m_score_->CopyFrom(*(bottom[0]),false,true);
/*cv::Mat scores(height*width*anchors_nums_, 1, CV_32FC1);
for (int i = 0; i < height; ++i)
{
for (int j = 0; j < width; ++j)
{
for (int k = 0; k < anchors_nums_; ++k)
{
scores.at((i*width + j)*anchors_nums_+k, 0) = bottom[0]->data_at(0, k + anchors_nums_, i, j);
}
}
}*/
//get im_info
src_height_ = bottom[2]->data_at(0, 0,0,0);
src_width_ = bottom[2]->data_at(0, 1,0,0);
src_scale_ = bottom[2]->data_at(0, 2, 0, 0);
//gen local anchors 向右
proposal_local_anchor();
//cv::Mat local_anchors = proposal_local_anchor(width, height);
//Convert anchors into proposals via bbox transformations
bbox_tranform_inv();
/*for (int i = 0; i < pre_box.rows; ++i)
{
if (pre_box.at(i, 0) < 0) pre_box.at(i, 0) = 0;
if (pre_box.at(i, 0) > (src_width_ - 1)) pre_box.at(i, 0) = src_width_ - 1;
if (pre_box.at(i, 2) < 0) pre_box.at(i, 2) = 0;
if (pre_box.at(i, 2) > (src_width_ - 1)) pre_box.at(i, 2) = src_width_ - 1;
if (pre_box.at(i, 1) < 0) pre_box.at(i, 1) = 0;
if (pre_box.at(i, 1) > (src_height_ - 1)) pre_box.at(i, 1) = src_height_ - 1;
if (pre_box.at(i, 3) < 0) pre_box.at(i, 3) = 0;
if (pre_box.at(i, 3) > (src_height_ - 1)) pre_box.at(i, 3) = src_height_ - 1;
}*/
vector aboxes;
filter_boxs(aboxes);
//clock_t start, end;
//start = clock();
std::sort(aboxes.rbegin(), aboxes.rend()); //降序
if (pre_nms_topN_ > 0)
{
int tmp = mymin(pre_nms_topN_, aboxes.size());
aboxes.erase(aboxes.begin() + tmp, aboxes.end());
}
nms(aboxes,nms_thresh_);
//end = clock();
//std::cout << "sort nms:" << (double)(end - start) / CLOCKS_PER_SEC << std::endl;
if (post_nms_topN_ > 0)
{
int tmp = mymin(post_nms_topN_, aboxes.size());
aboxes.erase(aboxes.begin() + tmp, aboxes.end());
}
top[0]->Reshape(aboxes.size(),5,1,1);
Dtype *top0 = top[0]->mutable_cpu_data();
for (int i = 0; i < aboxes.size(); ++i)
{
//caffe_copy(aboxes.size() * 5, (Dtype*)aboxes.data(), top0);
top0[0] = aboxes[i].batch_ind;
top0[1] = aboxes[i].x1;
top0[2] = aboxes[i].y1;
top0[3] = aboxes[i].x2;
top0[4] = aboxes[i].y2;
top0 += top[0]->offset(1);
}
if (top.size()>1)
{
top[1]->Reshape(aboxes.size(), 1,1,1);
Dtype *top1 = top[1]->mutable_cpu_data();
for (int i = 0; i < aboxes.size(); ++i)
{
top1[0] = aboxes[i].score;
top1 += top[1]->offset(1);
}
}
}
// No STUB_GPU here: that macro defines RPNLayer<Dtype>::Forward_gpu and
// RPNLayer<Dtype>::Backward_gpu, but the class declares neither (its
// Forward_gpu declaration is commented out in rpn_layer.hpp), so a CPU_ONLY
// build would fail to compile with the stub in place.
INSTANTIATE_CLASS(RPNLayer);
REGISTER_LAYER_CLASS(RPN);
} // namespace caffe
(3) 添加自定义层 roi pooling layer、smooth_L1_loss_layer(此层仅供训练时使用)
注意:参照的blog中提到需要添加roi pooling层,其实py-faster-rcnn的caffe-fast-rcnn中已经含有roi pooling layer,故此步骤如果用py-faster-rcnn的caffe-fast-rcnn可以不用添加,保留原始代码即可。如果用最新caffe,则需做如下操作:
1、将 py-faster-rcnn/caffe-fast-rcnn/src/caffe/layers/roi_pooling_layer.cpp 、roi_pooling_layer.cu 、smooth_L1_loss_layer.cpp、smooth_L1_loss_layer.cu 四个文件放入 caffe/src/caffe/layers/ 下
注意: roi pooling layer 用 py-faster-rcnn原本的代码也可以,用参照的blog里的代码也可。
2、将 py-faster-rcnn/caffe-fast-rcnn/include/caffe/fast_rcnn_layers.hpp、data_reader.hpp 放入 caffe/include/caffe/ 下
3、将 py-faster-rcnn/caffe-fast-rcnn/src/caffe/data_reader.cpp 放入 caffe/src/caffe/ 下
(4)在caffe/src/caffe/proto/caffe.proto 中声明这三个类
在message LayerParameter 中添加:
optional ROIPoolingParameter roi_pooling_param = 8266711;
optional SmoothL1LossParameter smooth_l1_loss_param = 8266712;
optional RPNParameter rpn_param = 8266713;
在这个文件的最末尾,定义具体的参数
message ROIPoolingParameter {
optional uint32 pooled_h = 1 [default = 0];
optional uint32 pooled_w = 2 [default = 0];
optional float spatial_scale = 3 [default = 1];
}
// Parameters of the RPN proposal layer (read in RPNLayer::LayerSetUp).
message RPNParameter {
// Multiplier applied to feature-map cell indices to shift the anchors.
optional uint32 feat_stride = 1;
// Side of the base anchor: the base box is (0, 0, basesize - 1, basesize - 1).
optional uint32 basesize = 2;
// Anchor scales; one anchor is generated per (ratio, scale) pair.
repeated uint32 scale = 3;
// Anchor aspect ratios.
repeated float ratio = 4;
// Minimum box side; multiplied by the im_info scale before use.
optional uint32 boxminsize =5;
// Number of top-scoring boxes kept before NMS; only applied when > 0.
optional uint32 per_nms_topn = 9;
// Number of boxes kept after NMS; only applied when > 0.
optional uint32 post_nms_topn = 11;
// NMS threshold: a box is suppressed when its IoU with a kept box is >= this.
optional float nms_thresh = 8;
}
message SmoothL1LossParameter {
// SmoothL1Loss(x) =
// 0.5 * (sigma * x) ** 2 -- if x < 1.0 / sigma / sigma
// |x| - 0.5 / sigma / sigma -- otherwise
optional float sigma = 1 [default = 1];
}
(5)因为自定义层使用了RPN层,为了以后程序中各处都能使用该层,所以得在common.hpp和common.cpp文件的最末尾,添加对应的代码,注意这里的namespace RPN是和namespace caffe同一级的
1、头文件common.hpp里添加
// Helpers shared by code outside the RPN layer (e.g. the detector class).
namespace RPN{
// One box with its score.
struct abox
{
float x1;
float y1;
float x2;
float y2;
float score;
// Orders boxes by ascending score.
bool operator <(const abox&tmp) const{
return score < tmp.score;
}
};
// Greedy non-maximum suppression, in place: a box is removed when its IoU
// with an earlier surviving box is >= nms_thresh.
void nms(std::vector<abox>& input_boxes, float nms_thresh);
// Applies one row of (dx, dy, dw, dh) deltas to each (x1, y1, x2, y2) box row
// and returns the predicted boxes, one row per input row.
cv::Mat bbox_tranform_inv(cv::Mat, cv::Mat);
} // namespace RPN
源文件common.cpp里,为了防止说找不到cv::Mat类型的错误,添加opencv头文件
#include
using namespace cv;
在} // namespace caffe后添加:
namespace RPN{
// Decodes box regression deltas against reference boxes.
//
// local_anchors: one reference box per row as (x1, y1, x2, y2), float.
// boxs_delta:    one (dx, dy, dw, dh) row per reference box, float.
// Returns a float matrix of the same size as local_anchors whose first four
// columns hold the predicted (x1, y1, x2, y2) of each row.
cv::Mat bbox_tranform_inv(cv::Mat local_anchors, cv::Mat boxs_delta){
    cv::Mat pre_box(local_anchors.rows, local_anchors.cols, CV_32FC1);
    for (int row = 0; row < local_anchors.rows; ++row)
    {
        const float *anchor = local_anchors.ptr<float>(row);
        const float *delta = boxs_delta.ptr<float>(row);
        float *out = pre_box.ptr<float>(row);

        // Reference box size and centre.
        const double src_w = anchor[2] - anchor[0] + 1;
        const double src_h = anchor[3] - anchor[1] + 1;
        const double src_ctr_x = anchor[0] + 0.5 * src_w;
        const double src_ctr_y = anchor[1] + 0.5 * src_h;

        // Deltas, widened to double like the rest of the arithmetic.
        const double dst_ctr_x = delta[0];
        const double dst_ctr_y = delta[1];
        const double dst_scl_x = delta[2];
        const double dst_scl_y = delta[3];

        // Predicted centre and size.
        const double pred_ctr_x = dst_ctr_x * src_w + src_ctr_x;
        const double pred_ctr_y = dst_ctr_y * src_h + src_ctr_y;
        const double pred_w = exp(dst_scl_x) * src_w;
        const double pred_h = exp(dst_scl_y) * src_h;

        // Back to corner form.
        out[0] = pred_ctr_x - 0.5 * pred_w;
        out[1] = pred_ctr_y - 0.5 * pred_h;
        out[2] = pred_ctr_x + 0.5 * pred_w;
        out[3] = pred_ctr_y + 0.5 * pred_h;
    }
    return pre_box;
}
void nms(std::vector &input_boxes, float nms_thresh){
std::vector<float>vArea(input_boxes.size());
for (int i = 0; i < input_boxes.size(); ++i)
{
vArea[i] = (input_boxes.at(i).x2 - input_boxes.at(i).x1 + 1)
* (input_boxes.at(i).y2 - input_boxes.at(i).y1 + 1);
}
for (int i = 0; i < input_boxes.size(); ++i)
{
for (int j = i + 1; j < input_boxes.size();)
{
float xx1 = std::max(input_boxes[i].x1, input_boxes[j].x1);
float yy1 = std::max(input_boxes[i].y1, input_boxes[j].y1);
float xx2 = std::min(input_boxes[i].x2, input_boxes[j].x2);
float yy2 = std::min(input_boxes[i].y2, input_boxes[j].y2);
float w = std::max(float(0), xx2 - xx1 + 1);
float h = std::max(float(0), yy2 - yy1 + 1);
float inter = w * h;
float ovr = inter / (vArea[i] + vArea[j] - inter);
if (ovr >= nms_thresh)
{
input_boxes.erase(input_boxes.begin() + j);
vArea.erase(vArea.begin() + j);
}
else
{
j++;
}
}
}
}
}
(6) 更改caffe/src/caffe/util/blocking_queue.cpp
增加一个头文件
#include "caffe/data_reader.hpp"
其次,在最末尾处,将如下代码
template class BlockingQueue<Batch<float>*>;
template class BlockingQueue<Batch<double>*>;
更改为:
template class BlockingQueue<Batch<float>*>;
template class BlockingQueue<Batch<double>*>;
template class BlockingQueue<Datum*>;
template class BlockingQueue<shared_ptr<DataReader::QueuePair> >;
(7)重新生成caffe.pb.h
cd $caffe/include/caffe/
mkdir proto
cd $caffe/src/caffe/proto
protoc --cpp_out=$your_home/caffe/include/caffe/proto/ caffe.proto
执行完后会在$caffe/include/caffe/proto/下生成2个文件,caffe.pb.h和caffe.pb.cc。
(8)编译caffe
make clean
make -j8
make pycaffe (不需要用python的可以不用,反正编译耗时也不长)
(9)环境已经配置好了,现在我们再加个类,用来对图片进行检测吧!编写头文件ObjectDetector.hpp
#define OBJECTDETECTOR_H
#define INPUT_SIZE_NARROW 600
#define INPUT_SIZE_LONG 1000
#include
#include
#include
#include
#include
#include
#include
using namespace std;
class ObjectDetector
{
public:
ObjectDetector(const std::string &model_file, const std::string &weights_file); //构造函数
//对一张图片,进行检测,将结果保存进map数据结构里,分别表示每个类别对应的目标框,如果需要分数信息,则计算分数
map<int,vector > detect(const cv::Mat& image, map<int,vector<float> >* score=NULL);
private:
boost::shared_ptr< caffe::Net<float> > net_;
int class_num_; //类别数+1 ,官方给的demo 是20+1类
};
#endif
(10) 源文件ObjectDetector.cpp
#include "ObjectDetector.hpp"
#include
#include
#include
#include
using std::string;
using std::vector;
using namespace caffe;
using std::max;
using std::min;
// Selects the Caffe execution mode at compile time (CPU when CPU_ONLY is
// defined, GPU otherwise), builds the TEST-phase network from `model_file`,
// loads the weights from `weights_file`, and caches the channel count of the
// "cls_prob" blob as the number of classes (background included).
ObjectDetector::ObjectDetector(const std::string &model_file,const std::string &weights_file){
#ifdef CPU_ONLY
    const Caffe::Brew mode = Caffe::CPU;
#else
    const Caffe::Brew mode = Caffe::GPU;
#endif
    Caffe::set_mode(mode);

    net_.reset(new Net<float>(model_file, TEST));
    net_->CopyTrainedLayersFrom(weights_file);

    // number of classes + 1
    class_num_ = net_->blob_by_name("cls_prob")->channels();
}
// Runs detection on one image.
//
// image:       input image (an empty image yields an empty result).
// objectScore: optional; when not NULL it is cleared and then receives, per
//              class label, the confidence of each returned box in the same
//              order as the boxes.
// Returns a map from class label (1 .. class_num_ - 1; 0 is background and is
// skipped) to the boxes detected for that class, in original-image
// coordinates. Only classes with at least one detection get an entry.
map<int,vector<cv::Rect> > ObjectDetector::detect(const cv::Mat& image,map<int,vector<float> >* objectScore){
    map<int,vector<cv::Rect> > label_objs;  // boxes per class label
    if (objectScore != NULL)
        objectScore->clear();
    if (image.empty())
        return label_objs;

    const float CONF_THRESH = 0.8;  // confidence threshold
    const float NMS_THRESH = 0.3;   // NMS IoU threshold

    // Scale so the short side approaches INPUT_SIZE_NARROW without the long
    // side exceeding INPUT_SIZE_LONG.
    const int max_side = max(image.rows, image.cols);
    const int min_side = min(image.rows, image.cols);
    const float max_side_scale = float(max_side) / float(INPUT_SIZE_LONG);
    const float min_side_scale = float(min_side) / float(INPUT_SIZE_NARROW);
    const float max_scale = max(max_side_scale, min_side_scale);
    const float img_scale = float(1) / max_scale;
    const int height = int(image.rows * img_scale);
    const int width = int(image.cols * img_scale);

    // Convert to float, resize and subtract the per-channel mean.
    cv::Mat cv_resized;
    image.convertTo(cv_resized, CV_32FC3);
    cv::resize(cv_resized, cv_resized, cv::Size(width, height));
    cv::Mat mean(height, width, cv_resized.type(), cv::Scalar(102.9801, 115.9465, 122.7717));
    cv::Mat normalized;
    cv::subtract(cv_resized, mean, normalized);

    boost::shared_ptr<Blob<float> > input_layer = net_->blob_by_name("data");
    input_layer->Reshape(1, normalized.channels(), height, width);
    net_->Reshape();

    // Wrap each channel plane of the input blob in a cv::Mat header so that
    // cv::split writes the image straight into the blob.
    float* input_data = input_layer->mutable_cpu_data();
    vector<cv::Mat> input_channels;
    for (int i = 0; i < input_layer->channels(); ++i) {
        cv::Mat channel(height, width, CV_32FC1, input_data);
        input_channels.push_back(channel);
        input_data += height * width;
    }
    cv::split(normalized, input_channels);

    // Copy im_info into the blob's own storage. Pointing the blob at a local
    // array (set_cpu_data) would leave it referencing dead stack memory once
    // this function returns.
    boost::shared_ptr<Blob<float> > im_info_blob = net_->blob_by_name("im_info");
    CHECK_GE(im_info_blob->count(), 3) << "im_info must hold height, width, scale";
    float* im_info = im_info_blob->mutable_cpu_data();
    im_info[0] = height;
    im_info[1] = width;
    im_info[2] = img_scale;

    net_->ForwardFrom(0);  // forward pass

    // ROIs: 5 values per row (batch index, x1, y1, x2, y2) in the scaled
    // image; map them back to original-image coordinates.
    const int num = net_->blob_by_name("rois")->num();
    const float *rois_data = net_->blob_by_name("rois")->cpu_data();
    cv::Mat rois_box(num, 4, CV_32FC1);
    for (int i = 0; i < num; ++i)
    {
        rois_box.at<float>(i, 0) = rois_data[i * 5 + 1] / img_scale;
        rois_box.at<float>(i, 1) = rois_data[i * 5 + 2] / img_scale;
        rois_box.at<float>(i, 2) = rois_data[i * 5 + 3] / img_scale;
        rois_box.at<float>(i, 3) = rois_data[i * 5 + 4] / img_scale;
    }

    boost::shared_ptr<Blob<float> > bbox_delt_data = net_->blob_by_name("bbox_pred");  // num x (4 * classes)
    boost::shared_ptr<Blob<float> > score = net_->blob_by_name("cls_prob");            // num x classes

    const float max_x = float(image.cols - 1);
    const float max_y = float(image.rows - 1);

    for (int i = 1; i < class_num_; ++i){  // every class except background
        // Deltas of class i for every ROI.
        cv::Mat bbox_delt(num, 4, CV_32FC1);
        for (int j = 0; j < num; ++j){
            bbox_delt.at<float>(j, 0) = bbox_delt_data->data_at(j, i * 4 + 0, 0, 0);
            bbox_delt.at<float>(j, 1) = bbox_delt_data->data_at(j, i * 4 + 1, 0, 0);
            bbox_delt.at<float>(j, 2) = bbox_delt_data->data_at(j, i * 4 + 2, 0, 0);
            bbox_delt.at<float>(j, 3) = bbox_delt_data->data_at(j, i * 4 + 3, 0, 0);
        }
        cv::Mat box_class = RPN::bbox_tranform_inv(rois_box, bbox_delt);

        // Clip to the image and attach the class score.
        vector<RPN::abox> aboxes;
        aboxes.reserve(box_class.rows);
        for (int j = 0; j < box_class.rows; ++j){
            RPN::abox tmp;
            tmp.x1 = min(max(box_class.at<float>(j, 0), 0.0f), max_x);
            tmp.y1 = min(max(box_class.at<float>(j, 1), 0.0f), max_y);
            tmp.x2 = min(max(box_class.at<float>(j, 2), 0.0f), max_x);
            tmp.y2 = min(max(box_class.at<float>(j, 3), 0.0f), max_y);
            tmp.score = score->data_at(j, i, 0, 0);
            aboxes.push_back(tmp);
        }

        std::sort(aboxes.rbegin(), aboxes.rend());  // descending by score
        RPN::nms(aboxes, NMS_THRESH);               // drop redundant boxes

        // Keep only confident detections, collecting boxes and scores together.
        vector<cv::Rect> rect;
        vector<float> rect_score;
        for (size_t k = 0; k < aboxes.size(); ++k){
            if (aboxes[k].score < CONF_THRESH)
                continue;
            rect.push_back(cv::Rect(
                cv::Point(static_cast<int>(aboxes[k].x1), static_cast<int>(aboxes[k].y1)),
                cv::Point(static_cast<int>(aboxes[k].x2), static_cast<int>(aboxes[k].y2))));
            rect_score.push_back(aboxes[k].score);
        }
        if (rect.empty())
            continue;

        label_objs[i] = rect;
        if (objectScore != NULL)
            objectScore->insert(pair<int,vector<float> >(i, rect_score));
    }
    return label_objs;
}
注意:更改之处:net_->Forward(); 更改为 net_->ForwardFrom(0);
这里的函数返回的是一个map对象,每一个键(类别label),对应一个矩形框向量。比如,一个20类检测任务,而一张图片里有3个人(标签是1),和2辆车(标签是5),那函数会返回一个map,其中有两个键值对,键1对应的值是一个3维的矩形框向量,分别代表着3个人的矩形框;键5对应的值是一个2维的矩形框向量,分别代表的是2辆车的矩形框。同时,函数还接受一个可选参数,可以返回每个矩形框各自对应的置信度。
(11)写个主函数,测试下效果,首先更改下网络描述文件test.prototxt,因为我们不需要python层了,那我们打开这个文件,定位到 Python层,
layer {
name: 'proposal'
type: 'Python'
bottom: 'rpn_cls_prob_reshape'
bottom: 'rpn_bbox_pred'
bottom: 'im_info'
top: 'rois'
python_param {
module: 'rpn.proposal_layer'
layer: 'ProposalLayer'
param_str: "'feat_stride': 16"
}
}
把它修改为
layer {
name: "proposal"
type: "RPN"
bottom: "rpn_cls_prob_reshape"
bottom: "rpn_bbox_pred"
bottom: "im_info"
top: "rois"
rpn_param {
feat_stride : 16
basesize : 16
scale : 8
scale : 16
scale : 32
ratio : 0.5
ratio : 1
ratio : 2
boxminsize :16
per_nms_topn : 6000 # The referenced blog set this to 0, which differs from the Python version and made the C++ results much worse; changed to 6000 to match the original Python settings.
post_nms_topn : 300 # Likewise changed to match the original Python settings.
nms_thresh : 0.7 # Likewise changed to match the original Python settings.
}
}
(12)编写主函数进行测试
#include "ObjectDetector.hpp"
#include
#include
#include
#include
using namespace cv;
using namespace std;
const double clocksPerMillis = double(CLOCKS_PER_SEC) / 1000;
// Formats a float as text using the default stream formatting
// (e.g. 1.5f -> "1.5").
std::string num2str(float i){
    std::stringstream ss;
    ss << i;
    return ss.str();
}
// void drawRectOnImage(cv::Mat& im, Rect pos) {
// int thickness = 2;
// Scalar color(0, 0, 255);// color is blue.
// Mat overlay;
// im.copyTo(overlay);
// rectangle(overlay, pos, color, thickness);
// double alpha = 0.5;
// cv::addWeighted(overlay, alpha, im, 1 - alpha, 0, im);
// }
// voffset: vertical offset
int drawTextOnImage(cv::Mat& im, Rect pos, float voffset, string text) {
int fontFace = CV_FONT_HERSHEY_SIMPLEX; //FONT_HERSHEY_PLAIN;
double fontScale = 1.0;
int thickness = 0.5;
int baseline = 0;
Size textSize = getTextSize(text, fontFace, fontScale, thickness, &baseline);
Mat overlay;
im.copyTo(overlay);
rectangle(overlay, Point(pos.x, pos.y + voffset),
Point(pos.x + textSize.width, pos.y + textSize.height + thickness + voffset),Scalar(255, 128, 128), CV_FILLED);
Point textOrg(pos.x, pos.y + textSize.height + thickness + voffset);
cv::putText(overlay, text, textOrg,fontFace, fontScale, Scalar(255, 255, 255), thickness, LINE_AA);
double alpha = 0.75;
cv::addWeighted(overlay, alpha, im, 1 - alpha, 0, im);
return textSize.height + thickness;
}
int main(int argc,char **argv){
::google::InitGoogleLogging(argv[0]);
#ifdef CPU_ONLY
cout<<"Use CPU\n";
#else
cout<<"Use GPU\n";
#endif
ObjectDetector detect("test.prototxt", "xx.caffemodel");
long start_time = clock();
Mat img=imread("xx.jpg");
if (img.empty()) {
cout << "img is empty! " << endl;
return 1;
}
map<int,vector<float> > score;
map<int,vector > label_objs=detect.detect(img,&score); //目标检测,同时保存每个框的置信度
string label_str = "";
for(map<int,vector >::iterator it=label_objs.begin();it!=label_objs.end();it++){
int label=it->first; //标签
vector rects=it->second; //检测框
for(int j=0;j// cout << j << endl;
// drawRectOnImage(img, rects[j]);
// cv::rectangle(overlay, rects[j], Scalar(255, 128, 128), CV_FILLED);
cout << rects[j] << endl;
cout << "score is " << score[label][j] << endl;
rectangle(img,rects[j],Scalar(0,0,255),2); //画出矩形框
switch (label){
case 1: label_str = "label1"; break;
case 2: label_str = "label2";break;
case 3: label_str = "label3";break;
default: label_str = ""; cout << "label_str is error!" << endl; return 1;
}
// string txt= label_str+ " : " + num2str(score[label][j]);
// putText(overlay,txt,Point(rects[j].x,rects[j].y),CV_FONT_HERSHEY_SIMPLEX,0.5,Scalar(255,0,0)); //标记 类别:置信度
// putText(overlay,txt,Point(rects[j].x,rects[j].y),CV_FONT_HERSHEY_SIMPLEX,0.8,Scalar(255,0,0)); //标记 类别:置信度
int offset = drawTextOnImage(img, rects[j], 0, label_str);
int margin = 5;
drawTextOnImage(img, rects[j], offset + margin, num2str(score[label][j]));
// Point textOrg(rects[j].x, rects[j].y - 1);
// cv::putText(overlay, txt, textOrg,
// CV_FONT_HERSHEY_PLAIN, 1.0, Scalar(255, 255, 255), 1, LINE_AA);
// double alpha = 0.75;
// cv::addWeighted(overlay, alpha, img, 1 - alpha, 0, img);
}
}
cout << "time is " << double(clock() - start_time) / double(CLOCKS_PER_SEC) << endl;
//imshow("detect", img);
waitKey();
return 0;
}
如果需要使用CPU,请在代码前 #define CPU_ONLY 一下。
(13)最后一步,编译
g++ -o demo.exe main.cpp ObjectDetector.cpp -I /home/*****/caffe/include/ -I /home/*****/caffe/.build_release/src/ -I /usr/local/cuda-8.0/include/ `pkg-config --libs --cflags opencv` -L /home/****/caffe/build/lib/ -lcaffe -lglog -lboost_system -lprotobuf
然后执行./demo.exe
就可以了
注意:这里为了方便,直接输出的是标签号以及对应的置信度。可以看出,飞机的label为1,船的label是4,我们从python版的demo.py中可以证实这点:
CLASSES = ('__background__',
'aeroplane', 'bicycle', 'bird', 'boat',
'bottle', 'bus', 'car', 'cat', 'chair',
'cow', 'diningtable', 'dog', 'horse',
'motorbike', 'person', 'pottedplant',
'sheep', 'sofa', 'train', 'tvmonitor')
这个根据自己模型需求改改就行。label的顺序按照py-faster-rcnn/lib/datasets/pascal_voc.py的
self._classes = ('__background__', # always index 0
'aeroplane', 'bicycle', 'bird', 'boat',
'bottle', 'bus', 'car', 'cat', 'chair',
'cow', 'diningtable', 'dog', 'horse',
'motorbike', 'person', 'pottedplant',
'sheep', 'sofa', 'train', 'tvmonitor')
顺序进行标签排序,'__background__' 为0,aeroplane为1,以此类推。
1、我看很多评论说,c++版本比python要更吃显存,更耗时,但我自己测试的效果没有这种感觉,不用最新caffe其实耗时和python基本差不多。
2、感觉用最新的caffe会比Faster R-CNN老版caffe要快一丁点
3、1080ti上测试 一张图平均(600*1000)0.3s (新or老caffe都差不多)
(1) 找不到libcaffe.so.1.0.0-rc3
./app.bin: error while loading shared libraries: libcaffe.so.1.0.0-rc3: cannot open shared object file: No such file or directory
解决办法:
在/etc/ld.so.conf.d/下新添加caffe.conf
$Home
sudo vi /etc/ld.so.conf.d/caffe.conf
然后把libcaffe.so.1.0.0-rc3所在的caffe/build/lib/地址写进caffe.conf
最后不要忘记 sudo ldconfig 一下