摘要:caffe根目录下的examples/cpp_classification/classification.cpp是官方提供的C++分类示例;但caffe没有提供现成的C++检测代码,因此本文通过在caffe中新增一个RPN层,来实现纯C++的目标检测(Faster R-CNN前向预测);文中使用的检测模型是自己训练的、用于检测试卷手写分数的模型。
运行环境:Windows,Intel i7-3520M CPU @ 2.9GHz(双核四线程),内存3.23GB
Visual Studio 2013
参考:博主“真小假”的《纯C++版的Faster R-CNN(通过caffe自定义RPN层实现)》
(备注:博主写的已经很完善了,但是在用vs2013编译的过程中,出现了一些坑,特此记录)
在这里需要把RPN新层添加到Microsoft/caffe(github下载)中,编译pycaffe即可(因为我们不做训练,只需要通过网络进行前向预测)。
#ifndef CAFFE_RPN_LAYER_HPP_
#define CAFFE_RPN_LAYER_HPP_
#include
#include "caffe/blob.hpp"
#include "caffe/layer.hpp"
#include "caffe/common.hpp"
#include "caffe/proto/caffe.pb.h"
// Max/min helpers. The whole expansion is wrapped in parentheses so the
// macros can be embedded in a larger expression (e.g. `2 * mymax(a, b)`)
// without the surrounding operators binding to the ternary's condition.
// NOTE: an argument may be evaluated twice; do not pass expressions with
// side effects.
#define mymax(a,b) (((a)>(b))?(a):(b))
#define mymin(a,b) (((a)>(b))?(b):(a))
namespace caffe {
/**
* @brief implement RPN layer for faster rcnn
*/
template
class RPNLayer : public Layer {
public:
explicit RPNLayer(const LayerParameter& param)
: Layer(param) {
m_score_.reset(new Blob());
m_box_.reset(new Blob());
local_anchors_.reset(new Blob());
}
virtual void LayerSetUp(const vector*>& bottom,
const vector*>& top);
virtual void Reshape(const vector*>& bottom,
const vector*>& top){}
virtual inline const char* type() const { return "RPN"; }
struct abox{
Dtype batch_ind;
Dtype x1;
Dtype y1;
Dtype x2;
Dtype y2;
Dtype score;
bool operator <(const abox&tmp) const{
return score < tmp.score;
}
};
protected:
virtual void Forward_cpu(const vector*>& bottom,
const vector*>& top);
virtual void Forward_gpu(const vector*>& bottom,
const vector*>& top);
virtual void Backward_cpu(const vector*>& top,
const vector& propagate_down, const vector*>& bottom);
virtual void Backward_gpu(const vector*>& top,
const vector& propagate_down, const vector*>& bottom);
int feat_stride_;
int base_size_;
int min_size_;
int pre_nms_topN_;
int post_nms_topN_;
float nms_thresh_;
vector anchor_scales_;
vector ratios_;
vector > gen_anchors_;
int *anchors_;
int anchors_nums_;
int src_height_;
int src_width_;
float src_scale_;
int map_width_;
int map_height_;
shared_ptr > m_score_;
shared_ptr > m_box_;
shared_ptr >local_anchors_;
void generate_anchors();
vector > ratio_enum(vector);
vector whctrs(vector);
vector mkanchor(float w, float h, float x_ctr, float y_ctr);
vector > scale_enum(vector);
void proposal_local_anchor();
//cv::Mat proposal_local_anchor(int width, int height);
void bbox_tranform_inv();
cv::Mat bbox_tranform_inv(cv::Mat local_anchors, cv::Mat boxs_delta);
void nms(std::vector &input_boxes, float nms_thresh);
void filter_boxs(vector& aboxes);
//void filter_boxs(cv::Mat& pre_box, cv::Mat& score, vector& aboxes);
};
} // namespace caffe
#endif // CAFFE_RPN_LAYER_HPP_
#include
#include
#include "caffe/layers/rpn_layer.hpp"
#include "caffe/util/math_functions.hpp"
#include
// Debug switch; not referenced by the code visible in this file.
int debug = 0;
// Nine hard-coded boxes, one (x1, y1, x2, y2) row each. Not referenced by the
// visible code — presumably a reference copy of the anchors that
// generate_anchors() yields for the default settings; TODO confirm.
int tmp[9][4] = {
{ -83, -39, 100, 56 },
{ -175, -87, 192, 104 },
{ -359, -183, 376, 200 },
{ -55, -55, 72, 72 },
{ -119, -119, 136, 136 },
{ -247, -247, 264, 264 },
{ -35, -79, 52, 96 },
{ -79, -167, 96, 184 },
{ -167, -343, 184, 360 }
};
namespace caffe {
template
void RPNLayer::LayerSetUp(
const vector*>& bottom, const vector*>& top) {
anchor_scales_.clear();
ratios_.clear();
feat_stride_ = this->layer_param_.rpn_param().feat_stride();
base_size_ = this->layer_param_.rpn_param().basesize();
min_size_ = this->layer_param_.rpn_param().boxminsize();
pre_nms_topN_ = this->layer_param_.rpn_param().per_nms_topn();
post_nms_topN_ = this->layer_param_.rpn_param().post_nms_topn();
nms_thresh_ = this->layer_param_.rpn_param().nms_thresh();
int scales_num = this->layer_param_.rpn_param().scale_size();
for (int i = 0; i < scales_num; ++i)
{
anchor_scales_.push_back(this->layer_param_.rpn_param().scale(i));
}
int ratios_num = this->layer_param_.rpn_param().ratio_size();
for (int i = 0; i < ratios_num; ++i)
{
ratios_.push_back(this->layer_param_.rpn_param().ratio(i));
}
generate_anchors();
anchors_nums_ = gen_anchors_.size();
anchors_ = new int[anchors_nums_ * 4];
for (int i = 0; iReshape(1, 5, 1, 1);
if (top.size() > 1)
{
top[1]->Reshape(1, 1, 1, 1);
}
}
template
void RPNLayer::generate_anchors(){
//generate base anchor
vector base_anchor;
base_anchor.push_back(0);
base_anchor.push_back(0);
base_anchor.push_back(base_size_ - 1);
base_anchor.push_back(base_size_ - 1);
//enum ratio anchors
vector >ratio_anchors = ratio_enum(base_anchor);
for (int i = 0; i < ratio_anchors.size(); ++i)
{
vector > tmp = scale_enum(ratio_anchors[i]);
gen_anchors_.insert(gen_anchors_.end(), tmp.begin(), tmp.end());
}
}
template
vector > RPNLayer::scale_enum(vector anchor){
vector > result;
vector reform_anchor = whctrs(anchor);
float x_ctr = reform_anchor[2];
float y_ctr = reform_anchor[3];
float w = reform_anchor[0];
float h = reform_anchor[1];
for (int i = 0; i < anchor_scales_.size(); ++i)
{
float ws = w * anchor_scales_[i];
float hs = h * anchor_scales_[i];
vector tmp = mkanchor(ws, hs, x_ctr, y_ctr);
result.push_back(tmp);
}
return result;
}
template
vector > RPNLayer::ratio_enum(vector anchor){
vector > result;
vector reform_anchor = whctrs(anchor);
float x_ctr = reform_anchor[2];
float y_ctr = reform_anchor[3];
float size = reform_anchor[0] * reform_anchor[1];
for (int i = 0; i < ratios_.size(); ++i)
{
float size_ratios = size / ratios_[i];
float ws = round(sqrt(size_ratios));
float hs = round(ws*ratios_[i]);
vector tmp = mkanchor(ws, hs, x_ctr, y_ctr);
result.push_back(tmp);
}
return result;
}
template
vector RPNLayer::mkanchor(float w, float h, float x_ctr, float y_ctr){
vector tmp;
tmp.push_back(x_ctr - 0.5*(w - 1));
tmp.push_back(y_ctr - 0.5*(h - 1));
tmp.push_back(x_ctr + 0.5*(w - 1));
tmp.push_back(y_ctr + 0.5*(h - 1));
return tmp;
}
template
vector RPNLayer::whctrs(vector anchor){
vector result;
result.push_back(anchor[2] - anchor[0] + 1); //w
result.push_back(anchor[3] - anchor[1] + 1); //h
result.push_back((anchor[2] + anchor[0]) / 2); //ctrx
result.push_back((anchor[3] + anchor[1]) / 2); //ctry
return result;
}
template
void RPNLayer::proposal_local_anchor(){
int length = mymax(map_width_, map_height_);
int step = map_width_*map_height_;
int *map_m = new int[length];
for (int i = 0; i < length; ++i)
{
map_m[i] = i*feat_stride_;
}
Dtype *shift_x = new Dtype[step];
Dtype *shift_y = new Dtype[step];
for (int i = 0; i < map_height_; ++i)
{
for (int j = 0; j < map_width_; ++j)
{
shift_x[i*map_width_ + j] = map_m[j];
shift_y[i*map_width_ + j] = map_m[i];
}
}
local_anchors_->Reshape(1, anchors_nums_ * 4, map_height_, map_width_);
Dtype *a = local_anchors_->mutable_cpu_data();
for (int i = 0; i < anchors_nums_; ++i)
{
caffe_set(step, Dtype(anchors_[i * 4 + 0]), a + (i * 4 + 0) *step);
caffe_set(step, Dtype(anchors_[i * 4 + 1]), a + (i * 4 + 1) *step);
caffe_set(step, Dtype(anchors_[i * 4 + 2]), a + (i * 4 + 2) *step);
caffe_set(step, Dtype(anchors_[i * 4 + 3]), a + (i * 4 + 3) *step);
caffe_axpy(step, Dtype(1), shift_x, a + (i * 4 + 0)*step);
caffe_axpy(step, Dtype(1), shift_x, a + (i * 4 + 2)*step);
caffe_axpy(step, Dtype(1), shift_y, a + (i * 4 + 1)*step);
caffe_axpy(step, Dtype(1), shift_y, a + (i * 4 + 3)*step);
}
}
template
void caffe::RPNLayer::filter_boxs(vector& aboxes)
{
float localMinSize = min_size_*src_scale_;
aboxes.clear();
int map_width = m_box_->width();
int map_height = m_box_->height();
int map_channel = m_box_->channels();
const Dtype *box = m_box_->cpu_data();
const Dtype *score = m_score_->cpu_data();
int step = 4 * map_height*map_width;
int one_step = map_height*map_width;
int offset_w, offset_h, offset_x, offset_y, offset_s;
for (int h = 0; h < map_height; ++h)
{
for (int w = 0; w < map_width; ++w)
{
offset_x = h*map_width + w;
offset_y = offset_x + one_step;
offset_w = offset_y + one_step;
offset_h = offset_w + one_step;
offset_s = one_step*anchors_nums_ + h*map_width + w;
for (int c = 0; c < map_channel / 4; ++c)
{
Dtype width = box[offset_w], height = box[offset_h];
if (width < localMinSize || height < localMinSize)
{
}
else
{
abox tmp;
tmp.batch_ind = 0;
tmp.x1 = box[offset_x] - 0.5*width;
tmp.y1 = box[offset_y] - 0.5*height;
tmp.x2 = box[offset_x] + 0.5*width;
tmp.y2 = box[offset_y] + 0.5*height;
tmp.x1 = mymin(mymax(tmp.x1, 0), src_width_);
tmp.y1 = mymin(mymax(tmp.y1, 0), src_height_);
tmp.x2 = mymin(mymax(tmp.x2, 0), src_width_);
tmp.y2 = mymin(mymax(tmp.y2, 0), src_height_);
tmp.score = score[offset_s];
aboxes.push_back(tmp);
}
offset_x += step;
offset_y += step;
offset_w += step;
offset_h += step;
offset_s += one_step;
}
}
}
}
template
void RPNLayer::bbox_tranform_inv(){
int channel = m_box_->channels();
int height = m_box_->height();
int width = m_box_->width();
int step = height*width;
Dtype * a = m_box_->mutable_cpu_data();
Dtype * b = local_anchors_->mutable_cpu_data();
for (int i = 0; i < channel / 4; ++i)
{
caffe_axpy(2 * step, Dtype(-1), b + (i * 4 + 0)*step, b + (i * 4 + 2)*step);
caffe_add_scalar(2 * step, Dtype(1), b + (i * 4 + 2)*step);
caffe_axpy(2 * step, Dtype(0.5), b + (i * 4 + 2)*step, b + (i * 4 + 0)*step);
caffe_mul(2 * step, b + (i * 4 + 2)*step, a + (i * 4 + 0)*step, a + (i * 4 + 0)*step);
caffe_add(2 * step, b + (i * 4 + 0)*step, a + (i * 4 + 0)*step, a + (i * 4 + 0)*step);
caffe_exp(2 * step, a + (i * 4 + 2)*step, a + (i * 4 + 2)*step);
caffe_mul(2 * step, b + (i * 4 + 2)*step, a + (i * 4 + 2)*step, a + (i * 4 + 2)*step);
}
}
template
void RPNLayer::nms(std::vector &input_boxes, float nms_thresh){
std::vectorvArea(input_boxes.size());
for (int i = 0; i < input_boxes.size(); ++i)
{
vArea[i] = (input_boxes.at(i).x2 - input_boxes.at(i).x1 + 1)
* (input_boxes.at(i).y2 - input_boxes.at(i).y1 + 1);
}
for (int i = 0; i < input_boxes.size(); ++i)
{
for (int j = i + 1; j < input_boxes.size();)
{
float xx1 = std::max(input_boxes[i].x1, input_boxes[j].x1);
float yy1 = std::max(input_boxes[i].y1, input_boxes[j].y1);
float xx2 = std::min(input_boxes[i].x2, input_boxes[j].x2);
float yy2 = std::min(input_boxes[i].y2, input_boxes[j].y2);
float w = std::max(float(0), xx2 - xx1 + 1);
float h = std::max(float(0), yy2 - yy1 + 1);
float inter = w * h;
float ovr = inter / (vArea[i] + vArea[j] - inter);
if (ovr >= nms_thresh)
{
input_boxes.erase(input_boxes.begin() + j);
vArea.erase(vArea.begin() + j);
}
else
{
j++;
}
}
}
}
template
void RPNLayer::Forward_cpu(
const vector*>& bottom,
const vector*>& top) {
map_width_ = bottom[1]->width();
map_height_ = bottom[1]->height();
//int channels = bottom[1]->channels();
//get boxs_delta,向右
m_box_->CopyFrom(*(bottom[1]), false, true);
//get sores 向右,前面anchors_nums_个位bg的得分,后面anchors_nums_为fg得分,我们需要的是后面的。
m_score_->CopyFrom(*(bottom[0]), false, true);
//get im_info
src_height_ = bottom[2]->data_at(0, 0, 0, 0);
src_width_ = bottom[2]->data_at(0, 1, 0, 0);
src_scale_ = bottom[2]->data_at(0, 2, 0, 0);
//gen local anchors 向右
proposal_local_anchor();
//Convert anchors into proposals via bbox transformations
bbox_tranform_inv();
vectoraboxes;
filter_boxs(aboxes);
std::sort(aboxes.rbegin(), aboxes.rend()); //降序
if (pre_nms_topN_ > 0)
{
int tmp = mymin(pre_nms_topN_, aboxes.size());
aboxes.erase(aboxes.begin() + tmp, aboxes.end());
}
nms(aboxes, nms_thresh_);
if (post_nms_topN_ > 0)
{
int tmp = mymin(post_nms_topN_, aboxes.size());
aboxes.erase(aboxes.begin() + tmp, aboxes.end());
}
top[0]->Reshape(aboxes.size(), 5, 1, 1);
Dtype *top0 = top[0]->mutable_cpu_data();
for (int i = 0; i < aboxes.size(); ++i)
{
top0[0] = aboxes[i].batch_ind;
top0[1] = aboxes[i].x1;
top0[2] = aboxes[i].y1;
top0[3] = aboxes[i].x2;
top0[4] = aboxes[i].y2;
top0 += top[0]->offset(1);
}
if (top.size()>1)
{
top[1]->Reshape(aboxes.size(), 1, 1, 1);
Dtype *top1 = top[1]->mutable_cpu_data();
for (int i = 0; i < aboxes.size(); ++i)
{
top1[0] = aboxes[i].score;
top1 += top[1]->offset(1);
}
}
}
template
void RPNLayer::Backward_cpu(const vector*>& top,
const vector& propagate_down, const vector*>& bottom){}
/*template
void Backward_gpu(const vector*>& top,
const vector& propagate_down, const vector*>& bottom){}*/
// NOTE(review): Forward_gpu / Backward_gpu are declared in the class but only
// get a definition through STUB_GPU in CPU_ONLY builds; a GPU build
// presumably needs its own definitions elsewhere — confirm.
#ifdef CPU_ONLY
STUB_GPU(RPNLayer);
#endif
// Instantiate the layer template and register it under the type name "RPN".
INSTANTIATE_CLASS(RPNLayer);
REGISTER_LAYER_CLASS(RPN);
} // namespace caffe
// The field ID used here must be unique; it must not clash with the ID of any other layer's parameter field.
optional RPNParameter rpn_param = 150;
// Settings read by RPNLayer::LayerSetUp.
message RPNParameter {
// Multiplied by the feature-map row/column index to get each anchor's shift.
optional uint32 feat_stride = 1;
// The base anchor is the box (0, 0, basesize - 1, basesize - 1).
optional uint32 basesize = 2;
// Multipliers applied to the anchor width and height.
repeated uint32 scale = 3;
// Aspect ratios; anchor height is width * ratio.
repeated float ratio = 4;
// Minimum box width/height (multiplied by the image scale) to keep a box.
optional uint32 boxminsize =5;
// Number of top-scoring boxes kept before NMS (0 keeps all).
// NOTE(review): "per" looks like a typo for "pre", but the C++ code calls
// per_nms_topn(), so the name must stay as it is.
optional uint32 per_nms_topn = 9;
// Number of boxes kept after NMS (0 keeps all).
optional uint32 post_nms_topn = 11;
// Boxes overlapping a kept box by at least this IoU are suppressed.
optional float nms_thresh = 8;
}
//include/common.hpp
#ifndef CAFFE_COMMON_HPP_
#define CAFFE_COMMON_HPP_
#include
#include
#include
#include
#include
#include // NOLINT(readability/streams)
#include // NOLINT(readability/streams)
#include
//src/common.cpp
#if defined(_MSC_VER)
#include
#define getpid() _getpid()
#endif
#include
#include
#include
#include
#include
#include
using namespace cv;
#include "caffe/util/rng.hpp"
namespace caffe {
// Make sure each thread can have different values.
static boost::thread_specific_ptr<Caffe> thread_instance_;

// Returns the calling thread's Caffe instance, creating it on first use.
Caffe& Caffe::Get() {
  if (!thread_instance_.get()) {
    thread_instance_.reset(new Caffe());
  }
  return *(thread_instance_.get());
}
// Random seeding: returns sizeof(int64_t) bytes read from /dev/urandom when
// that file can be read, otherwise a value mixed from the process id and the
// current time.
int64_t cluster_seedgen(void) {
  int64_t seed;
  FILE* urandom = fopen("/dev/urandom", "rb");
  if (urandom != NULL &&
      fread(&seed, 1, sizeof(seed), urandom) == sizeof(seed)) {
    fclose(urandom);
    return seed;
  }

  LOG(INFO) << "System entropy source not available, "
              "using fallback algorithm to generate seed instead.";
  if (urandom != NULL) {
    fclose(urandom);
  }

  // NOTE(review): for large pid values this product can exceed the int64_t
  // range — confirm whether that matters on the target platform.
  const int64_t pid = getpid();
  const int64_t now = time(NULL);
  seed = std::abs(((now * 181) * ((pid - 83) * 359)) % 104729);
  return seed;
}
// Process-wide start-up: parses the gflags command line (the `true` argument
// asks gflags to remove the flags it recognises from argv) and initialises
// glog with the program name, argv[0].
void GlobalInit(int* pargc, char*** pargv) {
// Google flags.
::gflags::ParseCommandLineFlags(pargc, pargv, true);
// Google logging.
::google::InitGoogleLogging(*(pargv)[0]);
// Provide a backtrace on segfault.
// Windows port of glogs doesn't have this function built
#if !defined(_MSC_VER)
::google::InstallFailureSignalHandler();
#endif
}
#ifdef CPU_ONLY // CPU-only Caffe.
// CPU-only build: the constructor only sets CPU mode and default solver
// state; there are no GPU handles to create or destroy.
Caffe::Caffe()
: random_generator_(), mode_(Caffe::CPU),
solver_count_(1), root_solver_(true) { }
Caffe::~Caffe() { }
// Replaces the calling thread's random generator with one seeded by `seed`.
void Caffe::set_random_seed(const unsigned int seed) {
// RNG seed
Get().random_generator_.reset(new RNG(seed));
}
// The device functions below have no GPU to act on. NO_GPU is defined
// elsewhere; presumably it reports that GPU support is unavailable — confirm.
void Caffe::SetDevice(const int device_id) {
NO_GPU;
}
void Caffe::DeviceQuery() {
NO_GPU;
}
bool Caffe::CheckDevice(const int device_id) {
NO_GPU;
return false;
}
int Caffe::FindDevice(const int start_id) {
NO_GPU;
return -1;
}
// Owns the underlying caffe::rng_t engine.
class Caffe::RNG::Generator {
 public:
  // Seeds the engine from cluster_seedgen().
  Generator() : rng_(new caffe::rng_t(cluster_seedgen())) {}
  // Seeds the engine with an explicit value.
  explicit Generator(unsigned int seed) : rng_(new caffe::rng_t(seed)) {}
  caffe::rng_t* rng() { return rng_.get(); }
 private:
  shared_ptr<caffe::rng_t> rng_;
};

Caffe::RNG::RNG() : generator_(new Generator()) { }

Caffe::RNG::RNG(unsigned int seed) : generator_(new Generator(seed)) { }

// After assignment both RNG objects share the same Generator.
Caffe::RNG& Caffe::RNG::operator=(const RNG& other) {
  generator_ = other.generator_;
  return *this;
}

// Returns the engine as an untyped pointer; callers cast it back to
// caffe::rng_t*.
void* Caffe::RNG::generator() {
  return static_cast<void*>(generator_->rng());
}
#else // Normal GPU + CPU Caffe.
// GPU + CPU build. Starts in CPU mode and tries to create the cuBLAS and
// cuRAND handles. A failure is only logged, so the program keeps running for
// callers that only need CPU code.
Caffe::Caffe()
    : cublas_handle_(NULL), curand_generator_(NULL), random_generator_(),
      mode_(Caffe::CPU), solver_count_(1), root_solver_(true) {
  const bool cublas_ready =
      cublasCreate(&cublas_handle_) == CUBLAS_STATUS_SUCCESS;
  if (!cublas_ready) {
    LOG(ERROR) << "Cannot create Cublas handle. Cublas won't be available.";
  }

  // The seed is only set when the generator was created (short-circuit &&).
  const bool curand_ready =
      curandCreateGenerator(&curand_generator_, CURAND_RNG_PSEUDO_DEFAULT)
          == CURAND_STATUS_SUCCESS &&
      curandSetPseudoRandomGeneratorSeed(curand_generator_, cluster_seedgen())
          == CURAND_STATUS_SUCCESS;
  if (!curand_ready) {
    LOG(ERROR) << "Cannot create Curand generator. Curand won't be available.";
  }
}
// Destroys whichever of the cuBLAS / cuRAND handles were created.
Caffe::~Caffe() {
  if (cublas_handle_) {
    CUBLAS_CHECK(cublasDestroy(cublas_handle_));
  }
  if (curand_generator_) {
    CURAND_CHECK(curandDestroyGenerator(curand_generator_));
  }
}
// Seeds both random sources: the cuRAND generator, when it exists, and the
// calling thread's RNG. A missing cuRAND generator is logged once only.
void Caffe::set_random_seed(const unsigned int seed) {
  static bool g_curand_availability_logged = false;
  if (!Get().curand_generator_) {
    if (!g_curand_availability_logged) {
      LOG(ERROR) <<
          "Curand not available. Skipping setting the curand seed.";
      g_curand_availability_logged = true;
    }
  } else {
    // Curand seed, then restart the sequence from offset 0.
    CURAND_CHECK(curandSetPseudoRandomGeneratorSeed(curand_generator(),
        seed));
    CURAND_CHECK(curandSetGeneratorOffset(curand_generator(), 0));
  }
  // RNG seed
  Get().random_generator_.reset(new RNG(seed));
}
// Switches the current CUDA device to device_id and recreates the cuBLAS and
// cuRAND handles on it. Does nothing when device_id is already current.
void Caffe::SetDevice(const int device_id) {
  int current_device;
  CUDA_CHECK(cudaGetDevice(&current_device));
  if (current_device == device_id) {
    return;
  }
  // The call to cudaSetDevice must come before any calls to Get, which
  // may perform initialization using the GPU.
  CUDA_CHECK(cudaSetDevice(device_id));
  if (Get().cublas_handle_) CUBLAS_CHECK(cublasDestroy(Get().cublas_handle_));
  if (Get().curand_generator_) {
    CURAND_CHECK(curandDestroyGenerator(Get().curand_generator_));
  }
  CUBLAS_CHECK(cublasCreate(&Get().cublas_handle_));
  CURAND_CHECK(curandCreateGenerator(&Get().curand_generator_,
      CURAND_RNG_PSEUDO_DEFAULT));
  CURAND_CHECK(curandSetPseudoRandomGeneratorSeed(Get().curand_generator_,
      cluster_seedgen()));
}
// Logs the properties of the current CUDA device. When no device can be
// obtained, prints a message to stdout and returns.
void Caffe::DeviceQuery() {
cudaDeviceProp prop;
int device;
if (cudaSuccess != cudaGetDevice(&device)) {
printf("No cuda device present.\n");
return;
}
CUDA_CHECK(cudaGetDeviceProperties(&prop, device));
LOG(INFO) << "Device id: " << device;
LOG(INFO) << "Major revision number: " << prop.major;
LOG(INFO) << "Minor revision number: " << prop.minor;
LOG(INFO) << "Name: " << prop.name;
LOG(INFO) << "Total global memory: " << prop.totalGlobalMem;
LOG(INFO) << "Total shared memory per block: " << prop.sharedMemPerBlock;
LOG(INFO) << "Total registers per block: " << prop.regsPerBlock;
LOG(INFO) << "Warp size: " << prop.warpSize;
LOG(INFO) << "Maximum memory pitch: " << prop.memPitch;
LOG(INFO) << "Maximum threads per block: " << prop.maxThreadsPerBlock;
LOG(INFO) << "Maximum dimension of block: "
<< prop.maxThreadsDim[0] << ", " << prop.maxThreadsDim[1] << ", "
<< prop.maxThreadsDim[2];
LOG(INFO) << "Maximum dimension of grid: "
<< prop.maxGridSize[0] << ", " << prop.maxGridSize[1] << ", "
<< prop.maxGridSize[2];
LOG(INFO) << "Clock rate: " << prop.clockRate;
LOG(INFO) << "Total constant memory: " << prop.totalConstMem;
LOG(INFO) << "Texture alignment: " << prop.textureAlignment;
LOG(INFO) << "Concurrent copy and execution: "
<< (prop.deviceOverlap ? "Yes" : "No");
LOG(INFO) << "Number of multiprocessors: " << prop.multiProcessorCount;
LOG(INFO) << "Kernel execution timeout: "
<< (prop.kernelExecTimeoutEnabled ? "Yes" : "No");
return;
}
// Checks whether GPU #device_id can actually be used by this thread.
//
// cudaSetDevice() alone is not enough: it only records the id lazily and
// does not create a context, so it returns cudaSuccess even when the device
// is in EXCLUSIVE_PROCESS / EXCLUSIVE_THREAD mode and already held by another
// process or thread. cudaFree(0) forces context creation and has no other
// side effect, so it reveals whether the device is really accessible.
bool Caffe::CheckDevice(const int device_id) {
  // cudaFree(0) is only attempted when cudaSetDevice() succeeded.
  const bool device_usable = (cudaSetDevice(device_id) == cudaSuccess) &&
                             (cudaFree(0) == cudaSuccess);
  // Clear any error left behind by the probe.
  cudaGetLastError();
  return device_usable;
}
// Returns the ordinal of the first usable device at or after start_id, or -1
// when none is available. In EXCLUSIVE_PROCESS or EXCLUSIVE_THREAD mode a
// successful check also claims the device, because CheckDevice initialises a
// context on it.
int Caffe::FindDevice(const int start_id) {
  int device_count = 0;
  CUDA_CHECK(cudaGetDeviceCount(&device_count));
  int candidate = start_id;
  while (candidate < device_count) {
    if (CheckDevice(candidate)) {
      return candidate;
    }
    candidate++;
  }
  return -1;
}
// Owns the underlying caffe::rng_t engine.
class Caffe::RNG::Generator {
 public:
  // Seeds the engine from cluster_seedgen().
  Generator() : rng_(new caffe::rng_t(cluster_seedgen())) {}
  // Seeds the engine with an explicit value.
  explicit Generator(unsigned int seed) : rng_(new caffe::rng_t(seed)) {}
  caffe::rng_t* rng() { return rng_.get(); }
 private:
  shared_ptr<caffe::rng_t> rng_;
};

Caffe::RNG::RNG() : generator_(new Generator()) { }

Caffe::RNG::RNG(unsigned int seed) : generator_(new Generator(seed)) { }

// Shares the other RNG's Generator. The shared_ptr itself is copied so both
// objects use one reference count; reset(other.generator_.get()) would start
// a second, independent count on the same raw pointer and delete it twice.
Caffe::RNG& Caffe::RNG::operator=(const RNG& other) {
  generator_ = other.generator_;
  return *this;
}

// Returns the engine as an untyped pointer; callers cast it back to
// caffe::rng_t*.
void* Caffe::RNG::generator() {
  return static_cast<void*>(generator_->rng());
}
// Maps a cuBLAS status code to its enumerator name as a string literal.
// Codes not listed (including those excluded by the CUDA_VERSION checks)
// yield "Unknown cublas status".
const char* cublasGetErrorString(cublasStatus_t error) {
switch (error) {
case CUBLAS_STATUS_SUCCESS:
return "CUBLAS_STATUS_SUCCESS";
case CUBLAS_STATUS_NOT_INITIALIZED:
return "CUBLAS_STATUS_NOT_INITIALIZED";
case CUBLAS_STATUS_ALLOC_FAILED:
return "CUBLAS_STATUS_ALLOC_FAILED";
case CUBLAS_STATUS_INVALID_VALUE:
return "CUBLAS_STATUS_INVALID_VALUE";
case CUBLAS_STATUS_ARCH_MISMATCH:
return "CUBLAS_STATUS_ARCH_MISMATCH";
case CUBLAS_STATUS_MAPPING_ERROR:
return "CUBLAS_STATUS_MAPPING_ERROR";
case CUBLAS_STATUS_EXECUTION_FAILED:
return "CUBLAS_STATUS_EXECUTION_FAILED";
case CUBLAS_STATUS_INTERNAL_ERROR:
return "CUBLAS_STATUS_INTERNAL_ERROR";
#if CUDA_VERSION >= 6000
case CUBLAS_STATUS_NOT_SUPPORTED:
return "CUBLAS_STATUS_NOT_SUPPORTED";
#endif
#if CUDA_VERSION >= 6050
case CUBLAS_STATUS_LICENSE_ERROR:
return "CUBLAS_STATUS_LICENSE_ERROR";
#endif
}
return "Unknown cublas status";
}
// Maps a cuRAND status code to its enumerator name as a string literal.
// Codes not listed yield "Unknown curand status".
const char* curandGetErrorString(curandStatus_t error) {
switch (error) {
case CURAND_STATUS_SUCCESS:
return "CURAND_STATUS_SUCCESS";
case CURAND_STATUS_VERSION_MISMATCH:
return "CURAND_STATUS_VERSION_MISMATCH";
case CURAND_STATUS_NOT_INITIALIZED:
return "CURAND_STATUS_NOT_INITIALIZED";
case CURAND_STATUS_ALLOCATION_FAILED:
return "CURAND_STATUS_ALLOCATION_FAILED";
case CURAND_STATUS_TYPE_ERROR:
return "CURAND_STATUS_TYPE_ERROR";
case CURAND_STATUS_OUT_OF_RANGE:
return "CURAND_STATUS_OUT_OF_RANGE";
case CURAND_STATUS_LENGTH_NOT_MULTIPLE:
return "CURAND_STATUS_LENGTH_NOT_MULTIPLE";
case CURAND_STATUS_DOUBLE_PRECISION_REQUIRED:
return "CURAND_STATUS_DOUBLE_PRECISION_REQUIRED";
case CURAND_STATUS_LAUNCH_FAILURE:
return "CURAND_STATUS_LAUNCH_FAILURE";
case CURAND_STATUS_PREEXISTING_FAILURE:
return "CURAND_STATUS_PREEXISTING_FAILURE";
case CURAND_STATUS_INITIALIZATION_FAILED:
return "CURAND_STATUS_INITIALIZATION_FAILED";
case CURAND_STATUS_ARCH_MISMATCH:
return "CURAND_STATUS_ARCH_MISMATCH";
case CURAND_STATUS_INTERNAL_ERROR:
return "CURAND_STATUS_INTERNAL_ERROR";
}
return "Unknown curand status";
}
#endif // CPU_ONLY
} // namespace caffe
namespace RPN{
// Applies box regression deltas to reference boxes, row by row.
//
// local_anchors: CV_32F matrix, one reference box (x1, y1, x2, y2) per row.
// boxs_delta:    CV_32F matrix, one (dx, dy, dw, dh) per row.
// Returns a CV_32FC1 matrix with the predicted (x1, y1, x2, y2) per row:
// the centre moves by delta * size and the size is scaled by exp(delta).
cv::Mat bbox_tranform_inv(cv::Mat local_anchors, cv::Mat boxs_delta){
  cv::Mat pre_box(local_anchors.rows, local_anchors.cols, CV_32FC1);
  for (int i = 0; i < local_anchors.rows; i++)
  {
    double pred_ctr_x, pred_ctr_y, src_ctr_x, src_ctr_y;
    double dst_ctr_x, dst_ctr_y, dst_scl_x, dst_scl_y;
    double src_w, src_h, pred_w, pred_h;
    // Reference box size (inclusive pixels) and centre.
    src_w = local_anchors.at<float>(i, 2) - local_anchors.at<float>(i, 0) + 1;
    src_h = local_anchors.at<float>(i, 3) - local_anchors.at<float>(i, 1) + 1;
    src_ctr_x = local_anchors.at<float>(i, 0) + 0.5 * src_w;
    src_ctr_y = local_anchors.at<float>(i, 1) + 0.5 * src_h;
    dst_ctr_x = boxs_delta.at<float>(i, 0);
    dst_ctr_y = boxs_delta.at<float>(i, 1);
    dst_scl_x = boxs_delta.at<float>(i, 2);
    dst_scl_y = boxs_delta.at<float>(i, 3);
    pred_ctr_x = dst_ctr_x*src_w + src_ctr_x;
    pred_ctr_y = dst_ctr_y*src_h + src_ctr_y;
    pred_w = exp(dst_scl_x) * src_w;
    pred_h = exp(dst_scl_y) * src_h;
    // Back to corner form.
    pre_box.at<float>(i, 0) = pred_ctr_x - 0.5*pred_w;
    pre_box.at<float>(i, 1) = pred_ctr_y - 0.5*pred_h;
    pre_box.at<float>(i, 2) = pred_ctr_x + 0.5*pred_w;
    pre_box.at<float>(i, 3) = pred_ctr_y + 0.5*pred_h;
  }
  return pre_box;
}
void nms(std::vector &input_boxes, float nms_thresh){
std::vectorvArea(input_boxes.size());
for (int i = 0; i < input_boxes.size(); ++i)
{
vArea[i] = (input_boxes.at(i).x2 - input_boxes.at(i).x1 + 1)
* (input_boxes.at(i).y2 - input_boxes.at(i).y1 + 1);
}
for (int i = 0; i < input_boxes.size(); ++i)
{
for (int j = i + 1; j < input_boxes.size();)
{
float xx1 = std::max(input_boxes[i].x1, input_boxes[j].x1);
float yy1 = std::max(input_boxes[i].y1, input_boxes[j].y1);
float xx2 = std::min(input_boxes[i].x2, input_boxes[j].x2);
float yy2 = std::min(input_boxes[i].y2, input_boxes[j].y2);
float w = std::max(float(0), xx2 - xx1 + 1);
float h = std::max(float(0), yy2 - yy1 + 1);
float inter = w * h;
float ovr = inter / (vArea[i] + vArea[j] - inter);
if (ovr >= nms_thresh)
{
input_boxes.erase(input_boxes.begin() + j);
vArea.erase(vArea.begin() + j);
}
else
{
j++;
}
}
}
}
}
在同一解决方案下新建一个检测项目,层级结构如下:
head.h
特别注意:使用caffe的某些层时,运行会报“找不到XX层”的错误。这时需要在head.h文件中把自己用到的层的头文件包含进来;其中尚未注册的层,还需要在下面的代码部分用REGISTER_LAYER_CLASS进行注册。
#include "caffe/common.hpp"
#include "caffe/layers/input_layer.hpp"
#include "caffe/layers/reshape_layer.hpp"
#include "caffe/layers/inner_product_layer.hpp"
#include "caffe/layers/dropout_layer.hpp"
#include "caffe/layers/conv_layer.hpp"
#include "caffe/layers/relu_layer.hpp"
#include "caffe/layers/rpn_layer.hpp"
#include "caffe/layers/roi_pooling_layer.hpp"
#include "caffe/layers/pooling_layer.hpp"
#include "caffe/layers/lrn_layer.hpp"
#include "caffe/layers/softmax_layer.hpp"
// Forces the layers used by the detection network into this program. Each
// `extern INSTANTIATE_CLASS(...)` line names a layer implementation; the
// layers followed by REGISTER_LAYER_CLASS(...) are additionally registered
// here.
namespace caffe
{
  extern INSTANTIATE_CLASS(InputLayer);
  extern INSTANTIATE_CLASS(InnerProductLayer);
  extern INSTANTIATE_CLASS(DropoutLayer);
  extern INSTANTIATE_CLASS(ConvolutionLayer);
  REGISTER_LAYER_CLASS(Convolution);
  extern INSTANTIATE_CLASS(ReLULayer);
  REGISTER_LAYER_CLASS(ReLU);
  extern INSTANTIATE_CLASS(ReshapeLayer);
  extern INSTANTIATE_CLASS(PoolingLayer);
  REGISTER_LAYER_CLASS(Pooling);
  extern INSTANTIATE_CLASS(LRNLayer);
  REGISTER_LAYER_CLASS(LRN);
  extern INSTANTIATE_CLASS(ROIPoolingLayer);
  extern INSTANTIATE_CLASS(RPNLayer);
  extern INSTANTIATE_CLASS(SoftmaxLayer);
  REGISTER_LAYER_CLASS(Softmax);
}  // namespace caffe
ObjectDetector.hpp
#ifndef OBJECTDETECTOR_H
#define OBJECTDETECTOR_H
#define INPUT_SIZE_NARROW 600
#define INPUT_SIZE_LONG 1000
#include
#include "D://caffe_build_32_bit//caffe_build_32_bit//caffe-master//include//caffe//net.hpp"
#include "D://caffe_build_32_bit//caffe_build_32_bit//caffe-master//include//caffe//common.hpp"
#include
#include
#include
#include
ObjectDetector.cpp
#include "stdafx.h"
#include "ObjectDetector.hpp"
#include
#include
#include
#include
#include
using std::string;
using std::vector;
using std::max;
using std::min;
using namespace caffe;
// Loads the network definition (model_file) in TEST phase and copies the
// trained weights (weights_file) into it. Runs on the CPU when built with
// CPU_ONLY, otherwise on the GPU.
ObjectDetector::ObjectDetector(const std::string &model_file, const std::string &weights_file){
#ifdef CPU_ONLY
  Caffe::set_mode(Caffe::CPU);
#else
  Caffe::set_mode(Caffe::GPU);
#endif
  net_.reset(new Net<float>(model_file, TEST));
  net_->CopyTrainedLayersFrom(weights_file);
  // Channel count of "cls_prob" = number of classes + 1 (background).
  this->class_num_ = net_->blob_by_name("cls_prob")->channels();
}
// Runs detection on one image.
//
// Returns a map from class index (1 .. class_num_ - 1) to the boxes detected
// for that class, in original-image coordinates. When objectScore is not
// NULL it is cleared and filled with the matching confidences, in the same
// order as the boxes. An empty image yields an empty map.
// The time spent in each stage is printed to stdout.
map<int, vector<cv::Rect> > ObjectDetector::detect(const cv::Mat& image, map<int, vector<float> >* objectScore){
  int t1 = GetTickCount();
  if (objectScore != NULL)  // the caller wants the confidences as well
    objectScore->clear();

  map<int, vector<cv::Rect> > label_objs;  // detected boxes per class
  if (image.empty())  // nothing to detect; also avoids dividing by zero below
    return label_objs;

  float CONF_THRESH = 0.8;  // confidence threshold
  float NMS_THRESH = 0.3;   // non-maximum suppression threshold
  // Scale so the short side is at most INPUT_SIZE_NARROW and the long side
  // at most INPUT_SIZE_LONG, keeping the aspect ratio.
  int max_side = max(image.rows, image.cols);
  int min_side = min(image.rows, image.cols);
  float max_side_scale = float(max_side) / float(INPUT_SIZE_LONG);
  float min_side_scale = float(min_side) / float(INPUT_SIZE_NARROW);
  float max_scale = max(max_side_scale, min_side_scale);
  float img_scale = float(1) / max_scale;
  int height = int(image.rows * img_scale);
  int width = int(image.cols * img_scale);

  int t2 = GetTickCount();
  cout << "detect, t2-t1=" << t2 - t1 << "ms" << endl;

  // Convert to float, resize and subtract the per-channel mean.
  cv::Mat cv_resized;
  image.convertTo(cv_resized, CV_32FC3);
  cv::resize(cv_resized, cv_resized, cv::Size(width, height));
  cv::Mat mean(height, width, cv_resized.type(), cv::Scalar(102.9801, 115.9465, 122.7717));
  cv::Mat normalized;
  cv::subtract(cv_resized, mean, normalized);

  int t3 = GetTickCount();
  cout << "detect, t3-t2=" << t3 - t2 << "ms" << endl;

  caffe::shared_ptr<Blob<float> > input_layer = net_->blob_by_name("data");
  input_layer->Reshape(1, normalized.channels(), height, width);
  net_->Reshape();

  // Wrap each channel plane of the input blob in a cv::Mat header so that
  // cv::split writes straight into the blob.
  float* input_data = input_layer->mutable_cpu_data();
  vector<cv::Mat> input_channels;
  for (int i = 0; i < input_layer->channels(); ++i) {
    cv::Mat channel(height, width, CV_32FC1, input_data);
    input_channels.push_back(channel);
    input_data += height * width;
  }

  int t3_1 = GetTickCount();
  cout << "detect, t3_1-t3=" << t3_1 - t3 << "ms" << endl;

  cv::split(normalized, input_channels);

  // Copy (height, width, scale) into the blob's own storage. Pointing the
  // blob at a local array with set_cpu_data() would leave it referring to
  // dead stack memory once this function returns.
  float* im_info = net_->blob_by_name("im_info")->mutable_cpu_data();
  im_info[0] = height;
  im_info[1] = width;
  im_info[2] = img_scale;

  int t3_1_1 = GetTickCount();
  cout << "detect, t3_1_1-t3_1=" << t3_1_1 - t3_1 << "ms" << endl;

  net_->Forward();  // forward pass

  int t4 = GetTickCount();
  cout << "detect, t4-t3_1_1=" << t4 - t3_1_1 << "ms" << endl;

  // "rois" is num x 5 (batch_ind, x1, y1, x2, y2) in network-input scale;
  // divide by img_scale to get original-image coordinates.
  int num = net_->blob_by_name("rois")->num();
  const float *rois_data = net_->blob_by_name("rois")->cpu_data();
  cv::Mat rois_box(num, 4, CV_32FC1);
  for (int i = 0; i < num; ++i)
  {
    rois_box.at<float>(i, 0) = rois_data[i * 5 + 1] / img_scale;
    rois_box.at<float>(i, 1) = rois_data[i * 5 + 2] / img_scale;
    rois_box.at<float>(i, 2) = rois_data[i * 5 + 3] / img_scale;
    rois_box.at<float>(i, 3) = rois_data[i * 5 + 4] / img_scale;
  }

  int t5 = GetTickCount();
  cout << "detect, t5-t4=" << t5 - t4 << "ms" << endl;

  // bbox_pred: num x (4 * class_num_); cls_prob: num x class_num_.
  caffe::shared_ptr<Blob<float> > bbox_delt_data = net_->blob_by_name("bbox_pred");
  caffe::shared_ptr<Blob<float> > score = net_->blob_by_name("cls_prob");

  for (int i = 1; i < class_num_; ++i){  // every class; index 0 is skipped
    // Regression deltas of class i for every ROI.
    cv::Mat bbox_delt(num, 4, CV_32FC1);
    for (int j = 0; j < num; ++j){
      bbox_delt.at<float>(j, 0) = bbox_delt_data->data_at(j, i * 4 + 0, 0, 0);
      bbox_delt.at<float>(j, 1) = bbox_delt_data->data_at(j, i * 4 + 1, 0, 0);
      bbox_delt.at<float>(j, 2) = bbox_delt_data->data_at(j, i * 4 + 2, 0, 0);
      bbox_delt.at<float>(j, 3) = bbox_delt_data->data_at(j, i * 4 + 3, 0, 0);
    }
    //cout << "rois_box:" << rois_box << endl;
    //cout << "bbox_delt:" << bbox_delt << endl;
    cv::Mat box_class = RPN::bbox_tranform_inv(rois_box, bbox_delt);
    //cout << "box_class:" << box_class << endl;

    vector<RPN::abox> aboxes;  // candidate boxes of class i
    for (int j = 0; j < box_class.rows; ++j){
      // Clamp to the image: columns 0 and 2 are x, columns 1 and 3 are y.
      for (int k = 0; k < 4; ++k){
        const int limit = (k % 2 == 0) ? (image.cols - 1) : (image.rows - 1);
        float& v = box_class.at<float>(j, k);
        if (v < 0) v = 0;
        if (v > limit) v = limit;
      }
      RPN::abox tmp;
      tmp.x1 = box_class.at<float>(j, 0);
      tmp.y1 = box_class.at<float>(j, 1);
      tmp.x2 = box_class.at<float>(j, 2);
      tmp.y2 = box_class.at<float>(j, 3);
      tmp.score = score->data_at(j, i, 0, 0);
      aboxes.push_back(tmp);
    }
    std::sort(aboxes.rbegin(), aboxes.rend());  // descending by score
    //RPN::nms(aboxes, NMS_THRESH);  // non-maximum suppression (disabled)
    // Drop everything below the confidence threshold.
    for (int k = 0; k < aboxes.size();){
      if (aboxes[k].score < CONF_THRESH)
        aboxes.erase(aboxes.begin() + k);
      else
        k++;
    }

    // Store the boxes of class i.
    vector<cv::Rect> rect(aboxes.size());
    for (int ii = 0; ii < aboxes.size(); ++ii)
      rect[ii] = cv::Rect(cv::Point(aboxes[ii].x1, aboxes[ii].y1), cv::Point(aboxes[ii].x2, aboxes[ii].y2));
    label_objs[i] = rect;

    // Store the scores of class i, if requested.
    if (objectScore != NULL){
      vector<float> tmp(aboxes.size());
      for (int ii = 0; ii < aboxes.size(); ++ii)
        tmp[ii] = aboxes[ii].score;
      objectScore->insert(pair<int, vector<float> >(i, tmp));
    }
  }

  int t6 = GetTickCount();
  cout << "detect, t6-t5=" << t6 - t5 << "ms" << endl;
  return label_objs;
}
main.cpp
#include "stdafx.h"
#include "ObjectDetector.hpp"
#include "head.h"
#include
#include
#include
#include
using namespace cv;
using namespace std;
// Formats a float as text using the stream's default formatting.
std::string num2str(float i) {
  std::ostringstream formatted;
  formatted << i;
  const std::string text = formatted.str();
  return text;
}
vector files;
int main(int argc, char **argv){
::google::InitGoogleLogging(argv[0]);
#ifdef CPU_ONLY
cout << "Use CPU\n";
#else
cout << "Use GPU\n";
#endif
string file_path = "C://Users//w//Desktop//test//*.png";
glob(file_path, files, false);
size_t count = files.size();
int t3 = GetTickCount();
ObjectDetector detect("faster_rcnn_test.pt", "faster_rcnn_final.caffemodel");
int t4 = GetTickCount();
cout << "加载模型:" << t4 - t3 << "ms" << endl;
int t1 = GetTickCount();
for (int i = 0; i < count; i++){
Mat img = imread(files[i]);
map > score;
map > label_objs = detect.detect(img, &score); //目标检测,同时保存每个框的置信度
//map > label_objs;
//detect.detect(img, label_objs, &score); //目标检测,同时保存每个框的置信度
detect.detect(img, &score); //目标检测,同时保存每个框的置信度
//map > temp;
cout << "label_objes.size()=" << label_objs.size() << endl;
for (map >::iterator it = label_objs.begin(); it != label_objs.end(); it++){
int label = it->first; //标签
vector rects = it->second; //检测框
//for (int j = 0; j
代码都详细的贴上来了,然后编译项目成功的关键是将detect的属性页配置好。
1.首先选择 “VC++ 目录” ->“包含目录”,将自己用到的依赖include目录加进来,我把自己的贴上来;
2.然后是,“VC++目录”->“库目录”;
3.“链接器”-“输入”-“附加依赖项”
4.还有两个地方需要更改,因为我是visual studio2013新手,报了错,查到自己的项目是win32项目,所以
“C/C++”-“预处理器”-“预处理器定义”处,我改为
“链接器”-“系统”-“子系统”处改为
以上这些配置好之后,编译即可。
(刚开始写博客,有写的不清楚或者不对的地方,欢迎指正,随时交流!)