mtcnn.h
#ifndef _MTCNN_H_
#define _MTCNN_H_
#include
#include
#include
#ifdef _OPENMP
#include
#endif
using std::string;
using std::vector;
/// Axis-aligned rectangle for one face candidate together with its score.
/// The surrounding code computes sizes as (max - min + 1), i.e. it treats
/// both corners as inclusive pixel coordinates.
struct FaceBox {
    float xmin;   ///< left edge
    float ymin;   ///< top edge
    float xmax;   ///< right edge
    float ymax;   ///< bottom edge
    float score;  ///< network confidence; candidates are ranked by this value
};
/// Everything the detector tracks for one face candidate.
struct FaceInfo {
    float bbox_reg[4];       ///< box offsets from the network, applied as fractions of box width/height
    float landmark_reg[10];  ///< not written by any code visible in this file
    float landmark[10];      ///< five (x, y) pairs, filled only by the third stage
    FaceBox bbox;            ///< current rectangle and score
};
class MTCNN {
public:
MTCNN(const string& proto_model_dir);
MTCNN(const string& proto_model_dir, int min_face, float threhold_p=0.7f, float threhold_r=0.6f, float threhold_o = 0.6f, int device = 0, float iou_threhold = 0.7f, float factor = 0.709f);
vector Detect(const cv::Mat& img, const int min_size, const float* threshold, const float factor, const int stage);
vector Detect(const cv::Mat& img, const int stage);
protected:
vector ProposalNet(const cv::Mat& img, int min_size, float threshold, float factor);
vector NextStage(const cv::Mat& image, vector &pre_stage_res, int input_w, int input_h, int stage_num, const float threshold);
void BBoxRegression(vector& bboxes);
void BBoxPadSquare(vector& bboxes, int width, int height);
void BBoxPad(vector& bboxes, int width, int height);
void GenerateBBox(caffe::Blob* confidence, caffe::Blob* reg_box, float scale, float thresh);
std::vector NMS(std::vector& bboxes, float thresh, char methodType);
float IoU(float xmin, float ymin, float xmax, float ymax, float xmin_, float ymin_, float xmax_, float ymax_, bool is_iom = false);
private:
boost::shared_ptr> PNet_;
boost::shared_ptr> RNet_;
boost::shared_ptr> ONet_;
std::vector candidate_boxes_;
std::vector total_boxes_;
//omp
int threads_num = 4;
//pnet config
const float pnet_stride = 2;
const float pnet_cell_size = 12;
const int pnet_max_detect_num = 5000;
//mean & std
const float mean_val = 127.5f;
const float std_val = 0.0078125f;
//minibatch size
const int step_size = 128;
//mtcnn inference param.
int min_face=24;
float threhold_p=0.7f;
float threhold_r=0.6f;
float threhold_o=0.6f;
float iou_threhold =0.7f;
float factor= 0.709f;
};
#endif // _MTCNN_H_
mtcnn.cpp
//#include
#include
#include
#include "mtcnn.h"
/// Ordering predicate for std::sort: places higher-scoring candidates first.
bool CompareBBox(const FaceInfo & a, const FaceInfo & b) {
    const float lhs_score = a.bbox.score;
    const float rhs_score = b.bbox.score;
    return rhs_score < lhs_score;
}
/// Overlap ratio between two boxes given by inclusive corner coordinates.
/// @param is_iom  when true, divide the intersection by the smaller box area;
///                otherwise divide by the union area.
/// @return 0 when the boxes do not overlap, else the requested ratio.
float MTCNN::IoU(float xmin, float ymin, float xmax, float ymax,
                 float xmin_, float ymin_, float xmax_, float ymax_, bool is_iom) {
    const float overlap_w = std::min(xmax, xmax_) - std::max(xmin, xmin_) + 1;
    const float overlap_h = std::min(ymax, ymax_) - std::max(ymin, ymin_) + 1;
    if (overlap_w <= 0 || overlap_h <= 0)
        return 0;

    const float overlap_area = overlap_w * overlap_h;
    const float first_area = (xmax - xmin + 1) * (ymax - ymin + 1);
    const float second_area = (xmax_ - xmin_ + 1) * (ymax_ - ymin_ + 1);

    if (is_iom)
        return overlap_area / std::min(first_area, second_area);
    return overlap_area / (first_area + second_area - overlap_area);
}
std::vector MTCNN::NMS(std::vector& bboxes,
float thresh, char methodType) {
std::vector bboxes_nms;
if (bboxes.size() == 0) {
return bboxes_nms;
}
std::sort(bboxes.begin(), bboxes.end(), CompareBBox);
int32_t select_idx = 0;
int32_t num_bbox = static_cast(bboxes.size());
std::vector mask_merged(num_bbox, 0);
bool all_merged = false;
while (!all_merged) {
while (select_idx < num_bbox && mask_merged[select_idx] == 1)
select_idx++;
if (select_idx == num_bbox) {
all_merged = true;
continue;
}
bboxes_nms.push_back(bboxes[select_idx]);
mask_merged[select_idx] = 1;
FaceBox select_bbox = bboxes[select_idx].bbox;
float area1 = static_cast((select_bbox.xmax - select_bbox.xmin + 1) * (select_bbox.ymax - select_bbox.ymin + 1));
float x1 = static_cast(select_bbox.xmin);
float y1 = static_cast(select_bbox.ymin);
float x2 = static_cast(select_bbox.xmax);
float y2 = static_cast(select_bbox.ymax);
select_idx++;
#pragma omp parallel for num_threads(threads_num)
for (int32_t i = select_idx; i < num_bbox; i++) {
if (mask_merged[i] == 1)
continue;
FaceBox & bbox_i = bboxes[i].bbox;
float x = std::max(x1, static_cast(bbox_i.xmin));
float y = std::max(y1, static_cast(bbox_i.ymin));
float w = std::min(x2, static_cast(bbox_i.xmax)) - x + 1;
float h = std::min(y2, static_cast(bbox_i.ymax)) - y + 1;
if (w <= 0 || h <= 0)
continue;
float area2 = static_cast((bbox_i.xmax - bbox_i.xmin + 1) * (bbox_i.ymax - bbox_i.ymin + 1));
float area_intersect = w * h;
switch (methodType) {
case 'u':
if (static_cast(area_intersect) / (area1 + area2 - area_intersect) > thresh)
mask_merged[i] = 1;
break;
case 'm':
if (static_cast(area_intersect) / std::min(area1, area2) > thresh)
mask_merged[i] = 1;
break;
default:
break;
}
}
}
return bboxes_nms;
}
void MTCNN::BBoxRegression(vector& bboxes) {
#pragma omp parallel for num_threads(threads_num)
for (int i = 0; i < bboxes.size(); ++i) {
FaceBox &bbox = bboxes[i].bbox;
float *bbox_reg = bboxes[i].bbox_reg;
float w = bbox.xmax - bbox.xmin + 1;
float h = bbox.ymax - bbox.ymin + 1;
bbox.xmin += bbox_reg[0] * w;
bbox.ymin += bbox_reg[1] * h;
bbox.xmax += bbox_reg[2] * w;
bbox.ymax += bbox_reg[3] * h;
}
}
void MTCNN::BBoxPad(vector& bboxes, int width, int height) {
#pragma omp parallel for num_threads(threads_num)
for (int i = 0; i < bboxes.size(); ++i) {
FaceBox &bbox = bboxes[i].bbox;
bbox.xmin = round(std::max(bbox.xmin, 0.f));
bbox.ymin = round(std::max(bbox.ymin, 0.f));
bbox.xmax = round(std::min(bbox.xmax, width - 1.f));
bbox.ymax = round(std::min(bbox.ymax, height - 1.f));
}
}
void MTCNN::BBoxPadSquare(vector& bboxes, int width, int height) {
#pragma omp parallel for num_threads(threads_num)
for (int i = 0; i < bboxes.size(); ++i) {
FaceBox &bbox = bboxes[i].bbox;
float w = bbox.xmax - bbox.xmin + 1;
float h = bbox.ymax - bbox.ymin + 1;
float side = h>w ? h : w;
bbox.xmin = round(std::max(bbox.xmin + (w - side)*0.5f, 0.f));
bbox.ymin = round(std::max(bbox.ymin + (h - side)*0.5f, 0.f));
bbox.xmax = round(std::min(bbox.xmin + side - 1, width - 1.f));
bbox.ymax = round(std::min(bbox.ymin + side - 1, height - 1.f));
}
}
/// Turns one PNet output map into candidate boxes stored in candidate_boxes_
/// (which is cleared first).
///
/// @param confidence  PNet score blob; the second channel plane is read as the score
///                    (assumes a batch of one -- TODO confirm for other batch sizes).
/// @param reg_box     PNet regression blob, read as four planes of width*height values.
/// @param scale       pyramid scale the map was produced at; boxes are mapped
///                    back to original-image coordinates by dividing by it.
/// @param thresh      minimum score for a cell to become a candidate.
void MTCNN::GenerateBBox(caffe::Blob<float>* confidence, caffe::Blob<float>* reg_box,
                         float scale, float thresh) {
    const int feature_map_w_ = confidence->width();
    const int feature_map_h_ = confidence->height();
    const int spatical_size = feature_map_w_*feature_map_h_;
    // Skip the first plane; scores are taken from the second one.
    const float* confidence_data = confidence->cpu_data() + spatical_size;
    const float* reg_data = reg_box->cpu_data();
    candidate_boxes_.clear();
    for (int i = 0; i < spatical_size; ++i) {
        if (confidence_data[i] >= thresh) {
            const int y = i / feature_map_w_;
            const int x = i - feature_map_w_ * y;
            FaceInfo faceInfo{};  // zero the landmark fields this stage never fills
            FaceBox &faceBox = faceInfo.bbox;
            // Each output cell covers a pnet_cell_size window placed every
            // pnet_stride pixels in the scaled image.
            faceBox.xmin = (float)(x * pnet_stride) / scale;
            faceBox.ymin = (float)(y * pnet_stride) / scale;
            faceBox.xmax = (float)(x * pnet_stride + pnet_cell_size - 1.f) / scale;
            faceBox.ymax = (float)(y * pnet_stride + pnet_cell_size - 1.f) / scale;
            faceInfo.bbox_reg[0] = reg_data[i];
            faceInfo.bbox_reg[1] = reg_data[i + spatical_size];
            faceInfo.bbox_reg[2] = reg_data[i + 2 * spatical_size];
            faceInfo.bbox_reg[3] = reg_data[i + 3 * spatical_size];
            faceBox.score = confidence_data[i];
            candidate_boxes_.push_back(faceInfo);
        }
    }
}
/// Loads the three networks from `proto_model_dir` (det1/det2/det3 .prototxt
/// and .caffemodel) in GPU mode and aborts via CHECK if PNet's input is not
/// 3-channel. Inference parameters keep their in-class defaults.
MTCNN::MTCNN(const string& proto_model_dir) {
    caffe::Caffe::set_mode(caffe::Caffe::GPU);
    PNet_.reset(new caffe::Net<float>((proto_model_dir + "/det1.prototxt"), caffe::TEST));
    PNet_->CopyTrainedLayersFrom(proto_model_dir + "/det1.caffemodel");
    RNet_.reset(new caffe::Net<float>((proto_model_dir + "/det2.prototxt"), caffe::TEST));
    RNet_->CopyTrainedLayersFrom(proto_model_dir + "/det2.caffemodel");
    ONet_.reset(new caffe::Net<float>((proto_model_dir + "/det3.prototxt"), caffe::TEST));
    ONet_->CopyTrainedLayersFrom(proto_model_dir + "/det3.caffemodel");
    //ONet_.reset(new Net((proto_model_dir + "/det3-half.prototxt"), TEST));
    //ONet_->CopyTrainedLayersFrom(proto_model_dir + "/det3-half.caffemodel");
    const caffe::Blob<float>* input_layer = PNet_->input_blobs()[0];
    const int num_channels_ = input_layer->channels();
    CHECK(num_channels_ == 3) << "Input layer should have 3 channels.";
}
/// Loads the three networks from `proto_model_dir` and stores the given
/// inference parameters.
/// @param device  negative selects CPU mode; otherwise the GPU with that id.
MTCNN::MTCNN(const string& proto_model_dir, int min_face, float threhold_p, float threhold_r, float threhold_o,
             int device, float iou_threhold , float factor):
    min_face(min_face),
    threhold_p(threhold_p),
    threhold_r(threhold_r),
    threhold_o(threhold_o),
    iou_threhold(iou_threhold),
    factor(factor){
#ifdef _OPENMP
    // omp_get_num_threads() reports the size of the *current* team, which is 1
    // out here in serial code; omp_get_max_threads() is the number a parallel
    // region would actually get.
    threads_num = omp_get_max_threads();
#endif
    if (device < 0) {
        caffe::Caffe::set_mode(caffe::Caffe::CPU);
    }
    else {
        caffe::Caffe::SetDevice(device);
        caffe::Caffe::set_mode(caffe::Caffe::GPU);
    }
    PNet_.reset(new caffe::Net<float>((proto_model_dir + "/det1.prototxt"), caffe::TEST));
    PNet_->CopyTrainedLayersFrom(proto_model_dir + "/det1.caffemodel");
    RNet_.reset(new caffe::Net<float>((proto_model_dir + "/det2.prototxt"), caffe::TEST));
    RNet_->CopyTrainedLayersFrom(proto_model_dir + "/det2.caffemodel");
    ONet_.reset(new caffe::Net<float>((proto_model_dir + "/det3.prototxt"), caffe::TEST));
    ONet_->CopyTrainedLayersFrom(proto_model_dir + "/det3.caffemodel");
    //ONet_.reset(new Net((proto_model_dir + "/det3-half.prototxt"), TEST));
    //ONet_->CopyTrainedLayersFrom(proto_model_dir + "/det3-half.caffemodel");
    const caffe::Blob<float>* input_layer = PNet_->input_blobs()[0];
    const int num_channels_ = input_layer->channels();
    // Same sanity check as the single-argument constructor.
    CHECK(num_channels_ == 3) << "Input layer should have 3 channels.";
}
/// Stage 1: runs PNet over an image pyramid and returns squared, regressed
/// candidate boxes in original-image coordinates.
///
/// @param img        input image; pixels are read as 3-byte BGR-style triples
///                   (assumes an 8-bit 3-channel cv::Mat -- TODO confirm at call sites).
/// @param minSize    smallest face size to look for, in pixels.
/// @param threshold  minimum PNet score for a candidate.
/// @param factor     per-level pyramid shrink factor.
/// @return candidates after per-level NMS (0.5), cross-level NMS (0.7),
///         regression and squaring; empty for unusable parameters.
vector<FaceInfo> MTCNN::ProposalNet(const cv::Mat& img, int minSize, float threshold, float factor) {
    vector<FaceInfo> res_boxes;
    total_boxes_.clear();
    // minSize == 0 gives an infinite scale and factor >= 1 never shrinks the
    // pyramid: either would make the loop below run forever.
    if (minSize <= 0 || factor >= 1.f)
        return res_boxes;

    const int width = img.cols;
    const int height = img.rows;
    float scale = 12.f / minSize;
    float minWH = std::min(height, width) *scale;
    std::vector<float> scales;
    while (minWH >= 12) {
        scales.push_back(scale);
        minWH *= factor;
        scale *= factor;
    }

    caffe::Blob<float>* input_layer = PNet_->input_blobs()[0];
    cv::Mat resized;
    for (size_t i = 0; i < scales.size(); i++) {
        const int ws = (int)std::ceil(width*scales[i]);
        const int hs = (int)std::ceil(height*scales[i]);
        cv::resize(img, resized, cv::Size(ws, hs), 0, 0, cv::INTER_LINEAR);
        input_layer->Reshape(1, 3, hs, ws);
        PNet_->Reshape();

        // Interleaved pixels -> three normalised planes.
        float * input_data = input_layer->mutable_cpu_data();
        const cv::Vec3b * img_data = (const cv::Vec3b *)resized.data;
        const int spatial_size = ws* hs;
        for (int k = 0; k < spatial_size; ++k) {
            input_data[k] = float((img_data[k][0] - mean_val)* std_val);
            input_data[k + spatial_size] = float((img_data[k][1] - mean_val) * std_val);
            input_data[k + 2 * spatial_size] = float((img_data[k][2] - mean_val) * std_val);
        }

        PNet_->Forward();
        caffe::Blob<float>* confidence = PNet_->blob_by_name("prob1").get();
        caffe::Blob<float>* reg = PNet_->blob_by_name("conv4-2").get();
        GenerateBBox(confidence, reg, scales[i], threshold);
        std::vector<FaceInfo> bboxes_nms = NMS(candidate_boxes_, 0.5f, 'u');
        total_boxes_.insert(total_boxes_.end(), bboxes_nms.begin(), bboxes_nms.end());
    }

    if (!total_boxes_.empty()) {
        res_boxes = NMS(total_boxes_, 0.7f, 'u');
        BBoxRegression(res_boxes);
        BBoxPadSquare(res_boxes, width, height);
    }
    return res_boxes;
}
/// Stage 2 (RNet) or stage 3 (ONet): crops every candidate from `image`,
/// resizes it to input_w x input_h, runs the whole batch through the network
/// and returns the candidates whose face score reaches `threshold`.
///
/// Returned boxes keep the incoming rectangle; the new regression offsets are
/// stored in bbox_reg for the caller to apply. Stage 3 also fills `landmark`.
/// Any other stage_num yields an empty result.
vector<FaceInfo> MTCNN::NextStage(const cv::Mat& image, vector<FaceInfo> &pre_stage_res, int input_w, int input_h, int stage_num, const float threshold) {
    vector<FaceInfo> res;
    const int batch_size = (int)pre_stage_res.size();
    if (batch_size == 0)
        return res;

    caffe::Net<float>* net = nullptr;
    switch (stage_num) {
    case 2: net = RNet_.get(); break;
    case 3: net = ONet_.get(); break;
    default: return res;
    }
    const bool has_landmarks = (stage_num == 3);

    caffe::Blob<float>* input_layer = net->input_blobs()[0];
    input_layer->Reshape(batch_size, 3, input_h, input_w);
    net->Reshape();

    float * input_data = input_layer->mutable_cpu_data();
    const int spatial_size = input_h*input_w;
    const cv::Rect image_bounds(0, 0, image.cols, image.rows);
    // usable[n] == 0 marks a candidate whose crop is empty; it is fed zeros and
    // dropped from the output instead of throwing inside the parallel region.
    std::vector<char> usable(batch_size, 1);

#pragma omp parallel for num_threads(threads_num)
    for (int n = 0; n < batch_size; ++n) {
        const FaceBox &box = pre_stage_res[n].bbox;
        float *input_data_n = input_data + input_layer->offset(n);

        cv::Rect region(cv::Point((int)box.xmin, (int)box.ymin), cv::Point((int)box.xmax, (int)box.ymax));
        region &= image_bounds;
        if (region.width <= 0 || region.height <= 0) {
            usable[n] = 0;
            std::fill(input_data_n, input_data_n + 3 * spatial_size, 0.f);
            continue;
        }

        cv::Mat roi = image(region).clone();
        cv::resize(roi, roi, cv::Size(input_w, input_h));
        CHECK_EQ(roi.isContinuous(), true);
        const cv::Vec3b *roi_data = (const cv::Vec3b *)roi.data;
        for (int k = 0; k < spatial_size; ++k) {
            input_data_n[k] = float((roi_data[k][0] - mean_val)*std_val);
            input_data_n[k + spatial_size] = float((roi_data[k][1] - mean_val)*std_val);
            input_data_n[k + 2 * spatial_size] = float((roi_data[k][2] - mean_val)*std_val);
        }
    }

    net->Forward();
    const float* confidence_data = net->blob_by_name("prob1")->cpu_data();
    const float* reg_data = net->blob_by_name(has_landmarks ? "conv6-2" : "conv5-2")->cpu_data();
    const float* landmark_data = has_landmarks ? net->blob_by_name("conv6-3")->cpu_data() : nullptr;

    for (int k = 0; k < batch_size; ++k) {
        if (!usable[k])
            continue;
        // Two scores per candidate; the second is the one compared against the threshold.
        const float score = confidence_data[2 * k + 1];
        if (score >= threshold) {
            FaceInfo info{};  // zero fields this stage does not fill
            info.bbox = pre_stage_res[k].bbox;
            info.bbox.score = score;
            for (int i = 0; i < 4; ++i) {
                info.bbox_reg[i] = reg_data[4 * k + i];
            }
            if (landmark_data) {
                // Landmark outputs are relative to the box; map them to image pixels.
                const float w = info.bbox.xmax - info.bbox.xmin + 1.f;
                const float h = info.bbox.ymax - info.bbox.ymin + 1.f;
                for (int i = 0; i < 5; ++i) {
                    info.landmark[2 * i] = landmark_data[10 * k + 2 * i] * w + info.bbox.xmin;
                    info.landmark[2 * i + 1] = landmark_data[10 * k + 2 * i + 1] * h + info.bbox.ymin;
                }
            }
            res.push_back(info);
        }
    }
    return res;
}
/// Runs the cascade with caller-supplied parameters.
///
/// @param minSize    smallest face size for the proposal stage.
/// @param threshold  array of per-stage score thresholds; element k-1 is read
///                   when stage k runs, so it needs `stage` (at most 3) entries.
/// @param factor     pyramid shrink factor for the proposal stage.
/// @param stage      how far to run: 1 = PNet only, 2 = +RNet, 3 or more = +ONet.
/// @return the output of the requested stage (empty when stage < 1).
vector<FaceInfo> MTCNN::Detect(const cv::Mat& image, const int minSize, const float* threshold, const float factor, const int stage) {
    vector<FaceInfo> pnet_res;
    vector<FaceInfo> rnet_res;
    vector<FaceInfo> onet_res;

    if (stage >= 1) {
        pnet_res = ProposalNet(image, minSize, threshold[0], factor);
    }

    if (stage >= 2 && !pnet_res.empty()) {
        if (pnet_max_detect_num < (int)pnet_res.size()) {
            pnet_res.resize(pnet_max_detect_num);
        }
        const int num = (int)pnet_res.size();
        // Feed RNet in batches of at most step_size candidates.
        for (int start = 0; start < num; start += step_size) {
            const int end = std::min(start + step_size, num);
            vector<FaceInfo> input(pnet_res.begin() + start, pnet_res.begin() + end);
            vector<FaceInfo> res = NextStage(image, input, 24, 24, 2, threshold[1]);
            rnet_res.insert(rnet_res.end(), res.begin(), res.end());
        }
        rnet_res = NMS(rnet_res, 0.7f, 'u');
        BBoxRegression(rnet_res);
        BBoxPadSquare(rnet_res, image.cols, image.rows);
    }

    if (stage >= 3 && !rnet_res.empty()) {
        const int num = (int)rnet_res.size();
        for (int start = 0; start < num; start += step_size) {
            const int end = std::min(start + step_size, num);
            vector<FaceInfo> input(rnet_res.begin() + start, rnet_res.begin() + end);
            vector<FaceInfo> res = NextStage(image, input, 48, 48, 3, threshold[2]);
            onet_res.insert(onet_res.end(), res.begin(), res.end());
        }
        // Final stage regresses first, then suppresses by intersection/min-area.
        BBoxRegression(onet_res);
        onet_res = NMS(onet_res, 0.7f, 'm');
        BBoxPad(onet_res, image.cols, image.rows);
    }

    if (stage == 1)
        return pnet_res;
    if (stage == 2)
        return rnet_res;
    return onet_res;
}
/// Runs the cascade with the parameters stored in this object (min_face,
/// threhold_p/r/o, iou_threhold, factor).
///
/// @param stage  how far to run: 1 = PNet only, 2 = +RNet, 3 or more = +ONet.
/// @return the output of the requested stage (empty when stage < 1).
vector<FaceInfo> MTCNN::Detect(const cv::Mat& image, const int stage) {
    vector<FaceInfo> pnet_res;
    vector<FaceInfo> rnet_res;
    vector<FaceInfo> onet_res;

    if (stage >= 1) {
        pnet_res = ProposalNet(image, min_face, threhold_p, factor);
    }

    if (stage >= 2 && !pnet_res.empty()) {
        if (pnet_max_detect_num < (int)pnet_res.size()) {
            pnet_res.resize(pnet_max_detect_num);
        }
        const int num = (int)pnet_res.size();
        // Feed RNet in batches of at most step_size candidates.
        for (int start = 0; start < num; start += step_size) {
            const int end = std::min(start + step_size, num);
            vector<FaceInfo> input(pnet_res.begin() + start, pnet_res.begin() + end);
            vector<FaceInfo> res = NextStage(image, input, 24, 24, 2, threhold_r);
            rnet_res.insert(rnet_res.end(), res.begin(), res.end());
        }
        rnet_res = NMS(rnet_res, iou_threhold, 'u');
        BBoxRegression(rnet_res);
        BBoxPadSquare(rnet_res, image.cols, image.rows);
    }

    if (stage >= 3 && !rnet_res.empty()) {
        const int num = (int)rnet_res.size();
        for (int start = 0; start < num; start += step_size) {
            const int end = std::min(start + step_size, num);
            vector<FaceInfo> input(rnet_res.begin() + start, rnet_res.begin() + end);
            vector<FaceInfo> res = NextStage(image, input, 48, 48, 3, threhold_o);
            onet_res.insert(onet_res.end(), res.begin(), res.end());
        }
        // Final stage regresses first, then suppresses by intersection/min-area.
        BBoxRegression(onet_res);
        onet_res = NMS(onet_res, iou_threhold, 'm');
        BBoxPad(onet_res, image.cols, image.rows);
    }

    if (stage == 1)
        return pnet_res;
    if (stage == 2)
        return rnet_res;
    return onet_res;
}
ncnn 版本在 Linux 系统或嵌入式系统上运行更快 (the ncnn version below runs faster on Linux and embedded systems)
mtcnn.h
#ifndef _MTCNN_H_
#define _MTCNN_H_
#include
#include
#include
#include
#ifdef _OPENMP
#include
#endif
using std::string;
using std::vector;
/// Axis-aligned rectangle for one face candidate together with its score.
/// The surrounding code computes sizes as (max - min + 1), i.e. it treats
/// both corners as inclusive pixel coordinates.
struct FaceBox {
    float xmin;   ///< left edge
    float ymin;   ///< top edge
    float xmax;   ///< right edge
    float ymax;   ///< bottom edge
    float score;  ///< network confidence; candidates are ranked by this value
};
/// Everything the detector tracks for one face candidate.
struct FaceInfo {
    float bbox_reg[4];       ///< box offsets from the network, applied as fractions of box width/height
    float landmark_reg[10];  ///< not written by any code visible in this file
    float landmark[10];      ///< five (x, y) pairs, filled only by the third stage
    FaceBox bbox;            ///< current rectangle and score
};
class MTCNN {
public:
MTCNN(const string& proto_model_dir, int min_face, float threhold_p=0.7f, float threhold_r=0.6f, float threhold_o = 0.6f, int device = 0, float iou_threhold = 0.7f, float factor = 0.709f);
//vector Detect(const cv::Mat& img, const int min_size, const float* threshold, const float factor, const int stage);
vector Detect(const cv::Mat& img, const int stage);
~MTCNN();
protected:
vector ProposalNet(const cv::Mat& img, int min_size, float threshold, float factor);
vector NextStage(const cv::Mat& image, vector &pre_stage_res, int input_w, int input_h, int stage_num, const float threshold);
void BBoxRegression(vector& bboxes);
void BBoxPadSquare(vector& bboxes, int width, int height);
void BBoxPad(vector& bboxes, int width, int height);
//////////////////////////
void GenerateBBox(ncnn::Mat & confidence, ncnn::Mat& reg_box, float scale, float thresh);
//////////////////////////
std::vector NMS(std::vector& bboxes, float thresh, char methodType);
float IoU(float xmin, float ymin, float xmax, float ymax, float xmin_, float ymin_, float xmax_, float ymax_, bool is_iom = false);
private:
std::shared_ptr PNet_;
std::shared_ptr RNet_;
std::shared_ptr ONet_;
std::vector candidate_boxes_;
std::vector total_boxes_;
//omp
int threads_num = 4;
//pnet config
const float pnet_stride = 2;
const float pnet_cell_size = 12;
const int pnet_max_detect_num = 5000;
//mean & std
const float mean_val = 127.5f;
const float std_val = 0.0078125f;
//minibatch size
const int step_size = 128;
//mtcnn inference param.
int min_face=24;
float threhold_p=0.7f;
float threhold_r=0.6f;
float threhold_o=0.6f;
float iou_threhold =0.7f;
float factor= 0.709f;
};
#endif // _MTCNN_H_
mtcnn.cpp
#include
#include
#include "mtcnn.h"
/// Ordering predicate for std::sort: places higher-scoring candidates first.
bool CompareBBox(const FaceInfo & a, const FaceInfo & b) {
    const float lhs_score = a.bbox.score;
    const float rhs_score = b.bbox.score;
    return rhs_score < lhs_score;
}
/// Overlap ratio between two boxes given by inclusive corner coordinates.
/// @param is_iom  when true, divide the intersection by the smaller box area;
///                otherwise divide by the union area.
/// @return 0 when the boxes do not overlap, else the requested ratio.
float MTCNN::IoU(float xmin, float ymin, float xmax, float ymax,
                 float xmin_, float ymin_, float xmax_, float ymax_, bool is_iom) {
    const float overlap_w = std::min(xmax, xmax_) - std::max(xmin, xmin_) + 1;
    const float overlap_h = std::min(ymax, ymax_) - std::max(ymin, ymin_) + 1;
    if (overlap_w <= 0 || overlap_h <= 0)
        return 0;

    const float overlap_area = overlap_w * overlap_h;
    const float first_area = (xmax - xmin + 1) * (ymax - ymin + 1);
    const float second_area = (xmax_ - xmin_ + 1) * (ymax_ - ymin_ + 1);

    if (is_iom)
        return overlap_area / std::min(first_area, second_area);
    return overlap_area / (first_area + second_area - overlap_area);
}
std::vector MTCNN::NMS(std::vector& bboxes,
float thresh, char methodType) {
std::vector bboxes_nms;
if (bboxes.size() == 0) {
return bboxes_nms;
}
std::sort(bboxes.begin(), bboxes.end(), CompareBBox);
int32_t select_idx = 0;
int32_t num_bbox = static_cast(bboxes.size());
std::vector mask_merged(num_bbox, 0);
bool all_merged = false;
while (!all_merged) {
while (select_idx < num_bbox && mask_merged[select_idx] == 1)
select_idx++;
if (select_idx == num_bbox) {
all_merged = true;
continue;
}
bboxes_nms.push_back(bboxes[select_idx]);
mask_merged[select_idx] = 1;
FaceBox select_bbox = bboxes[select_idx].bbox;
float area1 = static_cast((select_bbox.xmax - select_bbox.xmin + 1) * (select_bbox.ymax - select_bbox.ymin + 1));
float x1 = static_cast(select_bbox.xmin);
float y1 = static_cast(select_bbox.ymin);
float x2 = static_cast(select_bbox.xmax);
float y2 = static_cast(select_bbox.ymax);
select_idx++;
#pragma omp parallel for num_threads(threads_num)
for (int32_t i = select_idx; i < num_bbox; i++) {
if (mask_merged[i] == 1)
continue;
FaceBox & bbox_i = bboxes[i].bbox;
float x = std::max(x1, static_cast(bbox_i.xmin));
float y = std::max(y1, static_cast(bbox_i.ymin));
float w = std::min(x2, static_cast(bbox_i.xmax)) - x + 1;
float h = std::min(y2, static_cast(bbox_i.ymax)) - y + 1;
if (w <= 0 || h <= 0)
continue;
float area2 = static_cast((bbox_i.xmax - bbox_i.xmin + 1) * (bbox_i.ymax - bbox_i.ymin + 1));
float area_intersect = w * h;
switch (methodType) {
case 'u':
if (static_cast(area_intersect) / (area1 + area2 - area_intersect) > thresh)
mask_merged[i] = 1;
break;
case 'm':
if (static_cast(area_intersect) / std::min(area1, area2) > thresh)
mask_merged[i] = 1;
break;
default:
break;
}
}
}
return bboxes_nms;
}
void MTCNN::BBoxRegression(vector& bboxes) {
#pragma omp parallel for num_threads(threads_num)
for (int i = 0; i < bboxes.size(); ++i) {
FaceBox &bbox = bboxes[i].bbox;
float *bbox_reg = bboxes[i].bbox_reg;
float w = bbox.xmax - bbox.xmin + 1;
float h = bbox.ymax - bbox.ymin + 1;
bbox.xmin += bbox_reg[0] * w;
bbox.ymin += bbox_reg[1] * h;
bbox.xmax += bbox_reg[2] * w;
bbox.ymax += bbox_reg[3] * h;
}
}
void MTCNN::BBoxPad(vector& bboxes, int width, int height) {
#pragma omp parallel for num_threads(threads_num)
for (int i = 0; i < bboxes.size(); ++i) {
FaceBox &bbox = bboxes[i].bbox;
bbox.xmin = round(std::max(bbox.xmin, 0.f));
bbox.ymin = round(std::max(bbox.ymin, 0.f));
bbox.xmax = round(std::min(bbox.xmax, width - 1.f));
bbox.ymax = round(std::min(bbox.ymax, height - 1.f));
}
}
void MTCNN::BBoxPadSquare(vector& bboxes, int width, int height) {
#pragma omp parallel for num_threads(threads_num)
for (int i = 0; i < bboxes.size(); ++i) {
FaceBox &bbox = bboxes[i].bbox;
float w = bbox.xmax - bbox.xmin + 1;
float h = bbox.ymax - bbox.ymin + 1;
float side = h>w ? h : w;
bbox.xmin = round(std::max(bbox.xmin + (w - side)*0.5f, 0.f));
bbox.ymin = round(std::max(bbox.ymin + (h - side)*0.5f, 0.f));
bbox.xmax = round(std::min(bbox.xmin + side - 1, width - 1.f));
bbox.ymax = round(std::min(bbox.ymin + side - 1, height - 1.f));
}
}
/// Turns one PNet output map into candidate boxes stored in candidate_boxes_
/// (which is cleared first).
///
/// @param confidence  PNet score map; channel 1 is read as the score.
/// @param reg_box     PNet regression map; channels 0..3 are the box offsets
///                    (assumed to have the same width/height as `confidence`).
/// @param scale       pyramid scale the map was produced at; boxes are mapped
///                    back to original-image coordinates by dividing by it.
/// @param thresh      minimum score for a cell to become a candidate.
void MTCNN::GenerateBBox(ncnn::Mat & confidence, ncnn::Mat& reg_box,
                         float scale, float thresh) {
    const int feature_map_w_ = confidence.w;
    const int feature_map_h_ = confidence.h;
    const int spatical_size = feature_map_w_*feature_map_h_;
    const float* confidence_data = static_cast<const float*>(confidence.channel(1).data);
    // ncnn pads each channel to `cstep` elements, which can be larger than
    // w*h, so every regression channel is addressed through channel() rather
    // than by adding multiples of w*h to the base pointer.
    const float* reg_data[4];
    for (int c = 0; c < 4; ++c) {
        reg_data[c] = static_cast<const float*>(reg_box.channel(c).data);
    }
    candidate_boxes_.clear();
    for (int i = 0; i < spatical_size; ++i) {
        if (confidence_data[i] >= thresh) {
            const int y = i / feature_map_w_;
            const int x = i - feature_map_w_ * y;
            FaceInfo faceInfo{};  // zero the landmark fields this stage never fills
            FaceBox &faceBox = faceInfo.bbox;
            // Each output cell covers a pnet_cell_size window placed every
            // pnet_stride pixels in the scaled image.
            faceBox.xmin = (float)(x * pnet_stride) / scale;
            faceBox.ymin = (float)(y * pnet_stride) / scale;
            faceBox.xmax = (float)(x * pnet_stride + pnet_cell_size - 1.f) / scale;
            faceBox.ymax = (float)(y * pnet_stride + pnet_cell_size - 1.f) / scale;
            for (int c = 0; c < 4; ++c) {
                faceInfo.bbox_reg[c] = reg_data[c][i];
            }
            faceBox.score = confidence_data[i];
            candidate_boxes_.push_back(faceInfo);
        }
    }
}
/// Stores the inference parameters and loads the three ncnn networks.
///
/// NOTE(review): `proto_model_dir` and `device` are accepted but not used --
/// the model files are always read from the relative "models/" directory.
/// Left unchanged so existing callers keep working; confirm before switching
/// the paths to proto_model_dir.
MTCNN::MTCNN(const string& proto_model_dir, int min_face, float threhold_p, float threhold_r, float threhold_o,
             int device, float iou_threhold , float factor):
    min_face(min_face),
    threhold_p(threhold_p),
    threhold_r(threhold_r),
    threhold_o(threhold_o),
    iou_threhold(iou_threhold),
    factor(factor){
    (void)proto_model_dir;
    (void)device;
#ifdef _OPENMP
    // omp_get_num_threads() reports the size of the *current* team, which is 1
    // out here in serial code; omp_get_max_threads() is the number a parallel
    // region would actually get.
    threads_num = omp_get_max_threads();
#endif
    PNet_ = std::make_shared<ncnn::Net>();
    PNet_->load_param("models/det1.param");
    PNet_->load_model("models/det1.bin");
    RNet_ = std::make_shared<ncnn::Net>();
    RNet_->load_param("models/det2.param");
    RNet_->load_model("models/det2.bin");
    ONet_ = std::make_shared<ncnn::Net>();
    ONet_->load_param("models/det3-half.param");
    ONet_->load_model("models/det3-half.bin");
}
/// Calls clear() on the proposal, refine and output networks, in that order.
MTCNN::~MTCNN() {
    for (auto* net : { PNet_.get(), RNet_.get(), ONet_.get() }) {
        net->clear();
    }
}
/// Stage 1: runs PNet over an image pyramid and returns squared, regressed
/// candidate boxes in original-image coordinates.
///
/// @param img        input image, handed to ncnn as packed BGR pixels.
/// @param minSize    smallest face size to look for, in pixels.
/// @param threshold  minimum PNet score for a candidate.
/// @param factor     per-level pyramid shrink factor.
/// @return candidates after per-level NMS (0.5), cross-level NMS (0.7),
///         regression and squaring; empty for unusable parameters.
vector<FaceInfo> MTCNN::ProposalNet(const cv::Mat& img, int minSize, float threshold, float factor) {
    vector<FaceInfo> res_boxes;
    total_boxes_.clear();
    // minSize == 0 gives an infinite scale and factor >= 1 never shrinks the
    // pyramid: either would make the loop below run forever.
    if (minSize <= 0 || factor >= 1.f)
        return res_boxes;

    const int width = img.cols;
    const int height = img.rows;
    float scale = 12.f / minSize;
    float minWH = std::min(height, width) *scale;
    std::vector<float> scales;
    while (minWH >= 12) {
        scales.push_back(scale);
        minWH *= factor;
        scale *= factor;
    }

    const float mean_vals[3] = { mean_val, mean_val, mean_val };
    const float norm_vals[3] = { std_val, std_val, std_val };
    cv::Mat resized;
    for (size_t i = 0; i < scales.size(); i++) {
        const int ws = (int)std::ceil(width*scales[i]);
        const int hs = (int)std::ceil(height*scales[i]);
        cv::resize(img, resized, cv::Size(ws, hs));

        ncnn::Mat in = ncnn::Mat::from_pixels(resized.data, ncnn::Mat::PIXEL_BGR, resized.cols, resized.rows);
        in.substract_mean_normalize(mean_vals, norm_vals);

        ncnn::Extractor ex = PNet_->create_extractor();
        ex.set_light_mode(true);
        ex.input("data", in);
        ncnn::Mat confidence;
        ncnn::Mat reg;
        ex.extract("prob1", confidence);
        ex.extract("conv4-2", reg);

        GenerateBBox(confidence, reg, scales[i], threshold);
        std::vector<FaceInfo> bboxes_nms = NMS(candidate_boxes_, 0.5f, 'u');
        total_boxes_.insert(total_boxes_.end(), bboxes_nms.begin(), bboxes_nms.end());
    }

    if (!total_boxes_.empty()) {
        res_boxes = NMS(total_boxes_, 0.7f, 'u');
        BBoxRegression(res_boxes);
        BBoxPadSquare(res_boxes, width, height);
    }
    return res_boxes;
}
/// Stage 2 (RNet) or stage 3 (ONet): crops every candidate from `image`,
/// resizes it to input_w x input_h, runs it through the network and returns
/// the candidates whose face score reaches `threshold`.
///
/// Returned boxes keep the incoming rectangle; the new regression offsets are
/// stored in bbox_reg for the caller to apply. Stage 3 also fills `landmark`.
/// Any other stage_num yields an empty result.
vector<FaceInfo> MTCNN::NextStage(const cv::Mat& image, vector<FaceInfo> &pre_stage_res, int input_w, int input_h, int stage_num, const float threshold) {
    vector<FaceInfo> res;
    if (stage_num != 2 && stage_num != 3)
        return res;

    // The two stages differ only in the network, the regression blob name and
    // whether landmarks are produced.
    const bool has_landmarks = (stage_num == 3);
    ncnn::Net& net = has_landmarks ? *ONet_ : *RNet_;
    const char* const reg_blob_name = has_landmarks ? "conv6-2" : "conv5-2";

    const float mean_vals[3] = { mean_val, mean_val, mean_val };
    const float norm_vals[3] = { std_val, std_val, std_val };
    const cv::Rect image_bounds(0, 0, image.cols, image.rows);

    for (size_t n = 0; n < pre_stage_res.size(); ++n) {
        const FaceBox &box = pre_stage_res[n].bbox;
        // Clip to the image and skip degenerate crops: cropping outside the
        // image or resizing an empty crop makes OpenCV throw.
        cv::Rect region(cv::Point((int)box.xmin, (int)box.ymin), cv::Point((int)box.xmax, (int)box.ymax));
        region &= image_bounds;
        if (region.width <= 0 || region.height <= 0)
            continue;

        cv::Mat roi = image(region).clone();
        cv::resize(roi, roi, cv::Size(input_w, input_h));
        ncnn::Mat in = ncnn::Mat::from_pixels(roi.data, ncnn::Mat::PIXEL_BGR, roi.cols, roi.rows);
        in.substract_mean_normalize(mean_vals, norm_vals);

        ncnn::Extractor ex = net.create_extractor();
        ex.set_light_mode(true);
        ex.input("data", in);
        ncnn::Mat confidence;
        ncnn::Mat reg_box;
        ncnn::Mat reg_landmark;
        ex.extract("prob1", confidence);
        ex.extract(reg_blob_name, reg_box);
        if (has_landmarks) {
            ex.extract("conv6-3", reg_landmark);
        }

        // Two scores per crop; the second is the one compared against the threshold.
        const float conf = static_cast<const float*>(confidence.data)[1];
        if (conf >= threshold) {
            FaceInfo info{};  // zero fields this stage does not fill
            info.bbox = box;
            info.bbox.score = conf;
            const float* reg_values = static_cast<const float*>(reg_box.data);
            for (int i = 0; i < 4; ++i) {
                info.bbox_reg[i] = reg_values[i];
            }
            if (has_landmarks) {
                // Landmark outputs are relative to the box; map them to image pixels.
                const float* landmark_values = static_cast<const float*>(reg_landmark.data);
                const float w = info.bbox.xmax - info.bbox.xmin + 1.f;
                const float h = info.bbox.ymax - info.bbox.ymin + 1.f;
                for (int i = 0; i < 5; ++i) {
                    info.landmark[2 * i] = landmark_values[2 * i] * w + info.bbox.xmin;
                    info.landmark[2 * i + 1] = landmark_values[2 * i + 1] * h + info.bbox.ymin;
                }
            }
            res.push_back(info);
        }
    }
    return res;
}
//vector MTCNN::Detect(const cv::Mat& image, const int minSize, const float* threshold, const float factor, const int stage) {
// vector pnet_res;
// vector rnet_res;
// vector onet_res;
// if (stage >= 1) {
// pnet_res = ProposalNet(image, minSize, threshold[0], factor);
// }
// //double start = cv::getTickCount();
// if (stage >= 2 && pnet_res.size()>0) {
// if (pnet_max_detect_num < (int)pnet_res.size()) {
// pnet_res.resize(pnet_max_detect_num);
// }
// int num = (int)pnet_res.size();
// int size = (int)ceil((float)num / step_size);
// for (int iter = 0; iter < size; ++iter) {
// int start = iter*step_size;
// int end = std::min(start + step_size, num);
// vector input(pnet_res.begin() + start, pnet_res.begin() + end);
// vector res = NextStage(image, input, 24, 24, 2, threshold[1]);
// rnet_res.insert(rnet_res.end(), res.begin(), res.end());
// }
// rnet_res = NMS(rnet_res, 0.7f, 'u');
// BBoxRegression(rnet_res);
// BBoxPadSquare(rnet_res, image.cols, image.rows);
//
// }
// //double end = cv::getTickCount();
// //std::cout << "r net :" << (end - start) / (cv::getTickFrequency() / 1000) << " ms" << std::endl;
//
// //double start1 = cv::getTickCount();
//
// if (stage >= 3 && rnet_res.size()>0) {
// int num = (int)rnet_res.size();
// int size = (int)ceil((float)num / step_size);
// for (int iter = 0; iter < size; ++iter) {
// int start = iter*step_size;
// int end = std::min(start + step_size, num);
// vector input(rnet_res.begin() + start, rnet_res.begin() + end);
// vector res = NextStage(image, input, 48, 48, 3, threshold[2]);
// onet_res.insert(onet_res.end(), res.begin(), res.end());
// }
// BBoxRegression(onet_res);
// onet_res = NMS(onet_res, 0.7f, 'm');
// BBoxPad(onet_res, image.cols, image.rows);
// }
// //double end1 = cv::getTickCount();
// //std::cout << "o net :" << (end1 - start1) / (cv::getTickFrequency() / 1000) << " ms" << std::endl;
// if (stage == 1) {
// return pnet_res;
// }
// else if (stage == 2) {
// return rnet_res;
// }
// else if (stage == 3) {
// return onet_res;
// }
// else {
// return onet_res;
// }
//}
/// Runs the cascade with the parameters stored in this object (min_face,
/// threhold_p/r/o, iou_threhold, factor).
///
/// @param stage  how far to run: 1 = PNet only, 2 = +RNet, 3 or more = +ONet.
/// @return the output of the requested stage (empty when stage < 1).
vector<FaceInfo> MTCNN::Detect(const cv::Mat& image, const int stage) {
    vector<FaceInfo> pnet_res;
    vector<FaceInfo> rnet_res;
    vector<FaceInfo> onet_res;

    if (stage >= 1) {
        pnet_res = ProposalNet(image, min_face, threhold_p, factor);
    }

    if (stage >= 2 && !pnet_res.empty()) {
        if (pnet_max_detect_num < (int)pnet_res.size()) {
            pnet_res.resize(pnet_max_detect_num);
        }
        const int num = (int)pnet_res.size();
        // Hand candidates to RNet in chunks of at most step_size.
        for (int start = 0; start < num; start += step_size) {
            const int end = std::min(start + step_size, num);
            vector<FaceInfo> input(pnet_res.begin() + start, pnet_res.begin() + end);
            vector<FaceInfo> res = NextStage(image, input, 24, 24, 2, threhold_r);
            rnet_res.insert(rnet_res.end(), res.begin(), res.end());
        }
        rnet_res = NMS(rnet_res, iou_threhold, 'u');
        BBoxRegression(rnet_res);
        BBoxPadSquare(rnet_res, image.cols, image.rows);
    }

    // This condition had been swallowed by a commented-out timing line in the
    // extracted text, leaving the block's braces unbalanced.
    if (stage >= 3 && !rnet_res.empty()) {
        const int num = (int)rnet_res.size();
        for (int start = 0; start < num; start += step_size) {
            const int end = std::min(start + step_size, num);
            vector<FaceInfo> input(rnet_res.begin() + start, rnet_res.begin() + end);
            vector<FaceInfo> res = NextStage(image, input, 48, 48, 3, threhold_o);
            onet_res.insert(onet_res.end(), res.begin(), res.end());
        }
        // Final stage regresses first, then suppresses by intersection/min-area.
        BBoxRegression(onet_res);
        onet_res = NMS(onet_res, iou_threhold, 'm');
        BBoxPad(onet_res, image.cols, image.rows);
    }

    if (stage == 1)
        return pnet_res;
    if (stage == 2)
        return rnet_res;
    return onet_res;
}
#include
#include
#include
#include
#include
#include
#include
#include
#include "net.h"
#include"cpu.h"
using namespace std;
using namespace cv;
// One candidate face box as it travels through the three-stage cascade.
// Every member has a default so a freshly declared Bbox never carries
// indeterminate values when it is copied into a vector.
struct Bbox
{
    float score = 0.0f;        // face probability reported by the current stage
    int x1 = 0;                // left edge, in original-image pixels
    int y1 = 0;                // top edge
    int x2 = 0;                // right edge
    int y2 = 0;                // bottom edge
    float area = 0.0f;         // (x2 - x1) * (y2 - y1), cached for the overlap test
    bool exist = false;        // false once the box has been suppressed or rejected
    float ppoint[10] = {};     // landmarks: [0..4] are x, [5..9] are y
    float regreCoord[4] = {};  // regression offsets for x1, y1, x2, y2, as fractions of box width/height
};
// Sort key used by non-maximum suppression: a score paired with the index of
// the box it belongs to. The suppression code writes -1 into oriOrder to mark
// an entry whose box has been eliminated.
struct orderScore
{
    float score = 0.0f;  // score copied from the corresponding Bbox
    int oriOrder = 0;    // index of that Bbox in its vector, or -1 when eliminated
};
/**
 * Bilinearly resamples a planar float image into an already-sized destination.
 *
 * The output size is taken from dstImage (w, h); the caller must have
 * allocated it. Planes are addressed through each image's cstep. Only the
 * channels present in both images are written.
 *
 * Changes relative to the previous version:
 *  - the same-size fast path copied w*h*c floats in one memcpy, i.e. it
 *    assumed planes were exactly w*h apart although the interpolation path
 *    uses cstep as the plane stride; it now copies plane by plane;
 *  - a source dimension of 1 made the clamped sample index -1 (out-of-bounds
 *    read); the index is now kept inside the image;
 *  - the channel count is no longer hard-coded to 3.
 * Results for sources of at least 2x2 pixels with 3 channels are unchanged.
 *
 * @param srcImage  Source image (read only).
 * @param dstImage  Destination image; its w/h define the output size.
 */
void resize_image(ncnn::Mat& srcImage, ncnn::Mat& dstImage)
{
    const int src_width = srcImage.w;
    const int src_height = srcImage.h;
    const int dst_width = dstImage.w;
    const int dst_height = dstImage.h;
    const int channels = (srcImage.c < dstImage.c) ? srcImage.c : dstImage.c;

    // Nothing to sample from, or nothing to write to.
    if (src_width <= 0 || src_height <= 0 || dst_width <= 0 || dst_height <= 0 || channels <= 0)
    {
        return;
    }

    const float* src_data = srcImage.data;
    float* dest_data = dstImage.data;
    const size_t src_plane = srcImage.cstep;
    const size_t dst_plane = dstImage.cstep;

    if (src_width == dst_width && src_height == dst_height)
    {
        const size_t plane_bytes = static_cast<size_t>(src_width) * src_height * sizeof(float);
        for (int c = 0; c < channels; c++)
        {
            memcpy(dest_data + c * dst_plane, src_data + c * src_plane, plane_bytes);
        }
        return;
    }

    const float lf_x_scl = static_cast<float>(src_width) / dst_width;
    const float lf_y_scl = static_cast<float>(src_height) / dst_height;

    for (int y = 0; y < dst_height; y++) {
        const float lf_y_s = lf_y_scl * y;
        // Top sample row, clamped so that row+1 is still inside the image.
        int n_y_s = static_cast<int>(lf_y_s);
        if (n_y_s > src_height - 2) n_y_s = src_height - 2;
        if (n_y_s < 0) n_y_s = 0;  // only reachable when src_height == 1
        const int n_y_next = (n_y_s + 1 < src_height) ? n_y_s + 1 : n_y_s;
        const float lf_weight_y = lf_y_s - n_y_s;

        for (int x = 0; x < dst_width; x++) {
            const float lf_x_s = lf_x_scl * x;
            int n_x_s = static_cast<int>(lf_x_s);
            if (n_x_s > src_width - 2) n_x_s = src_width - 2;
            if (n_x_s < 0) n_x_s = 0;  // only reachable when src_width == 1
            const int n_x_next = (n_x_s + 1 < src_width) ? n_x_s + 1 : n_x_s;
            const float lf_weight_x = lf_x_s - n_x_s;

            for (int c = 0; c < channels; c++) {
                const float* plane = src_data + c * src_plane;
                const float top_left = plane[n_y_s * src_width + n_x_s];
                const float top_right = plane[n_y_s * src_width + n_x_next];
                const float bottom_left = plane[n_y_next * src_width + n_x_s];
                const float bottom_right = plane[n_y_next * src_width + n_x_next];

                dest_data[c * dst_plane + y * dst_width + x] =
                    (1 - lf_weight_y) * ((1 - lf_weight_x) * top_left + lf_weight_x * top_right) +
                    lf_weight_y * ((1 - lf_weight_x) * bottom_left + lf_weight_x * bottom_right);
            }
        }
    }
}
bool cmpScore(orderScore lsh, orderScore rsh){
if(lsh.score& finalBbox);
cv::Mat cp_img;
private:
void generateBbox(ncnn::Mat score, ncnn::Mat location, vector& boundingBox_, vector& bboxScore_, float scale);
void nms(vector &boundingBox_, std::vector &bboxScore_, const float overlap_threshold, string modelname="Union");
void refineAndSquareBbox(vector &vecBbox, const int &height, const int &width);
ncnn::Net Pnet, Rnet, Onet;
ncnn::Mat img;
float nms_threshold[3];// = { 0.5, 0.7, 0.7 };
float threshold[3];// = {0.8, 0.8, 0.8};
float mean_vals[3];// = {127.5, 127.5, 127.5};
float norm_vals[3];// = {0.0078125, 0.0078125, 0.0078125};
std::vector firstBbox_, secondBbox_,thirdBbox_;
std::vector firstOrderScore_, secondBboxScore_, thirdBboxScore_;
int img_w, img_h;
};
// Initialises the per-stage thresholds and normalisation constants, then loads
// the three networks from their (hard-coded) model files.
//
// A failed load is reported on stderr instead of being silently ignored; the
// constructor still completes, as before, so existing callers are unaffected.
// NOTE(review): the model location is an absolute path on one specific
// machine — it should become configurable.
mtcnn::mtcnn(){
    for (int i = 0; i < 3; i++)
    {
        nms_threshold[i] = 0.7f;
        threshold[i] = 0.7f;
        mean_vals[i] = 127.5f;
        norm_vals[i] = 0.0078125f;
    }
    // The first stage uses a different NMS threshold than the later stages.
    nms_threshold[0] = 0.5f;

    struct ModelFiles
    {
        ncnn::Net* net;
        const char* param;
        const char* bin;
    };
    const ModelFiles models[3] = {
        { &Pnet, "E:/Algrithm/MTCNN/MTCNN-master/mtcnn_caffe/model/det1.param",
                 "E:/Algrithm/MTCNN/MTCNN-master/mtcnn_caffe/model/det1.bin" },
        { &Rnet, "E:/Algrithm/MTCNN/MTCNN-master/mtcnn_caffe/model/det2.param",
                 "E:/Algrithm/MTCNN/MTCNN-master/mtcnn_caffe/model/det2.bin" },
        { &Onet, "E:/Algrithm/MTCNN/MTCNN-master/mtcnn_caffe/model/det3.param",
                 "E:/Algrithm/MTCNN/MTCNN-master/mtcnn_caffe/model/det3.bin" },
    };
    for (int i = 0; i < 3; i++)
    {
        // Both loaders return 0 on success.
        if (models[i].net->load_param(models[i].param) != 0)
        {
            fprintf(stderr, "mtcnn: failed to load param file %s\n", models[i].param);
        }
        if (models[i].net->load_model(models[i].bin) != 0)
        {
            fprintf(stderr, "mtcnn: failed to load model file %s\n", models[i].bin);
        }
    }
}
/******************generateBbox******************************/
//根据Pnet的输出结果,由滑框的得分,筛选可能是人脸的滑框,并记录该框的位置、人脸坐标信息、得分以及编号
void mtcnn::generateBbox(ncnn::Mat score, ncnn::Mat location, std::vector& boundingBox_, std::vector& bboxScore_, float scale){
int stride = 2;//Pnet中有一次MP2*2,后续转换的时候相当于stride=2;
int cellsize = 12;
int count = 0;
//score p
float *p = score.channel(1);//score.data + score.cstep;//判定为人脸的概率
//float *plocal = location.data;
Bbox bbox;
orderScore order;
// float max_p = 0;
for(int row=0;rowmax_p)
//{
// max_p = *p;
//}
if(*p>threshold[0]){
bbox.score = *p;//记录得分
order.score = *p;
order.oriOrder = count;//记录有效滑框的编号
bbox.x1 = round((stride*col+1)/scale);//12*12的滑框,换算到原始图像上的坐标
bbox.y1 = round((stride*row+1)/scale);
bbox.x2 = round((stride*col+1+cellsize)/scale);
bbox.y2 = round((stride*row+1+cellsize)/scale);
bbox.exist = true;
bbox.area = (bbox.x2 - bbox.x1)*(bbox.y2 - bbox.y1);
for(int channel=0;channel<4;channel++)
bbox.regreCoord[channel]=location.channel(channel)[0];//人脸框的坐标相关值
boundingBox_.push_back(bbox);
bboxScore_.push_back(order);
count++;
}
p++;
//plocal++;
}
}
//printf("Pnet max prob: %f\n",max_p);
}
/**********************nms非极大值抑制****************************/
void mtcnn::nms(std::vector &boundingBox_, std::vector &bboxScore_, const float overlap_threshold, string modelname){
if(boundingBox_.empty()){
return;
}
std::vector heros;
//sort the score
sort(bboxScore_.begin(), bboxScore_.end(), cmpScore);//cmpScore指定升序排列
int order = 0;
float IOU = 0;
float maxX = 0;
float maxY = 0;
float minX = 0;
float minY = 0;
//规则,站上擂台的擂台主,永远都是胜利者。
while(bboxScore_.size()>0){
order = bboxScore_.back().oriOrder;//取得分最高勇士的编号ID。
bboxScore_.pop_back();//勇士出列
if(order<0)continue;//死的?下一个!(order在(*it).oriOrder = -1;改变)
heros.push_back(order);//记录擂台主ID
boundingBox_.at(order).exist = false;//当前这个Bbox为擂台主,签订生死簿。
for(int num=0;numboundingBox_.at(order).x1)?boundingBox_.at(num).x1:boundingBox_.at(order).x1;
maxY = (boundingBox_.at(num).y1>boundingBox_.at(order).y1)?boundingBox_.at(num).y1:boundingBox_.at(order).y1;
minX = (boundingBox_.at(num).x20)?(minX-maxX+1):0;
maxY = ((minY-maxY+1)>0)?(minY-maxY+1):0;
//IOU reuse for the area of two bbox
IOU = maxX * maxY;
if(!modelname.compare("Union"))
IOU = IOU/(boundingBox_.at(num).area + boundingBox_.at(order).area - IOU);
else if(!modelname.compare("Min")){
IOU = IOU/((boundingBox_.at(num).areaoverlap_threshold){
boundingBox_.at(num).exist=false;//如果该对比框与擂台主的IOU够大,挑战者勇士战死
for(vector::iterator it=bboxScore_.begin(); it!=bboxScore_.end();it++){
if((*it).oriOrder == num) {
(*it).oriOrder = -1;//勇士战死标志
break;
}
}
}//else 那些距离擂台主比较远迎战者幸免于难,将有机会作为擂台主出现
}
}
}
for(int i=0;i &vecBbox, const int &height, const int &width){
if(vecBbox.empty()){
cout<<"Bbox is empty!!"<::iterator it=vecBbox.begin(); it!=vecBbox.end();it++){
if((*it).exist){
bbw = (*it).x2 - (*it).x1 + 1;//滑框的宽高计算
bbh = (*it).y2 - (*it).y1 + 1;
x1 = (*it).x1 + (*it).regreCoord[0]*bbw;//人脸框的位置坐标计算
y1 = (*it).y1 + (*it).regreCoord[1]*bbh;
x2 = (*it).x2 + (*it).regreCoord[2]*bbw;
y2 = (*it).y2 + (*it).regreCoord[3]*bbh;
w = x2 - x1 + 1;//人脸框宽高
h = y2 - y1 + 1;
maxSide = (h>w)?h:w;
x1 = x1 + w*0.5 - maxSide*0.5;
y1 = y1 + h*0.5 - maxSide*0.5;
(*it).x2 = round(x1 + maxSide - 1);
(*it).y2 = round(y1 + maxSide - 1);
(*it).x1 = round(x1);
(*it).y1 = round(y1);
//boundary check
if((*it).x1<0)(*it).x1=0;
if((*it).y1<0)(*it).y1=0;
if((*it).x2>width)(*it).x2 = width - 1;
if((*it).y2>height)(*it).y2 = height - 1;
it->area = (it->x2 - it->x1)*(it->y2 - it->y1);
}
}
}
// Runs the three-stage cascade on img_ and, when at least one box survives
// every stage, assigns the third-stage boxes to finalBbox_.
//
// Stage 1: for every pyramid scale, resize the image, run Pnet, turn its
//          "prob1"/"conv4-2" outputs into candidates, and apply NMS first per
//          scale and then across all scales.
// Stage 2: crop each surviving box, resize it to 24x24, run Rnet and keep the
//          boxes whose score exceeds threshold[1].
// Stage 3: same with 48x48 crops and Onet (threshold[2]); also fills in the
//          five landmark points.
//
// NOTE(review): the member vectors are cleared only at the very end, so each
// early `return` below leaves them populated (and finalBbox_ untouched); a
// later call on the same object would then start from stale boxes.
// NOTE(review): if ncnn::Mat assignment shares the pixel buffer, the
// normalisation below also modifies the caller's img_ — confirm.
void mtcnn::detect(ncnn::Mat& img_, std::vector& finalBbox_){
img = img_;
img_w = img.w;
img_h = img.h;
img.substract_mean_normalize(mean_vals, norm_vals);//preprocessing: normalize to (-1,1)
// NOTE(review): the next statement looks truncated. The declarations of
// MIN_DET_SIZE, m, factor and factor_count that the loop below relies on are
// not visible here, and scales_ has no element type.
float minl = img_w scales_;
// Collect pyramid scales: m and minl are multiplied by factor each round
// until minl is no longer greater than MIN_DET_SIZE.
while(minl>MIN_DET_SIZE){
if (factor_count > 0){ m = m*factor; }
scales_.push_back(m);
minl *= factor;
factor_count++;
}
orderScore order;
int count = 0;
// first stage: run Pnet once per pyramid scale
for (size_t i = 0; i < scales_.size(); i++) {
int hs = (int)ceil(img_h*scales_[i]);
int ws = (int)ceil(img_w*scales_[i]);
ncnn::Mat in(ws, hs, 3);
resize_image(img, in);//build one level of the image pyramid per iteration
ncnn::Extractor ex = Pnet.create_extractor();
ex.set_light_mode(true);
printf("Pnet input width:%d, height:%d, channel:%d\n",in.w,in.h,in.c);
ex.input("data", in);//Pnet has only convolution layers, so it accepts inputs of different sizes
ncnn::Mat score_, location_;
ex.extract("prob1", score_);
printf("prob1 w:%d, h:%d, ch:%d, first data:%f\n", score_.w, score_.h, score_.c, score_.data[0]);
//for (int t_w = 0; t_w < score_.w*score_.h*score_.c; t_w++)
//{
// printf("%f, ", score_.data[t_w]);
//}
ex.extract("conv4-2", location_);
std::vector boundingBox_;
std::vector bboxScore_;
generateBbox(score_, location_, boundingBox_, bboxScore_, scales_[i]);
nms(boundingBox_, bboxScore_, nms_threshold[0]);//NMS within this scale only
for(vector::iterator it=boundingBox_.begin(); it!=boundingBox_.end();it++){
if((*it).exist){//survivors of the per-scale NMS advance to the cross-scale pool
firstBbox_.push_back(*it);//cross-scale candidate list
order.score = (*it).score;
// count is the box's index in firstBbox_, as nms() expects
order.oriOrder = count;
firstOrderScore_.push_back(order);
count++;
}
}
bboxScore_.clear();
boundingBox_.clear();
}
//the first stage's nms
if(count<1)return;
nms(firstBbox_, firstOrderScore_, nms_threshold[0]);//NMS across all scales
refineAndSquareBbox(firstBbox_, img_h, img_w);
// NOTE(review): size() is not an int; %d is the wrong conversion for it.
printf("firstBbox_.size()=%d\n", firstBbox_.size());
//for (vector::iterator it = firstBbox_.begin(); it != firstBbox_.end(); it++)
//{
// cout << "OK" << endl;
// //rectangle(cp_img, Point((*it).x1, (*it).y1), Point((*it).x2, (*it).y2), Scalar(0, 0, 255), 2, 8, 0);
//}
//imshow("Pnet.jpg", cp_img);
//waitKey(1000);
//second stage
count = 0;
for(vector::iterator it=firstBbox_.begin(); it!=firstBbox_.end();it++){
if((*it).exist){
ncnn::Mat tempIm;
// crop the box: the arguments are the border widths cut from top, bottom, left, right
copy_cut_border(img, tempIm, (*it).y1, img_h-(*it).y2, (*it).x1, img_w-(*it).x2);
ncnn::Mat in(24, 24, 3);
resize_image(tempIm, in);
ncnn::Extractor ex = Rnet.create_extractor();
ex.set_light_mode(true);
ex.input("data", in);
ncnn::Mat score, bbox;
ex.extract("prob1", score);
ex.extract("conv5-2", bbox);
// first element of score channel 1, read via data + cstep
if(*(score.data+score.cstep)>threshold[1]){
for(int channel=0;channel<4;channel++)
it->regreCoord[channel]=bbox.channel(channel)[0];//*(bbox.data+channel*bbox.cstep);
it->area = (it->x2 - it->x1)*(it->y2 - it->y1);
it->score = score.channel(1)[0];//*(score.data+score.cstep);
secondBbox_.push_back(*it);
order.score = it->score;
order.oriOrder = count++;
secondBboxScore_.push_back(order);
}
else{
(*it).exist=false;
}
}
}
printf("secondBbox_.size()=%d\n", secondBbox_.size());
if(count<1)return;
nms(secondBbox_, secondBboxScore_, nms_threshold[1]);
refineAndSquareBbox(secondBbox_, img_h, img_w);
//third stage
count = 0;
for(vector::iterator it=secondBbox_.begin(); it!=secondBbox_.end();it++){
if((*it).exist){
ncnn::Mat tempIm;
copy_cut_border(img, tempIm, (*it).y1, img_h-(*it).y2, (*it).x1, img_w-(*it).x2);
ncnn::Mat in(48, 48, 3);
resize_image(tempIm, in);
ncnn::Extractor ex = Onet.create_extractor();
ex.set_light_mode(true);
ex.input("data", in);
ncnn::Mat score, bbox, keyPoint;
ex.extract("prob1", score);
ex.extract("conv6-2", bbox);
ex.extract("conv6-3", keyPoint);
if(score.channel(1)[0]>threshold[2]){
for(int channel=0;channel<4;channel++)
it->regreCoord[channel]=bbox.channel(channel)[0];
it->area = (it->x2 - it->x1)*(it->y2 - it->y1);
it->score = score.channel(1)[0];
// keyPoint channels 0..4 scale the box width (x), channels 5..9 the box height (y)
for(int num=0;num<5;num++){
(it->ppoint)[num] = it->x1 + (it->x2 - it->x1)*keyPoint.channel(num)[0];
(it->ppoint)[num+5] = it->y1 + (it->y2 - it->y1)*keyPoint.channel(num+5)[0];
}
thirdBbox_.push_back(*it);
order.score = it->score;
order.oriOrder = count++;
thirdBboxScore_.push_back(order);
}
else
(*it).exist=false;
}
}
printf("thirdBbox_.size()=%d\n", thirdBbox_.size());
if(count<1)return;
// unlike stages 1 and 2, the boxes are refined before NMS, and NMS uses the "Min" overlap
refineAndSquareBbox(thirdBbox_, img_h, img_w);
nms(thirdBbox_, thirdBboxScore_, nms_threshold[2], "Min");
finalBbox_ = thirdBbox_;
// reset the per-call working state
firstBbox_.clear();
firstOrderScore_.clear();
secondBbox_.clear();
secondBboxScore_.clear();
thirdBbox_.clear();
thirdBboxScore_.clear();
}
int main(int argc, char** argv)
{
/******读图(start)*******/
const char* imagepath ;// argv[1];
if (argc == 2)
{
imagepath = argv[1];
}
else{
imagepath = "E:/Algrithm/ncnn/ncnn/x64/Release/test2.jpg";
}
cout << imagepath << endl;
cv::Mat cv_img = cv::imread(imagepath);
if (cv_img.data==NULL)
{
fprintf(stderr, "cv::imread %s failed\n", imagepath);
system("pause");
return -1;
}
printf("img w: %d h:%d ch:%d\n",cv_img.cols,cv_img.rows,cv_img.channels());
imshow("img",cv_img);
waitKey(10);
/***************读图(end)********************/
/***********MTCNN运算(start)************/
float start = clock();
int times = 1;
ncnn::set_omp_num_threads(4);
for (int cnt = 0; cnt < times; cnt++)
{
std::vector finalBbox;
mtcnn Net;
//OpenCV读出的图片是BGR格式的,需要转为RGB格式,否则检出率会很低。
ncnn::Mat ncnn_img = ncnn::Mat::from_pixels(cv_img.data, ncnn::Mat::PIXEL_BGR2RGB, cv_img.cols, cv_img.rows);
Net.detect(ncnn_img, finalBbox);
for (vector::iterator it = finalBbox.begin(); it != finalBbox.end(); it++){
if ((*it).exist)
{
printf("Bbox [x1,y1], [x2,y2]:[%d,%d], [%d,%d] \n", (*it).x1, (*it).x2, (*it).y1, (*it).y2);
rectangle(cv_img, Point((*it).x1, (*it).y1), Point((*it).x2, (*it).y2), Scalar(0, 0, 255), 2, 8, 0);
for (int num = 0; num < 5; num++)
{
printf("Landmark [x1,y1]: [%d,%d] \n", (int)*(it->ppoint + num), (int)*(it->ppoint + num + 5));
circle(cv_img, Point((int)*(it->ppoint + num), (int)*(it->ppoint + num + 5)), 3, Scalar(0, 255, 255), -1);
}
}
}
}
/***********MTCNN运算(end)************/
printf("MTCNN mean time comsuming: %f ms\n",(clock()-start)/times);
imshow("result.jpg",cv_img);
waitKey(100);
system("pause");
return 0;
}
#endif