#include
#include
// Anchor box priors, stored as consecutive (width, height) pairs: anchor k
// occupies anchor[2*k] (width) and anchor[2*k + 1] (height).
// decode_yolo() multiplies these values by 224 / 7.0 to obtain pixels, i.e.
// they are expressed in units of one grid cell of the network output.
float anchor[] = { 0.469385, 0.710431,
0.920628, 1.367715,
1.274924, 1.906141,
1.845246, 2.771376,
2.994701, 4.442390 };
// Number of (width, height) pairs held in anchor[].
int N_anchors = 5;
//float anchor[] = {
//0.512633, 0.512633,
//1.030616, 1.030616,
//1.455134, 1.455134,
//2.127006, 2.127006,
//3.192079, 3.192079
//};
// Pads `ori` onto a square canvas whose side equals the longer edge of `ori`,
// filled with the value 128 in every channel. The image is placed in the
// top-left corner of the canvas (it is not centred). The canvas is then
// resized to sz x sz and returned.
cv::Mat letterBoxImg(cv::Mat ori, int sz)
{
    const int side = (ori.rows < ori.cols) ? ori.cols : ori.rows;

    // Gray square of the same pixel type as the input.
    cv::Mat canvas(side, side, ori.type(), cv::Scalar::all(128));

    // View onto the top-left region that has exactly the input's size.
    cv::Mat topLeft = canvas(cv::Rect(0, 0, ori.cols, ori.rows));
    ori.copyTo(topLeft);

    cv::Mat resized;
    cv::resize(canvas, resized, cv::Size(sz, sz));
    return resized;
}
// One face box produced by decode_yolo().
// The field order matters: instances are built with aggregate initialisation.
struct FaceInfo {
// Detection confidence; decode_yolo() fills it with the sigmoid of the raw
// network value.
float score;
// Index of the anchor pair in anchor[] associated with the box.
int anchor_idx;
// Left edge of the box, in pixels of the 224x224 network input.
float x;
// Top edge of the box, in pixels of the 224x224 network input.
float y;
// Box width in pixels.
float w;
// Box height in pixels.
float h;
};
// Forward declaration: turns the raw network output blob into face boxes.
// The definition follows main() in this file.
std::vector< FaceInfo> decode_yolo(const cv::Mat& out);
int main()
{
cv::VideoCapture cap(0);
std::string pbfile = "D:\\project\\python\\k210_yolo_v2_face\\Freeze_save.pb";
cv::dnn::Net net = cv::dnn::readNetFromTensorflow(pbfile);
if (net.empty()) {
std::cout << " net is empty\n";
}
while (1) {
cv::Mat im;
std::string imgfile = "timg.jpg";
//cv::Mat im = letterBoxImg(cv::imread(imgfile), 224);
cap >> im;
cv::Mat rgb;
cv::cvtColor(im, rgb, CV_BGR2RGB);
//cv::imshow("img", rgb); cv::waitKey(0);
rgb = letterBoxImg(rgb, 224);
cv::Mat remat = rgb.reshape(1);
double maxval = 0;
cv::minMaxLoc(remat, 0, &maxval);
cv::Mat blob = cv::dnn::blobFromImage(rgb,1.0 / maxval,cv::Size());
std::cout << "out size=" << blob.size << "\n";
net.setInput(blob);
cv::Mat output = net.forward();
std::cout << "out size=" << output.size << "\n";
for (int i = 0; i < 4; ++i) {
std::cout << "s" << i << ":" << output.size.p[i] << ",";
}
std::cout << "\n";
for (int i = 0; i < 4; ++i) {
std::cout << "p" << i << ":" << output.step.p[i] << ",";
}
std::cout << "\n";
std::vector< FaceInfo> faces = decode_yolo(output);
std::cout << "face zie = " << faces.size() << "\n";
for (auto f : faces) {
if (f.score < 0.7) {
continue;
}
std::cout << f.x << "," << f.y << "," << f.w << "," << f.h << "\n";
std::cout << f.score << "\n";
cv::rectangle(rgb, cv::Rect(f.x, f.y, f.w, f.h), cv::Scalar(255, 255, 255));
//cv::circle(rgb, cv::Point(f.y, f.x), 5, cv::Scalar(255, 255, 255));
}
//printf("out size", output.size);
cv::imshow("img", rgb);
int key = cv::waitKey(100);
if (key == 27) break;
}
//system("pause");
return 0;
}
// Logistic function: returns 1 / (1 + e^(-x)), a value in [0, 1].
float sigmoid(float x)
{
    // `auto` keeps whatever type the selected exp() overload yields, so the
    // intermediate arithmetic is carried out exactly as in a single expression.
    const auto decay = exp(-x);
    const auto denom = 1 + decay;
    return 1.0 / denom;
}
template
T blob_val(const cv::Mat& out, int c, int h, int w) {
T* val = (T*)out.ptr(0, c, h);
return val[w];
}
// Decodes the raw network output into face boxes.
//
// `out` must be a CV_32F 4-D blob read as (batch 0, channel, row, column),
// holding at least N_anchors * 6 channels on a grid of at least 7x7 cells.
// For every anchor and every cell of the 7x7 grid:
//   score  = sigmoid(confidence)
//   centre = (sigmoid(tx) + column, sigmoid(ty) + row)   [cells]
//   size   = (exp(tw) * anchor width, exp(th) * anchor height)   [cells]
// and everything is scaled by 224 / 7 to pixels of the 224x224 network input.
// The candidates are then filtered with cv::dnn::NMSBoxes (score threshold
// 0.8, NMS threshold 0.3).
//
// Returns the surviving candidates, each with its score, anchor index and
// top-left corner / size in input pixels.
//
// NOTE(review): the channel order inside each group of 6 (tx, ty, tw, th,
// confidence, one unused channel) is inferred from the confidence being read
// at channel 4 of every group - confirm against the exported model.
std::vector< FaceInfo> decode_yolo(const cv::Mat& out)
{
    const int kGridSize = 7;           // grid cells per side of the output
    const int kInputSize = 224;        // side of the network input in pixels
    const int kChannelsPerAnchor = 6;  // tx, ty, tw, th, confidence, unused
    const int kConfOffset = 4;         // position of confidence in a group

    CV_Assert(out.type() == CV_32F);
    // blob_val() indexes raw memory, so reject blobs that are too small.
    CV_Assert(out.dims == 4);
    CV_Assert(out.size[1] >= N_anchors * kChannelsPerAnchor);
    CV_Assert(out.size[2] >= kGridSize && out.size[3] >= kGridSize);

    std::vector< FaceInfo> candidates;
    candidates.reserve(static_cast<size_t>(N_anchors) * kGridSize * kGridSize);

    for (int anch_idx = 0; anch_idx < N_anchors; ++anch_idx) {
        const int base = anch_idx * kChannelsPerAnchor;
        for (int r = 0; r < kGridSize; r++) {
            for (int c = 0; c < kGridSize; c++) {
                const float score = sigmoid(blob_val<float>(out, base + kConfOffset, r, c));
                const float tx = blob_val<float>(out, base + 0, r, c);
                const float ty = blob_val<float>(out, base + 1, r, c);
                const float tw = blob_val<float>(out, base + 2, r, c);
                const float th = blob_val<float>(out, base + 3, r, c);

                // Box size: anchor prior scaled by exp(t), converted to pixels.
                const float wid = exp(tw) * anchor[anch_idx * 2] * kInputSize / static_cast<double>(kGridSize);
                const float hei = exp(th) * anchor[anch_idx * 2 + 1] * kInputSize / static_cast<double>(kGridSize);
                // Box centre: offset inside the cell plus the cell position.
                const float cx = (sigmoid(tx) + c) * kInputSize / static_cast<double>(kGridSize);
                const float cy = (sigmoid(ty) + r) * kInputSize / static_cast<double>(kGridSize);

                FaceInfo f = { score, anch_idx, cx - wid / 2, cy - hei / 2, wid, hei };
                candidates.push_back(f);
            }
        }
    }

    // NMSBoxes works on integer rectangles plus a parallel score array.
    std::vector<cv::Rect> boxes;
    std::vector<float> confidences;
    std::vector<int> indices;
    boxes.reserve(candidates.size());
    confidences.reserve(candidates.size());
    for (const FaceInfo& f : candidates) {
        boxes.push_back(cv::Rect(static_cast<int>(f.x), static_cast<int>(f.y),
                                 static_cast<int>(f.w), static_cast<int>(f.h)));
        confidences.push_back(f.score);
    }
    cv::dnn::NMSBoxes(boxes, confidences, 0.8f, 0.3f, indices);

    // Return the original candidates rather than rebuilding them from the
    // integer rectangles, so anchor_idx and sub-pixel coordinates survive.
    std::vector< FaceInfo> faces;
    faces.reserve(indices.size());
    for (size_t i = 0; i < indices.size(); ++i) {
        faces.push_back(candidates[indices[i]]);
    }
    return faces;
}