reference:利用OpenCV和深度学习实现人脸检测
2018年之后,OpenCV 的 DNN 模块开始提供基于卷积神经网络 SSD 的人脸检测器,目前商业应用非常成熟,可以做到实时运行,对各种角度的人脸均能做到准确检测,具有很强的抗干扰性。
opencv自带的人脸检测模型
打开windows下的终端,点击左下角的徽标键,输入cmd即可。然后在终端输入cd /d D:\opencv-4.1.0\opencv\sources\samples\dnn\face_detector
然后继续输入:python download_weights.py,该脚本会自动下载人脸检测所需的模型权重文件(.caffemodel / .pb)及对应的网络配置文件。
进入models.yml文件,查看需要设置的参数。文件路径:D:\opencv-4.1.0\opencv\sources\samples\dnn
基于caffe框架下的SSD深度卷积神经网络模型,做人脸检测。
.prototxt和.caffemodel的作用如下:
The .prototxt file(s) which define the model architecture (i.e., the layers themselves)
The .caffemodel file which contains the weights for the actual layers
#include
#include
#include
using namespace cv;
using namespace cv::dnn;
using namespace std;
String model_text_file = "D:/opencv-4.1.0/models/face_detector/deploy.prototxt";
String modelFile = "D:/opencv-4.1.0/models/face_detector/res10_300x300_ssd_iter_140000_fp16.caffemodel";
int main(int argc, char** argv) {
VideoCapture capture;
capture.open(0);
namedWindow("input", WINDOW_AUTOSIZE);
int w = capture.get(CAP_PROP_FRAME_WIDTH);
int h = capture.get(CAP_PROP_FRAME_HEIGHT);
printf("frame width : %d, frame height : %d", w, h);
// set up net
Net net = readNetFromCaffe(model_text_file, modelFile);
net.setPreferableBackend(DNN_BACKEND_OPENCV);
net.setPreferableTarget(DNN_TARGET_CPU);
Mat frame;
while (capture.read(frame)) {
flip(frame, frame, 1);
imshow("input", frame);
//预测
Mat inputblob = blobFromImage(frame, 1.0, Size(300, 300), Scalar(104,177,123), false);
net.setInput(inputblob, "data");
Mat detection = net.forward("detection_out");
//检测
Mat detectionMat(detection.size[2], detection.size[3], CV_32F, detection.ptr<float>());
float confidence_threshold = 0.25;
for (int i = 0; i < detectionMat.rows; i++) {
float confidence = detectionMat.at<float>(i, 2);
if (confidence > confidence_threshold) {
size_t objIndex = (size_t)(detectionMat.at<float>(i, 1));
float tl_x = detectionMat.at<float>(i, 3) * frame.cols;
float tl_y = detectionMat.at<float>(i, 4) * frame.rows;
float br_x = detectionMat.at<float>(i, 5) * frame.cols;
float br_y = detectionMat.at<float>(i, 6) * frame.rows;
Rect object_box((int)tl_x, (int)tl_y, (int)(br_x - tl_x), (int)(br_y - tl_y));
rectangle(frame, object_box, Scalar(0, 0, 255), 2, 8, 0);
putText(frame, format("%s %.2f", "face", confidence), Point(tl_x, tl_y), FONT_HERSHEY_SIMPLEX, 1.0, Scalar(255, 0, 0), 2);
}
}
vector < double>layerstimings;
double freq = getTickFrequency() / 1000;
double time = net.getPerfProfile(layerstimings) / freq;
ostringstream ss;
ss << "FPS" << 1000 / time << ";time:" << time << "ms";
putText(frame, ss.str(), Point(20, 20), FONT_HERSHEY_PLAIN, 1, Scalar(0, 0, 255), 2, 8);
imshow("face-detection-demo", frame);
char c = waitKey(5);
if (c == 27) { // ESC退出
break;
}
}
capture.release();//释放资源
waitKey(0);
return 0;
}
基于TensorFlow的SSD模型,人脸检测。
#include
#include
#include
using namespace cv;
using namespace cv::dnn;
using namespace std;
String model_text_file = "D:/opencv-4.1.0/models/face_detector/opencv_face_detector.pbtxt";
String modelFile = "D:/opencv-4.1.0/models/face_detector/opencv_face_detector_uint8.pb";
int main(int argc, char** argv) {
VideoCapture capture;
capture.open(0);
namedWindow("input", WINDOW_AUTOSIZE);
int w = capture.get(CAP_PROP_FRAME_WIDTH);
int h = capture.get(CAP_PROP_FRAME_HEIGHT);
printf("frame width : %d, frame height : %d", w, h);
// set up net
Net net = readNetFromTensorflow(modelFile, model_text_file);
net.setPreferableBackend(DNN_BACKEND_OPENCV);
net.setPreferableTarget(DNN_TARGET_CPU);
Mat frame;
while (capture.read(frame)) {
flip(frame, frame, 1);
imshow("input", frame);
//预测
Mat inputblob = blobFromImage(frame, 1.0, Size(300, 300), Scalar(104,177,123), false);
net.setInput(inputblob, "data");
Mat detection = net.forward("detection_out");
//检测
Mat detectionMat(detection.size[2], detection.size[3], CV_32F, detection.ptr<float>());
float confidence_threshold = 0.25;
for (int i = 0; i < detectionMat.rows; i++) {
float confidence = detectionMat.at<float>(i, 2);
if (confidence > confidence_threshold) {
size_t objIndex = (size_t)(detectionMat.at<float>(i, 1));
float tl_x = detectionMat.at<float>(i, 3) * frame.cols;
float tl_y = detectionMat.at<float>(i, 4) * frame.rows;
float br_x = detectionMat.at<float>(i, 5) * frame.cols;
float br_y = detectionMat.at<float>(i, 6) * frame.rows;
Rect object_box((int)tl_x, (int)tl_y, (int)(br_x - tl_x), (int)(br_y - tl_y));
rectangle(frame, object_box, Scalar(0, 0, 255), 2, 8, 0);
putText(frame, format("%s %.2f", "face", confidence), Point(tl_x, tl_y), FONT_HERSHEY_SIMPLEX, 1.0, Scalar(255, 0, 0), 2);
}
}
vector < double>layerstimings;
double freq = getTickFrequency() / 1000;
double time = net.getPerfProfile(layerstimings) / freq;
ostringstream ss;
ss << "FPS" << 1000 / time << ";time:" << time << "ms";
putText(frame, ss.str(), Point(20, 20), FONT_HERSHEY_PLAIN, 1, Scalar(0, 0, 255), 2, 8);
imshow("face-detection-demo", frame);
char c = waitKey(5);
if (c == 27) { // ESC退出
break;
}
}
capture.release();//释放资源
waitKey(0);
return 0;
}