Deep-learning-based face detection in OpenCV

The face detector that shipped with OpenCV before version 3.4 was the classic Haar + AdaBoost cascade. Its speed is acceptable, but the detection rate is low and false positives are common: tilt the face a little and it is missed, while all sorts of unrelated objects get reported as faces. Not very impressive. Fortunately, the rise of deep learning has produced a batch of face detection algorithms with much better results, such as MTCNN, which give us far more room to work with. The image below shows detections produced by MTCNN; impressive, isn't it? (Click here for the source code.)

[Figure 1: face detection results produced by MTCNN]

MTCNN really does work well, but it was trained with Caffe, and Caffe is notorious for its painful setup: a dozen or so dependencies, each with several versions, and version incompatibilities everywhere. A beginner can easily spend a week or more just getting it to build, which has slowed its adoption. Fortunately, someone has packaged MTCNN deployments for all the major platforms (see GitHub), which cuts the required work considerably. Still, wouldn't it be nice if OpenCV itself came with a deep-learning-based detector?

Long awaited and finally here: just before Christmas Eve 2017, OpenCV released version 3.4. The release mainly strengthened the dnn module, in particular adding support for Faster R-CNN, and it comes with OpenCL acceleration. The results are quite good.
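
If you want to try the OpenCL acceleration mentioned above from Python, you can ask the dnn module to prefer the OpenCL target. This is only a sketch: it assumes your OpenCV 3.4 build was compiled with OpenCL support and uses the Caffe model files that the listings below rely on; without OpenCL the call is harmless and inference simply stays on the CPU.

import cv2

net = cv2.dnn.readNetFromCaffe("deploy.prototxt",
                               "res10_300x300_ssd_iter_140000_fp16.caffemodel")
# Prefer the OpenCL target; OpenCV falls back to the CPU path when OpenCL is unavailable.
net.setPreferableTarget(cv2.dnn.DNN_TARGET_OPENCL)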

I mainly tested the ResNet face detector sample. The results are good: apart from being somewhat slow, it is basically on par with MTCNN.

The model lives in ${OPENCV_DIR}\sources\samples\dnn\face_detector, which contains a download_weights.py script; run it and the model files are downloaded automatically.
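
Before running the samples it is worth a quick sanity check. The snippet below is just a sketch: it assumes the downloaded files have been copied next to your script and uses the same file names as the Python listing further down.

import os
import cv2

print(cv2.__version__)  # the dnn face detector sample needs OpenCV 3.4 or newer

proto = "deploy.prototxt"
model = "res10_300x300_ssd_iter_140000_fp16.caffemodel"
if not (os.path.exists(proto) and os.path.exists(model)):
    raise SystemExit("model files missing - run download_weights.py first")

# Load the Caffe model to confirm the files are intact.
net = cv2.dnn.readNetFromCaffe(proto, model)
print("network loaded:", not net.empty())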

The Python version of the code is as follows:

# coding=utf-8
import numpy as np
import cv2, os, time

def show_detections(image, detections):
    h, w, c = image.shape
    # Each row of the output blob is [image_id, class_id, confidence, x1, y1, x2, y2]
    for i in range(0, detections.shape[2]):
        confidence = detections[0, 0, i, 2]
        if confidence > 0.6:
            # Box corners are normalized to [0, 1]; scale back to pixel coordinates
            box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
            (startX, startY, endX, endY) = box.astype("int")
            text = "{:.2f}%".format(confidence * 100)
            y = startY - 10 if startY - 10 > 10 else startY + 10
            cv2.rectangle(image, (startX, startY), (endX, endY),
                          (0, 255, 0), 1)
            cv2.putText(image, text, (startX, y),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.45, (0, 255, 0), 2)
    return image

def detect_img(net, image):
    # Resize to the 300x300 network input and subtract the training mean values
    blob = cv2.dnn.blobFromImage(cv2.resize(image, (300, 300)), 1.0,
                                 (300, 300), (104.0, 177.0, 123.0))
    net.setInput(blob)
    start = time.time()
    detections = net.forward()
    end = time.time()
    print(end - start)
    return show_detections(image, detections)

def test_dir(net, dir="images"):
    files = os.listdir(dir)
    for file in files:
        filepath = dir + "/" + file
        img = cv2.imread(filepath)
        showimg = detect_img(net, img)
        cv2.imshow("img", showimg)
        cv2.waitKey()

def test_camera(net):
    cap = cv2.VideoCapture(0)
    while True:
        ret, img = cap.read()
        if not ret:
            break
        showimg = detect_img(net, img)
        cv2.imshow("img", showimg)
        if cv2.waitKey(1) & 0xFF == 27:  # press Esc to quit
            break

if __name__ == "__main__":
    # Load the Caffe model (or switch to the TensorFlow version below)
    net = cv2.dnn.readNetFromCaffe("deploy.prototxt", "res10_300x300_ssd_iter_140000_fp16.caffemodel")
    # net = cv2.dnn.readNetFromTensorflow("opencv_face_detector_uint8.pb", "opencv_face_detector.pbtxt")
    # test_dir(net)
    test_camera(net)
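
For reference, the blob returned by net.forward() has shape (1, 1, N, 7): each of the N rows is [image_id, class_id, confidence, x1, y1, x2, y2], with the box corners normalized to [0, 1], which is why show_detections scales them by the frame width and height. The sketch below filters all rows at once with NumPy; it assumes the detections, w and h variables from the listing above.

import numpy as np

rows = detections[0, 0]                      # shape (N, 7)
keep = rows[rows[:, 2] > 0.6]                # same 0.6 confidence threshold as show_detections
boxes = (keep[:, 3:7] * np.array([w, h, w, h])).astype(int)  # boxes in pixel coordinates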

It supports both Caffe and TensorFlow models. Inference takes roughly 35 ms per frame (CPU: i7 [email protected], 4 cores, about 60% utilization).
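
If you prefer the timing that the network itself reports over wrapping forward() in time.time(), the dnn module exposes the same profiler the C++ sample below uses. A minimal sketch, assuming net has already run at least one forward pass:

# getPerfProfile returns the total inference time in ticks plus per-layer timings.
ticks, layer_timings = net.getPerfProfile()
print("inference: %.1f ms" % (ticks * 1000.0 / cv2.getTickFrequency()))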

 

The C++ version is as follows:

#include <iostream>
#include <sstream>
#include "mropencv.h"
using namespace cv;
using namespace cv::dnn;
using namespace std;
const size_t inWidth = 300;
const size_t inHeight = 300;
const double inScaleFactor = 1.0;
const Scalar meanVal(104.0, 177.0, 123.0);

int main(int argc, char** argv)
{
    float min_confidence = 0.5;
    String modelConfiguration = "face_detector/deploy.prototxt";
    String modelBinary = "face_detector/res10_300x300_ssd_iter_140000.caffemodel";
    //! [Initialize network]
    dnn::Net net = readNetFromCaffe(modelConfiguration, modelBinary);
    //! [Initialize network]
    if (net.empty())
    {
        cerr << "Can't load network by using the following files: " << endl;
        cerr << "prototxt:   " << modelConfiguration << endl;
        cerr << "caffemodel: " << modelBinary << endl;
        cerr << "Models are available here:" << endl;
        cerr << "/samples/dnn/face_detector" << endl;
        cerr << "or here:" << endl;
        cerr << "https://github.com/opencv/opencv/tree/master/samples/dnn/face_detector" << endl;
        exit(-1);
    }

    VideoCapture cap(0);
    if (!cap.isOpened())
    {
        cout << "Couldn't open camera : " << endl;
        return -1;
    }
    for (;;)
    {
        Mat frame;
        cap >> frame; // get a new frame from camera/video or read image

        if (frame.empty())
        {
            waitKey();
            break;
        }

        if (frame.channels() == 4)
            cvtColor(frame, frame, COLOR_BGRA2BGR);

        //! [Prepare blob]
        Mat inputBlob = blobFromImage(frame, inScaleFactor,
            Size(inWidth, inHeight), meanVal, false, false); // convert the frame to a 4D batch blob
        //! [Prepare blob]

        //! [Set input blob]
        net.setInput(inputBlob, "data"); // set the network input
        //! [Set input blob]

        //! [Make forward pass]
        Mat detection = net.forward("detection_out"); // compute the detection output
        //! [Make forward pass]

        vector<double> layersTimings;
        double freq = getTickFrequency() / 1000;
        double time = net.getPerfProfile(layersTimings) / freq;

        Mat detectionMat(detection.size[2], detection.size[3], CV_32F, detection.ptr<float>());

        ostringstream ss;
        ss << "FPS: " << 1000 / time << " ; time: " << time << " ms";
        putText(frame, ss.str(), Point(20, 20), 0, 0.5, Scalar(0, 0, 255));

        float confidenceThreshold = min_confidence;
        for (int i = 0; i < detectionMat.rows; i++)
        {
            float confidence = detectionMat.at<float>(i, 2);

            if (confidence > confidenceThreshold)
            {
                int xLeftBottom = static_cast<int>(detectionMat.at<float>(i, 3) * frame.cols);
                int yLeftBottom = static_cast<int>(detectionMat.at<float>(i, 4) * frame.rows);
                int xRightTop = static_cast<int>(detectionMat.at<float>(i, 5) * frame.cols);
                int yRightTop = static_cast<int>(detectionMat.at<float>(i, 6) * frame.rows);

                Rect object((int)xLeftBottom, (int)yLeftBottom,
                    (int)(xRightTop - xLeftBottom),
                    (int)(yRightTop - yLeftBottom));

                rectangle(frame, object, Scalar(0, 255, 0));

                ss.str("");
                ss << confidence;
                String conf(ss.str());
                String label = "Face: " + conf;
                int baseLine = 0;
                Size labelSize = getTextSize(label, FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
                rectangle(frame, Rect(Point(xLeftBottom, yLeftBottom - labelSize.height),
                    Size(labelSize.width, labelSize.height + baseLine)),
                    Scalar(255, 255, 255), FILLED);
                putText(frame, label, Point(xLeftBottom, yLeftBottom),
                    FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 0, 0));
            }
        }
        cv::imshow("detections", frame);
        if (waitKey(1) >= 0) break;
    }
    return 0;
}

The C++ version needs an mropencv.h header, which can be found in MRHead.
