在python环境下实现人脸检测有多种方式,最简单易用的是使用opencv或者dlib,两者都提供了经典方法和深度学习方法进行人脸检测:
OpenCV的dnn模块可以加载Caffe版本的人脸检测模型res10_300x300_ssd_iter_140000_fp16.caffemodel+deploy.prototxt,或者TensorFlow的量化版本opencv_face_detector_uint8.pb+opencv_face_detector.pbtxt。
模型权重可以通过OpenCV源码中的脚本下载:"F:\opencv\sources\samples\dnn\face_detector\download_weights.py"。
下面的两段代码都可以实现dnn模块加载深度学习模型,第二段代码使用cv2.dnn_DetectionModel接口比较简洁。
import numpy as np
import argparse
import cv2
# Paths and settings shared by the demo scripts in this file.
args = {
    'prototxt': r"F:\opencv\sources\samples\dnn\face_detector\deploy.prototxt",
    'model': r"F:\opencv\sources\samples\dnn\face_detector\res10_300x300_ssd_iter_140000_fp16.caffemodel",
    'image': r"C:\Users\admin\Pictures\faces.png",
    'confidence': 0.5,
}

# load our serialized model from disk
print("[INFO] loading model...")
net = cv2.dnn.readNetFromCaffe(args["prototxt"], args["model"])

# load the input image; cv2.imread returns None (it does not raise) when
# the file is missing or cannot be decoded, so fail early with a clear error
image = cv2.imread(args["image"])
if image is None:
    raise OSError("could not read image: {}".format(args["image"]))
(h, w) = image.shape[:2]

# construct a 300x300 input blob. blobFromImage resizes to `size` itself,
# so no separate cv2.resize is needed. The mean (104, 177, 123) is given in
# BGR order, matching cv2.imread's channel order, so the channels must NOT
# be swapped (with swapRB=True the mean would be applied as R, G, B).
blob = cv2.dnn.blobFromImage(
    image,
    scalefactor=1.0,
    size=(300, 300),
    mean=(104.0, 177.0, 123.0),
    swapRB=False,
)

# pass the blob through the network and obtain the detections and
# predictions
print("[INFO] computing object detections...")
net.setInput(blob)
detections = net.forward()

# loop over the detections; for detection i, index 2 holds the confidence
# and indices 3..6 hold the box corners relative to the image size
for i in range(detections.shape[2]):
    confidence = detections[0, 0, i, 2]

    # filter out weak detections by ensuring the `confidence` is
    # greater than the minimum confidence
    if confidence > args["confidence"]:
        # scale the relative box to pixel coordinates; tolist() yields
        # plain Python ints for the drawing calls below
        box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
        (startX, startY, endX, endY) = box.astype("int").tolist()

        # draw the bounding box of the face along with the associated
        # probability; put the label below the top edge when the box is
        # too close to the top of the image
        text = "{:.2f}%".format(confidence * 100)
        y = startY - 10 if startY - 10 > 10 else startY + 10
        cv2.rectangle(image, (startX, startY), (endX, endY),
                      (0, 0, 255), 2)
        cv2.putText(image, text, (startX, y),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.45, (0, 0, 255), 2)

# show the output image
cv2.imshow("Output", image)
cv2.waitKey(0)
cv2.destroyAllWindows()
#%%
# Load the face detection model (TensorFlow graph + its text config).
model_path = r"F:\opencv\sources\samples\dnn\face_detector\opencv_face_detector_uint8.pb"
pbtxt_path = r"F:\opencv\sources\samples\dnn\face_detector\opencv_face_detector.pbtxt"
# net = cv2.dnn_DetectionModel(args["model"], args["prototxt"])
net = cv2.dnn_DetectionModel(model_path, pbtxt_path)
# net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
# net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)
net.setInputSize(300, 300)  # network input size
net.setInputScale(1.0)  # pixel values are not rescaled to [0, 1]
# Same preprocessing as the blobFromImage-based script in this file:
# subtract the BGR mean and keep cv2.imread's BGR channel order.
net.setInputMean((104.0, 177.0, 123.0))
net.setInputSwapRB(False)

# cv2.imread returns None (it does not raise) when the file cannot be read
frame = cv2.imread(args["image"])
if frame is None:
    raise OSError("could not read image: {}".format(args["image"]))

import time

# average the inference time over 50 runs; perf_counter is a monotonic,
# high-resolution clock intended for measuring short durations
s = time.perf_counter()
for _ in range(50):
    classes, confs, boxes = net.detect(frame, 0.5, 0.5)
print('平均推理时间:', (time.perf_counter() - s) / 50.0)

classes, confs, boxes = net.detect(frame, 0.5, 0.5)
names = ['_background_', 'face']
# NOTE(review): detect() can return empty tuples instead of arrays when
# nothing passes the thresholds; np.asarray keeps .flatten() from raising
# AttributeError in that case, and the loop simply does not run.
class_ids = np.asarray(classes).flatten()
scores = np.asarray(confs).flatten()
for class_id, conf, box in zip(class_ids, scores, boxes):
    label = '{}, {:.2f}'.format(names[class_id], conf)
    # print(label)
    label_size, baseline = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
    # box is (left, top, width, height); only the top-left corner is
    # needed to place the label
    left, top = int(box[0]), int(box[1])
    # keep the label inside the image when the box touches the top edge
    top = max(top, label_size[1])
    cv2.rectangle(frame, box, color=(0, 255, 0), thickness=3)
    # white filled background behind the label text
    cv2.rectangle(frame, (left, top - label_size[1]),
                  (left + label_size[0], top + baseline), (255, 255, 255), cv2.FILLED)
    cv2.putText(frame, label, (left, top), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0))

cv2.imshow('frame', frame)
cv2.waitKey(0)
cv2.destroyAllWindows()
下面使用OpenCV的Haar级联分类器进行人脸检测;取消detectAndDisplay函数中相关代码的注释之后,还可以检测眼睛。
#%% haar
def detectAndDisplay(frame):
    """Detect faces in a BGR frame and outline each one with an ellipse.

    The frame is converted to an equalized grayscale image, passed to the
    module-level ``face_cascade``, and every detection is drawn onto
    ``frame``. Returns the annotated frame.
    """
    gray = cv2.equalizeHist(cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY))
    #-- Detect faces
    for (x, y, w, h) in face_cascade.detectMultiScale(gray):
        half_w, half_h = w // 2, h // 2
        frame = cv2.ellipse(frame, (x + half_w, y + half_h), (half_w, half_h),
                            0, 0, 360, (255, 0, 255), 4)
        # Uncomment to also detect eyes inside each face (uses the
        # module-level eyes_cascade):
        # faceROI = gray[y:y+h, x:x+w]
        # #-- In each face, detect eyes
        # eyes = eyes_cascade.detectMultiScale(faceROI)
        # for (x2, y2, w2, h2) in eyes:
        #     eye_center = (x + x2 + w2//2, y + y2 + h2//2)
        #     radius = int(round((w2 + h2)*0.25))
        #     frame = cv2.circle(frame, eye_center, radius, (255, 0, 0), 4)
    return frame
face_cascade = cv2.CascadeClassifier()
eyes_cascade = cv2.CascadeClassifier()

#-- 1. Load the cascades; a failed load is an error, so exit with a
#-- non-zero status instead of 0
if not face_cascade.load(r"F:\opencv-4.4.0\data\haarcascades\haarcascade_frontalface_alt.xml"):
    print('--(!)Error loading face cascade')
    raise SystemExit(1)
if not eyes_cascade.load(r"F:\opencv-4.4.0\data\haarcascades\haarcascade_eye.xml"):
    print('--(!)Error loading eyes cascade')
    raise SystemExit(1)

# cv2.imread returns None (it does not raise) when the file cannot be read
frame = cv2.imread(args["image"])
if frame is None:
    raise OSError("could not read image: {}".format(args["image"]))

import time

# average the detection time over 50 runs. detectAndDisplay draws onto the
# array it receives, so each run gets a fresh copy; otherwise later runs
# (and the final displayed result) would detect on an image already
# covered with ellipses. The copy is cheap compared with the detection.
s = time.perf_counter()
for _ in range(50):
    detectAndDisplay(frame.copy())
print('平均推理时间:', (time.perf_counter() - s) / 50.0)

frame = detectAndDisplay(frame)
cv2.imshow('frame', frame)
cv2.waitKey(0)
cv2.destroyAllWindows()
dlib的CNN人脸检测预训练模型可以从 http://dlib.net/files/mmod_human_face_detector.dat.bz2 下载。
#%% dlib
# get the mmod_human_face_detector.dat file from:
# http://dlib.net/files/mmod_human_face_detector.dat.bz2
import time

# imported here because this section uses dlib before the file's later
# `import dlib` line would have run
import dlib

f = r'E:\Applications\WPy64-3741\python-3.7.4.amd64\Lib\site-packages\face_recognition_models\models\mmod_human_face_detector.dat'
cnn_face_detector = dlib.cnn_face_detection_model_v1(f)
win = dlib.image_window()
img = dlib.load_rgb_image(args['image'])

# average the inference time over 10 runs
# NOTE(review): the second argument (1) is presumably dlib's upsample
# count -- confirm against the dlib Python API docs
s = time.perf_counter()
for _ in range(10):
    _ = cnn_face_detector(img, 1)
print('平均推理时间:', (time.perf_counter() - s) / 10.0)

dets = cnn_face_detector(img, 1)
print("Number of faces detected: {}".format(len(dets)))
for i, d in enumerate(dets):
    print("Detection {}: Left: {} Top: {} Right: {} Bottom: {} Confidence: {}".format(
        i, d.rect.left(), d.rect.top(), d.rect.right(), d.rect.bottom(), d.confidence))

# collect the plain rectangles of all detections for the overlay
rects = dlib.rectangles()
rects.extend(d.rect for d in dets)

win.clear_overlay()
win.set_image(img)
win.add_overlay(rects)
dlib.hit_enter_to_continue()
#%% dlib
import dlib
import matplotlib.pyplot as plt
def rect_to_bb(rect):
    """Convert a dlib-style rectangle to an OpenCV-style ``(x, y, w, h)`` tuple.

    ``rect`` must provide ``left()``, ``top()``, ``right()`` and
    ``bottom()``. Width and height are computed as ``right - left`` and
    ``bottom - top``.
    """
    left, top = rect.left(), rect.top()
    return (left, top, rect.right() - left, rect.bottom() - top)
# gray = cv2.imread(args['image'],1)
import time

from PIL import Image

# dlib's default frontal face detector, run on an RGB array loaded via PIL
face_detect = dlib.get_frontal_face_detector()
img = np.array(Image.open(args['image']).convert('RGB'))

# average the detection time over a fixed number of runs
runs = 50
s = time.time()
for _ in range(runs):
    face_detect(img, 1)
print('平均推理时间:', (time.time() - s) / float(runs))

# detect once more and draw each face as a white box, converting the
# detector's rectangles to (x, y, w, h) first
rects = face_detect(img, 1)
for x, y, w, h in map(rect_to_bb, rects):
    cv2.rectangle(img, (x, y), (x + w, y + h), (255, 255, 255), 3)

plt.figure(figsize=(12, 8))
plt.imshow(img, cmap='gray')
plt.show()
最终这4种模型的检测对比结果如下图所示。
检测速度如下(CPU环境,配置为Intel® Xeon® CPU E5-2650 v2 @ 2.60GHz):
结合检测速度和实际检测效果,OpenCV的传统人脸检测方法(Haar级联)仍然具有较大优势:对于图中的小尺寸人脸,Haar模型可以正确检测,其他模型都无法检测出来。另外,dlib的两个模型效果都不尽如人意,特别是CNN版本的人脸检测模型,检测速度实在太慢,精度也一般。