人脸识别三步:人脸检测、人脸对齐、特征提取。
numpy==1.18.0
onnxruntime==1.13.1
onnxruntime_directml==1.10.0
opencv_python==4.6.0.66
scikit_image==0.19.3
scikit_learn==1.1.3
from __future__ import division
import datetime
import numpy as np
import onnxruntime
import os.path as osp
import cv2
class SCRFD:
    """SCRFD face detector executed through ONNX Runtime.

    The detector letterboxes an image to the model input size, decodes the
    per-stride score / box / keypoint outputs into image coordinates and
    removes duplicates with non-maximum suppression.
    """

    def __init__(self, model_file, providers=None, options=None, nms_thresh=0.4):
        """Create the inference session and read the model input description.

        Args:
            model_file: Path of the ONNX detection model; it must exist.
            providers: ONNX Runtime execution providers. Defaults to
                ``['CPUExecutionProvider']``.
            options: ``onnxruntime.SessionOptions`` instance. A default one
                is created when omitted.
            nms_thresh: Overlap (IoU) above which a lower-scored box is
                suppressed by :meth:`nms`.
        """
        assert osp.exists(model_file)
        if providers is None:
            providers = ['CPUExecutionProvider']
        if options is None:
            options = onnxruntime.SessionOptions()
        self.session = onnxruntime.InferenceSession(model_file, providers=providers, sess_options=options)
        input_cfg = self.session.get_inputs()[0]
        input_shape = input_cfg.shape
        # Dimensions 2 and 3 of the input shape are reversed to get
        # (width, height) -- assumes an NCHW model input; TODO confirm.
        self.input_size = tuple(input_shape[2:4][::-1])
        self.input_name = input_cfg.name
        self.nms_thresh = nms_thresh
        # Anchor centers keyed by (feature height, feature width, stride).
        self.center_cache = {}

    def forward(self, img, thresh):
        """Run the network on one image and decode candidates above ``thresh``.

        Args:
            img: Image array whose first two dimensions are (height, width).
            thresh: Minimum score for a candidate to be kept.

        Returns:
            Three lists ``(scores_list, bboxes_list, kpss_list)``. An entry is
            appended for a stride only when that stride has at least one
            candidate, so all three lists are empty when nothing is found.
        """
        scores_list = []
        bboxes_list = []
        kpss_list = []
        input_size = tuple(img.shape[0:2][::-1])
        blob = cv2.dnn.blobFromImage(img, 1.0 / 128, input_size, (127.5, 127.5, 127.5), swapRB=True)
        net_outs = self.session.run([], {self.input_name: blob})
        input_height = blob.shape[2]
        input_width = blob.shape[3]
        _feat_stride_fpn = [8, 16, 32]
        # Outputs are read as three groups of three: scores at idx,
        # box distances at idx + 3 and keypoint offsets at idx + 6.
        for idx, stride in enumerate(_feat_stride_fpn):
            scores = net_outs[idx][0]
            bbox_preds = net_outs[idx + 3 * 1][0] * stride
            kps_preds = net_outs[idx + 3 * 2][0] * stride
            height = input_height // stride
            width = input_width // stride
            key = (height, width, stride)
            if key in self.center_cache:
                anchor_centers = self.center_cache[key]
            else:
                anchor_centers = np.stack(np.mgrid[:height, :width][::-1], axis=-1).astype(np.float32)
                anchor_centers = (anchor_centers * stride).reshape((-1, 2))
                # Every grid location is repeated twice (two anchors per cell).
                anchor_centers = np.stack([anchor_centers] * 2, axis=1).reshape((-1, 2))
                # Cap the cache so varying input sizes cannot grow it forever.
                if len(self.center_cache) < 100:
                    self.center_cache[key] = anchor_centers
            pos_inds = np.where(scores >= thresh)[0]
            if len(pos_inds) > 0:
                bboxes = self.distance2bbox(anchor_centers, bbox_preds)
                scores_list.append(scores[pos_inds])
                bboxes_list.append(bboxes[pos_inds])
                kpss = self.distance2kps(anchor_centers, kps_preds)
                kpss = kpss.reshape((kpss.shape[0], -1, 2))
                kpss_list.append(kpss[pos_inds])
        return scores_list, bboxes_list, kpss_list

    def detect(self, img, threshold=0.5, max_num=0, metric='default'):
        """Detect faces in ``img`` and return boxes and keypoints.

        The image is resized with its aspect ratio preserved and pasted into
        the top-left corner of a zero-filled canvas of the model input size.
        Results are scaled back to the coordinates of ``img``.

        Args:
            img: Image array of shape (height, width, 3).
            threshold: Minimum detection score.
            max_num: When positive, keep at most this many detections.
            metric: ``'max'`` ranks detections by box area only; any other
                value also penalises distance from the image center.

        Returns:
            ``(det, kpss)`` where each row of ``det`` is
            ``[x1, y1, x2, y2, score]`` and ``kpss`` holds the matching
            keypoints as (x, y) pairs. Two empty 1-D arrays are returned when
            nothing is detected.
        """
        input_size = self.input_size
        im_ratio = float(img.shape[0]) / img.shape[1]
        model_ratio = float(input_size[1]) / input_size[0]
        if im_ratio > model_ratio:
            new_height = input_size[1]
            new_width = int(new_height / im_ratio)
        else:
            new_width = input_size[0]
            new_height = int(new_width * im_ratio)
        det_scale = float(new_height) / img.shape[0]
        resized_img = cv2.resize(img, (new_width, new_height))
        det_img = np.zeros((input_size[1], input_size[0], 3), dtype=np.uint8)
        det_img[:new_height, :new_width, :] = resized_img
        scores_list, bboxes_list, kpss_list = self.forward(det_img, threshold)
        if len(scores_list) == 0:
            return np.empty(0), np.empty(0)
        scores = np.vstack(scores_list)
        scores_ravel = scores.ravel()
        order = scores_ravel.argsort()[::-1]
        # Undo the resize so coordinates refer to the original image.
        bboxes = np.vstack(bboxes_list) / det_scale
        kpss = np.vstack(kpss_list) / det_scale
        pre_det = np.hstack((bboxes, scores)).astype(np.float32, copy=False)
        pre_det = pre_det[order, :]
        keep = self.nms(pre_det)
        det = pre_det[keep, :]
        kpss = kpss[order, :, :]
        kpss = kpss[keep, :, :]
        if max_num > 0 and det.shape[0] > max_num:
            area = (det[:, 2] - det[:, 0]) * (det[:, 3] - det[:, 1])
            img_center = img.shape[0] // 2, img.shape[1] // 2
            offsets = np.vstack([
                (det[:, 0] + det[:, 2]) / 2 - img_center[1],
                (det[:, 1] + det[:, 3]) / 2 - img_center[0]
            ])
            offset_dist_squared = np.sum(np.power(offsets, 2.0), 0)
            if metric == 'max':
                values = area
            else:
                values = area - offset_dist_squared * 2.0  # some extra weight on the centering
            bindex = np.argsort(values)[::-1]
            bindex = bindex[0:max_num]
            det = det[bindex, :]
            kpss = kpss[bindex, :]
        return det, kpss

    def nms(self, dets):
        """Greedy non-maximum suppression.

        Args:
            dets: Array whose rows are ``[x1, y1, x2, y2, score]``.

        Returns:
            List of row indices to keep, highest score first. A box is
            dropped when its overlap with an already kept box exceeds
            ``self.nms_thresh``.
        """
        thresh = self.nms_thresh
        x1 = dets[:, 0]
        y1 = dets[:, 1]
        x2 = dets[:, 2]
        y2 = dets[:, 3]
        scores = dets[:, 4]
        # The "+ 1" treats coordinates as inclusive pixel indices.
        areas = (x2 - x1 + 1) * (y2 - y1 + 1)
        order = scores.argsort()[::-1]
        keep = []
        while order.size > 0:
            i = order[0]
            keep.append(i)
            rest = order[1:]
            xx1 = np.maximum(x1[i], x1[rest])
            yy1 = np.maximum(y1[i], y1[rest])
            xx2 = np.minimum(x2[i], x2[rest])
            yy2 = np.minimum(y2[i], y2[rest])
            w = np.maximum(0.0, xx2 - xx1 + 1)
            h = np.maximum(0.0, yy2 - yy1 + 1)
            inter = w * h
            ovr = inter / (areas[i] + areas[rest] - inter)
            inds = np.where(ovr <= thresh)[0]
            # inds indexes into rest, which is shifted by one against order.
            order = order[inds + 1]
        return keep

    def distance2bbox(self, points, distance, max_shape=None):
        """Decode distance prediction to bounding box.

        Args:
            points (ndarray): Shape (n, 2), [x, y].
            distance (ndarray): Distance from the given point to 4
                boundaries (left, top, right, bottom).
            max_shape (tuple): Shape of the image as (height, width). When
                given, coordinates are clipped into the image.

        Returns:
            ndarray: Decoded bboxes of shape (n, 4) as [x1, y1, x2, y2].
        """
        x1 = points[:, 0] - distance[:, 0]
        y1 = points[:, 1] - distance[:, 1]
        x2 = points[:, 0] + distance[:, 2]
        y2 = points[:, 1] + distance[:, 3]
        if max_shape is not None:
            # NumPy arrays have no .clamp(); np.clip is the equivalent.
            x1 = np.clip(x1, 0, max_shape[1])
            y1 = np.clip(y1, 0, max_shape[0])
            x2 = np.clip(x2, 0, max_shape[1])
            y2 = np.clip(y2, 0, max_shape[0])
        return np.stack([x1, y1, x2, y2], axis=-1)

    def distance2kps(self, points, distance, max_shape=None):
        """Decode keypoint offsets into absolute keypoint coordinates.

        Args:
            points (ndarray): Shape (n, 2), [x, y].
            distance (ndarray): Shape (n, 2 * k); consecutive column pairs
                are the (x, y) offsets of one keypoint from its point.
            max_shape (tuple): Shape of the image as (height, width). When
                given, coordinates are clipped into the image.

        Returns:
            ndarray: Shape (n, 2 * k) with interleaved x, y coordinates.
        """
        preds = []
        for i in range(0, distance.shape[1], 2):
            px = points[:, 0] + distance[:, i]
            py = points[:, 1] + distance[:, i + 1]
            if max_shape is not None:
                # NumPy arrays have no .clamp(); np.clip is the equivalent.
                px = np.clip(px, 0, max_shape[1])
                py = np.clip(py, 0, max_shape[0])
            preds.append(px)
            preds.append(py)
        return np.stack(preds, axis=-1)
import numpy as np
import cv2
from skimage import transform
################### Key points of the standard (reference) face ###################
# Destination landmarks that align() maps the detected landmarks onto.
# NOTE(review): five rows, presumably (x, y) for eyes, nose and mouth
# corners -- confirm the order against the detector's keypoint output.
REFERENCE_FACIAL_POINTS = np.array([
# Coordinates are given for a 112×112 aligned face image.
[38.2946, 51.6963],
[73.5318, 51.5014],
[56.0252, 71.7366],
[41.5493, 92.3655],
[70.7299, 92.2041]
], np.float32)
def align(image, landmark, dsize=(112, 112)):
    """Warp ``image`` so that ``landmark`` lands on the reference face points.

    Args:
        image: Source image passed to ``cv2.warpAffine``.
        landmark: Detected facial landmarks, matched row by row against
            ``REFERENCE_FACIAL_POINTS``.
        dsize: Size of the output image as accepted by ``cv2.warpAffine``.

    Returns:
        The warped (aligned) face image.
    """
    # Estimate the similarity transform from the landmarks to the reference.
    similarity = transform.SimilarityTransform()
    similarity.estimate(landmark, REFERENCE_FACIAL_POINTS)
    # warpAffine takes only the top two rows of the 3x3 matrix.
    affine = similarity.params[:2, :]
    return cv2.warpAffine(image, affine, dsize, borderValue=3.0)
import numpy as np
import cv2
from sklearn import preprocessing
import onnxruntime
import alignface
class ArcFace:
    """ArcFace embedding extractor executed through ONNX Runtime."""

    def __init__(self, model_file, providers=None, options=None):
        """Create the inference session and read the model input description.

        Args:
            model_file: ONNX recognition model; must not be ``None``.
            providers: ONNX Runtime execution providers. Defaults to
                ``['CPUExecutionProvider']``.
            options: ``onnxruntime.SessionOptions`` instance. A default one
                is created when omitted.
        """
        assert model_file is not None
        self.input_mean = 127.5
        self.input_std = 127.5
        if providers is None:
            providers = ['CPUExecutionProvider']
        if options is None:
            options = onnxruntime.SessionOptions()
        # The keyword must be ``sess_options``; an unknown keyword such as
        # ``options`` is not applied to the session, which loses the settings
        # the caller configured.
        self.session = onnxruntime.InferenceSession(model_file, providers=providers, sess_options=options)
        input_cfg = self.session.get_inputs()[0]
        input_shape = input_cfg.shape
        # Dimensions 2 and 3 of the input shape are reversed to get
        # (width, height) -- assumes an NCHW model input; TODO confirm.
        self.input_size = tuple(input_shape[2:4][::-1])
        self.input_name = input_cfg.name

    def get_feature(self, img, landmark):
        """Return the L2-normalised embedding of the face at ``landmark``.

        Args:
            img: Image containing the face.
            landmark: Facial landmarks passed to ``alignface.align``.

        Returns:
            Array of shape (1, d) normalised with
            ``sklearn.preprocessing.normalize``.
        """
        _img = alignface.align(img, landmark=landmark)
        embedding = self.forward(_img).flatten()
        embedding = np.array(embedding).reshape((1, -1))
        embedding = preprocessing.normalize(embedding)
        return embedding

    def forward(self, imgs):
        """Run the network on one image or a list of images.

        Args:
            imgs: A single aligned face image or a list of them.

        Returns:
            The first output of the session for the batch.
        """
        if not isinstance(imgs, list):
            imgs = [imgs]
        input_size = self.input_size
        blob = cv2.dnn.blobFromImages(imgs, 1.0 / self.input_std, input_size,
                                      (self.input_mean, self.input_mean, self.input_mean), swapRB=True)
        net_out = self.session.run([], {self.input_name: blob})[0]
        return net_out
import cv2
import os
import numpy as np
import onnxruntime
from scrfd import SCRFD
from arcface import ArcFace
class FaceRecognition:
    """Face recognition helper: detect faces, embed them, match a database."""

    def __init__(self, dete_model=None, reco_model=None, ctx_id=0, dete_threshold=0.50, reco_threshold=1.24):
        """Build the detector and the recognizer.

        Args:
            dete_model: Model file handed to ``SCRFD``.
            reco_model: Model file handed to ``ArcFace``.
            ctx_id: Non-negative selects ``DmlExecutionProvider``; negative
                keeps ``CPUExecutionProvider``. The value itself is not
                forwarded as a device index.
            dete_threshold: Face detection score threshold.
            reco_threshold: Face recognition threshold; two faces match when
                the squared Euclidean distance of their embeddings is below
                it.
        """
        providers = ['CPUExecutionProvider']
        options = onnxruntime.SessionOptions()
        if ctx_id >= 0:
            providers = ['DmlExecutionProvider']
            options.enable_mem_pattern = False
            options.execution_mode = onnxruntime.ExecutionMode.ORT_SEQUENTIAL
        self.detector = SCRFD(model_file=dete_model, providers=providers, options=options)
        self.recognizer = ArcFace(model_file=reco_model, providers=providers, options=options)
        self.reco_threshold = reco_threshold
        self.dete_threshold = dete_threshold
        # Registered faces as {"user_id": ..., "feature": ...} dicts.
        self.faces_embedding = []

    def load_faces(self, face_db_path):
        """Register every image found under ``face_db_path``.

        The directory is created when missing. The part of the file name
        before the first dot is used as the user id. Files that cannot be
        read as images and files that fail registration are reported with
        ``print`` and skipped.
        """
        if not os.path.exists(face_db_path):
            os.makedirs(face_db_path)
        for root, _dirs, files in os.walk(face_db_path):
            for file in files:
                raw = np.fromfile(os.path.join(root, file), dtype=np.uint8)
                # An empty buffer cannot be decoded, and imdecode yields None
                # for data that is not an image; skip both instead of
                # crashing inside the detector.
                input_image = cv2.imdecode(raw, 1) if raw.size > 0 else None
                if input_image is None:
                    print(file + ':' + '无法读取图片')
                    continue
                user_id = file.split(".")[0]
                result = self.register(input_image, user_id)
                if result != 'success':
                    print(file + ':' + result)

    def recognize(self, image):
        """Detect faces in ``image`` and match each against the database.

        Returns:
            List of dicts with ``user_id``, ``bbox`` and ``landmark`` keys,
            one per (detected face, registered face) pair whose distance is
            below ``self.reco_threshold``.
        """
        dets, landmarks = self.detector.detect(image, threshold=self.dete_threshold)
        results = list()
        for det, landmark in zip(dets, landmarks):
            embedding = self.recognizer.get_feature(image, landmark)
            for com_face in self.faces_embedding:
                r = self.feature_compare(embedding, com_face["feature"])
                print("recognize: {}/{}".format(com_face["user_id"], r))
                if r < self.reco_threshold:
                    result = dict()
                    result["user_id"] = com_face["user_id"]
                    result["bbox"] = (np.array(det)[:4]).astype(np.int32).tolist()
                    result["landmark"] = np.array(landmark).astype(np.int32).tolist()
                    results.append(result)
        return results

    @staticmethod
    def feature_compare(feature1, feature2):
        """Return the row-wise squared Euclidean distance of two feature arrays."""
        diff = np.subtract(feature1, feature2)
        dist = np.sum(np.square(diff), 1)
        return dist

    def register(self, image, user_id):
        """Add the single face in ``image`` to the database under ``user_id``.

        Returns:
            ``"success"`` when the face was added, otherwise a message saying
            that no face was found, several faces were found, or the face
            already matches a registered user.
        """
        bboxes, landmarks = self.detector.detect(image, threshold=self.dete_threshold)
        if bboxes.shape[0] == 0:
            return '检测不到人脸'
        if bboxes.shape[0] > 1:
            return '检测到多个人脸'
        # Check whether this face is already registered.
        embedding = self.recognizer.get_feature(image, landmarks[0])
        for com_face in self.faces_embedding:
            r = self.feature_compare(embedding, com_face["feature"])
            if r < self.reco_threshold:
                return com_face["user_id"] + ' 已存在'
        # The face qualifies: add its feature to the in-memory database.
        self.faces_embedding.append({
            "user_id": user_id,
            "feature": embedding
        })
        return "success"
注意:providers 可以根据自己的硬件环境修改。
if __name__ == '__main__':
    # Demo: load the face database, recognise the faces in one test image
    # and display the annotated result. The paths are placeholders.
    dete_model = r'X:\xxx\scrfd_10g_bnkps_shape640x640.onnx'
    reco_model = r'X:\xxx\w600k_r50.onnx'
    face_db_path = r'X:\xxx\face_db'
    test_image_path = r'X:\xxx\test.jpg'
    face_reco = FaceRecognition(dete_model=dete_model, reco_model=reco_model, ctx_id=0)
    face_reco.load_faces(face_db_path=face_db_path)
    img = cv2.imread(test_image_path)
    # cv2.imread signals failure by returning None instead of raising.
    if img is None:
        raise FileNotFoundError('cannot read image: ' + test_image_path)
    ta = datetime.datetime.now()
    results = face_reco.recognize(img)
    print('all cost(ms):', (datetime.datetime.now() - ta).total_seconds() * 1000)
    for result in results:
        x1, y1, x2, y2 = result["bbox"]
        cv2.rectangle(img, (x1, y1), (x2, y2), (255, 0, 0), 2)
        cv2.putText(img=img, text=result["user_id"], org=(x1, y1 - 10),
                    fontFace=cv2.FONT_HERSHEY_SIMPLEX, fontScale=0.5, color=(0, 255, 0), thickness=1)
        for landmark in result["landmark"]:
            cv2.circle(img, tuple(landmark), 1, (0, 0, 255), 2)
    cv2.imshow('Image', img)
    cv2.waitKey()
    cv2.destroyAllWindows()
说明: