Face Landmark
[1]. mtcnn-pytorch | Joint Face Detection and Alignment using Multi-task Cascaded Convolutional Networks[2016]
[2]. mtcnn-blog: https://blog.csdn.net/On_theway10/article/details/102628169
[3]. pfld-pytorch | PFLD: A Practical Facial Landmark Detector[2019-arXiv]
[4]. metricface-pytorch | pytorch for sphereface, cosface, addface, arcface.
OpenCV: Alignment-naive
def face_alignment(img, points, scale=1.0):
    """Rotate ``img`` so the line through the first two landmarks is level.

    The rotation angle is the direction of the vector from ``points[0]`` to
    ``points[1]``; the rotation centre is the midpoint of those two points.

    Args:
        img: Image array. Only ``img.shape[0]`` and ``img.shape[1]`` are read
            here; the output keeps that height and width.
        points: Indexable sequence of (x, y) landmarks (ndarray or nested
            list). The original notes use the MTCNN 5-landmark output as the
            example, with rows 0 and 1 treated as the two eyes.
        scale: Isotropic scale forwarded to ``cv2.getRotationMatrix2D``.

    Returns:
        The rotated image returned by ``cv2.warpAffine``.
    """
    # Plain Python floats throughout: some OpenCV builds refuse NumPy scalar
    # types when parsing point / angle arguments.
    x0, y0 = float(points[0][0]), float(points[0][1])
    x1, y1 = float(points[1][0]), float(points[1][1])

    # Centre = midpoint of the two points, truncated to whole pixels exactly
    # as the previous ``.astype(int)`` did, so the output is unchanged.
    # Alternative centre from the original notes: the nose tip, points[2].
    center = (float(int((x0 + x1) / 2.0)), float(int((y0 + y1) / 2.0)))

    # fastAtan2 returns the angle in degrees, which is the unit
    # getRotationMatrix2D expects.
    angle = cv2.fastAtan2(y1 - y0, x1 - x0)
    r_mat = cv2.getRotationMatrix2D(center, angle, scale=scale)
    return cv2.warpAffine(img, r_mat, dsize=(img.shape[1], img.shape[0]))
Result
OpenCV: Alignment-classical
模型假设:对于任意的人脸[必须与模板人脸(正脸)的尺度相同],可以通过旋转和平移这两个操作来得到正脸!
数学表达:X * T = M 或者 X = M * T^(-1)这里M和X依次表示正脸和任意状态的脸,T表示变换矩阵。
最优解:T* = argmin_T ||X * T - M||^2
下面以mtcnn得到的5-keypoints为例,详解face-alignment的细节:
step - 1. 确定正脸模板landmark参数(这里以module_size = 250 x 250为例子)
dst_pts = np.array([[78.89219125, 115.39352792], [170.65572739, 114.95848213], [125.06562471, 160.1263455 ],
[ 87.36797174, 206.1729942 ], [163.35910559, 205.8127267 ]],dtype=np.float32)
step - 2.利用mtcnn获取待矫正人脸的face_landmark:
bbox, src_pts = mtcnn.detect_face(src_img) # mtcnn-pytorch, src_img.shape = (250, 250, 3)
step - 3.求解变换矩阵Trans, [详见getTransMatrix函数]
step - 4.利用变换矩阵Trans来align人脸
ali_img = cv2.warpAffine(src_img, Trans, crop_size) # crop_size = module_size
Experiment
从左到右依次是:测试图片、dst_pts可视化、src_pts和dst_pts在测试图上的可视化、ref_pts和dst_pts在矫正后的人脸上的可视化.
#!/usr/bin/env python3
#-*- coding:utf-8 -*-
import os
import cv2
import numpy as np
from numpy.linalg import inv
def getTransMatrix(src_pts, dst_pts):
    """Fit the 2-D similarity transform that best maps ``src_pts`` to ``dst_pts``.

    The transform is parameterised as ``r = (a, b, tx, ty)`` with

        x' = a * x - b * y + tx
        y' = b * x + a * y + ty

    and solved as a ridge-regularised least-squares problem
    ``(A^T A + 1e-6 * I) r = A^T y``.

    Args:
        src_pts: Array-like of shape (N, >=2); columns 0 and 1 are x and y.
        dst_pts: Array-like of shape (N, >=2) holding the target positions.

    Returns:
        A tuple ``(Trans, ref_pts)`` where ``Trans`` is the 2x3 float64 matrix
        ``[[a, -b, tx], [b, a, ty]]`` (the layout passed to
        ``cv2.warpAffine`` by the caller in this file) and ``ref_pts`` is the
        (N, 2) array of ``src_pts`` mapped through the fitted transform.

    Raises:
        ValueError: If either input is not 2-D with at least two columns, or
            the two inputs contain a different number of points.
    """
    # Work in float64: avoids wrap-around of ``-y`` for unsigned integer
    # landmarks and lets plain nested lists be passed in.
    src = np.asarray(src_pts, dtype=np.float64)
    dst = np.asarray(dst_pts, dtype=np.float64)
    if src.ndim != 2 or dst.ndim != 2 or src.shape[1] < 2 or dst.shape[1] < 2:
        raise ValueError("src_pts and dst_pts must be 2-D arrays of (x, y) rows")
    if src.shape[0] != dst.shape[0]:
        raise ValueError("src_pts and dst_pts must contain the same number of points")

    num_pts = src.shape[0]
    x = src[:, 0]
    y = src[:, 1]
    ones = np.ones(num_pts)
    zeros = np.zeros(num_pts)

    # Design matrix: the first N rows produce x', the last N rows produce y'.
    design = np.vstack((
        np.column_stack((x, -y, ones, zeros)),
        np.column_stack((y, x, zeros, ones)),
    ))
    target = np.concatenate((dst[:, 0], dst[:, 1]))

    # Normal equations with a small ridge term so the system stays solvable
    # for degenerate point sets; solve() avoids forming an explicit inverse.
    gram = np.dot(design.T, design) + 1e-6 * np.eye(4)
    r = np.linalg.solve(gram, np.dot(design.T, target))

    # Re-project the source points and fold the stacked result back to (N, 2).
    fitted = np.dot(design, r)
    ref_pts = np.column_stack((fitted[:num_pts], fitted[num_pts:]))

    Trans = np.array([[r[0], -r[1], r[2]],
                      [r[1], r[0], r[3]]])
    return Trans, ref_pts
if __name__ == "__main__":
    # Demo: align the face in 'test.jpg' to the 250 x 250 frontal template.

    # Template landmark positions inside the 250 x 250 output image.
    dst_pts = np.array([[78.89219125, 115.39352792],
                        [170.65572739, 114.95848213],
                        [125.06562471, 160.1263455],
                        [87.36797174, 206.1729942],
                        [163.35910559, 205.8127267]], dtype=np.float32)
    # Landmarks of the face to align (presumably detected in 'test.jpg'
    # beforehand, e.g. by MTCNN -- verify against the actual image).
    src_pts = np.array([[95, 120], [151, 98], [123, 131], [112, 171], [162, 149]])

    src_img = cv2.imread('test.jpg')
    if src_img is None:
        # imread signals a missing or unreadable file by returning None;
        # stop here instead of failing later inside warpAffine.
        raise SystemExit("cannot read input image 'test.jpg'")

    Trans, ref_pts = getTransMatrix(src_pts, dst_pts)
    ali_img = cv2.warpAffine(src_img, Trans, (250, 250))
    cv2.imwrite('lookme.jpg', ali_img)