Head Pose Estimation from Facial Landmarks

1. Introduction

This post records head pose estimation from the keypoints detected by MTCNN. The idea is simple: MTCNN is a face detection algorithm that also outputs 5 facial landmarks — left eye, right eye, nose tip, left mouth corner and right mouth corner. Given these 5 2D keypoints for a face in the image, OpenCV's PnP pose estimation (solvePnP) finds the rotation and translation that project a 5-point 3D template (defined in world coordinates) onto the 2D keypoints. The estimated transform then yields the head pose angles: Yaw (turning the head: left positive, right negative), Pitch (nodding: up negative, down positive) and Roll (tilting the head: left negative, right positive).
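
As a preview, here is a minimal self-contained sketch of that pipeline. The 3D template matches the one used in section 3; the 2D landmark values and the 200x200 crop size are hypothetical placeholders, so the printed matrix is only illustrative:

#include <iostream>
#include <vector>
#include "opencv2/opencv.hpp"

int main()
{
	// 5-point 3D face template in model/world coordinates (same values as section 3)
	std::vector<cv::Point3d> model_points = {
	    {-165.0,  170.0, -115.0},   // left eye
	    { 165.0,  170.0, -115.0},   // right eye
	    {   0.0,    0.0,    0.0},   // nose tip
	    {-150.0, -150.0, -125.0},   // left mouth corner
	    { 150.0, -150.0, -125.0}};  // right mouth corner

	// hypothetical 2D landmarks inside a 200x200 face crop
	std::vector<cv::Point2d> image_points = {
	    {70, 80}, {130, 80}, {100, 120}, {80, 150}, {120, 150}};

	// pinhole camera: focal length ~ crop width, principal point at the center
	cv::Mat K = (cv::Mat_<double>(3, 3) << 200, 0, 100, 0, 200, 100, 0, 0, 1);

	// solvePnP recovers the rotation/translation mapping the template onto the image
	cv::Mat rvec, tvec;
	cv::solvePnP(model_points, image_points, K, cv::Mat::zeros(4, 1, CV_64F), rvec, tvec);

	cv::Mat R;
	cv::Rodrigues(rvec, R);                  // rotation vector -> 3x3 rotation matrix
	std::cout << "R = " << R << std::endl;   // Euler angles are then read off R (section 3)
	return 0;
}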

2. Mtcnn-light

Many open-source versions of MTCNN are available; here the light version is used. It is fast, at the cost of slightly lower model accuracy. To expose the 5 landmarks, an overload void findFace(Mat &image, vector<Bbox> &resBox); was added to the original src/mtcnn.cpp.
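
For orientation, here is a sketch of the detection result the rest of the post relies on. The layout follows mtcnn-light's Bbox struct as I recall it from src/mtcnn.h (treat the exact field order as an assumption to verify against your checkout); the key parts are the exist flag, the box corners, and ppoint, which stores the 5 landmarks as x0..x4 followed by y0..y4:

#include <cstdio>
#include <vector>

// Assumed layout of mtcnn-light's Bbox (verify against src/mtcnn.h);
// mtcnn-light's mydataFmt is typically a typedef for float.
struct Bbox {
	float score;
	int x1, y1, x2, y2;   // box corners (note the swapped x/y, see section 3)
	float area;
	bool exist;           // true if the detection survived filtering/NMS
	float ppoint[10];     // 5 landmarks: x0..x4, then y0..y4
	float regreCoord[4];
};

// The added overload fills resBox instead of drawing, so callers can read
// the landmarks, for example:
void printLandmarks(const std::vector<Bbox>& resBox)
{
	for (const Bbox& b : resBox) {
		if (!b.exist) continue;
		for (int i = 0; i < 5; ++i)
			std::printf("landmark %d: (%.1f, %.1f)\n", i, b.ppoint[i], b.ppoint[i + 5]);
	}
}

int main()
{
	std::vector<Bbox> demo(1);   // one zero-initialized dummy detection
	demo[0].exist = true;
	for (int i = 0; i < 10; ++i) demo[0].ppoint[i] = 10.0f * i;  // dummy values
	printLandmarks(demo);
	return 0;
}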

3. Head pose estimation with MTCNN

The head pose estimation code is adapted from https://blog.csdn.net/zzyy0929/article/details/78323363

#include "network.h"
#include "mtcnn.h"
#include <iostream>
#include <cmath>
#include <vector>
#include "opencv2/opencv.hpp"
 
void rot2Euler(cv::Mat faceImg, const cv::Mat& rotation3_3)
{
	cv::resize(faceImg, faceImg, cv::Size(faceImg.cols * 2, faceImg.rows * 2));

	// rotation matrix -> quaternion (q0, q1, q2, q3)
	double q0 = std::sqrt(1 + rotation3_3.at<double>(0,0) + rotation3_3.at<double>(1,1) + rotation3_3.at<double>(2,2)) / 2;
	double q1 = (rotation3_3.at<double>(2,1) - rotation3_3.at<double>(1,2)) / (4 * q0);
	double q2 = (rotation3_3.at<double>(0,2) - rotation3_3.at<double>(2,0)) / (4 * q0);
	double q3 = (rotation3_3.at<double>(1,0) - rotation3_3.at<double>(0,1)) / (4 * q0);

	// quaternion -> Euler angles (radians)
	double yaw   = std::asin(2 * (q0 * q2 + q1 * q3));
	double pitch = std::atan2(2 * (q0 * q1 - q2 * q3), q0 * q0 - q1 * q1 - q2 * q2 + q3 * q3);
	double roll  = std::atan2(2 * (q0 * q3 - q1 * q2), q0 * q0 + q1 * q1 - q2 * q2 - q3 * q3);
	std::cout << "yaw:" << yaw << " pitch:" << pitch << " roll:" << roll << std::endl;
}

void headPosEstimate(const cv::Mat& faceImg, std::vector<cv::Point2d>& facial5Pts)
{
 
	// 3D model points
	std::vector<cv::Point3d> model_points;
	model_points.push_back(cv::Point3d(-165.0,  170.0, -115.0));  // Left eye
	model_points.push_back(cv::Point3d( 165.0,  170.0, -115.0));  // Right eye
	model_points.push_back(cv::Point3d(   0.0,    0.0,    0.0));  // Nose tip
	model_points.push_back(cv::Point3d(-150.0, -150.0, -125.0));  // Left mouth corner
	model_points.push_back(cv::Point3d( 150.0, -150.0, -125.0));  // Right mouth corner
 
	// Camera internals
	double focal_length = faceImg.cols;
	cv::Point2d center = cv::Point2d(faceImg.cols/2,faceImg.rows/2);
	cv::Mat camera_matrix = (cv::Mat_<double>(3,3) << focal_length, 0, center.x, 0, focal_length, center.y, 0, 0, 1);
	cv::Mat dist_coeffs = cv::Mat::zeros(4, 1, cv::DataType<double>::type);  // assume no lens distortion
 
	cv::Mat rotation_vector;
	cv::Mat translation_vector;
 
	cv::solvePnP(model_points, facial5Pts, camera_matrix, dist_coeffs, rotation_vector, translation_vector);
	
	/* Optional: project a 3D line out of the nose tip to visualize the pose
	std::vector<cv::Point3d> nose_end_point3D;
	std::vector<cv::Point2d> nose_end_point2D;
	nose_end_point3D.push_back(cv::Point3d(0, 0, 1000.0));
	cv::projectPoints(nose_end_point3D, rotation_vector, translation_vector, camera_matrix, dist_coeffs, nose_end_point2D);
	//std::cout << "Rotation Vector " << std::endl << rotation_vector << std::endl;
	//std::cout << "Translation Vector" << std::endl << translation_vector << std::endl;

	cv::Mat temp(faceImg);
	cv::line(temp, facial5Pts[2], nose_end_point2D[0], cv::Scalar(255,0,0), 2);
	cv::imshow("vvvvvvvv", temp);
	cv::waitKey(1);  */
    
	cv::Mat rotation3_3;
	cv::Rodrigues(rotation_vector,rotation3_3);
  	rot2Euler(faceImg.clone(),rotation3_3);
}
 
void showheadPost(const cv::Mat& img, std::vector<Bbox>& resBox)
{
	for (std::vector<Bbox>::iterator it = resBox.begin(); it != resBox.end(); it++) {
		if ((*it).exist) {
			// Note: mtcnn-light swaps the axes -- (*it).y1 holds the x coordinate
			// of the first corner and (*it).x1 holds the y coordinate.
			int it_x1 = (*it).y1;
			int it_y1 = (*it).x1;
			int it_x2 = (*it).y2;
			int it_y2 = (*it).x2;

			const cv::Mat faceImg = img(cv::Rect(it_x1, it_y1, it_x2 - it_x1, it_y2 - it_y1));

			// the 5 landmarks, shifted so the face ROI's origin is (0,0)
			std::vector<cv::Point2d> face5Pts;
			for (int i = 0; i < 5; ++i) {
				face5Pts.push_back(cv::Point2d(*(it->ppoint + i) - it_x1, *(it->ppoint + i + 5) - it_y1));
			}

			headPosEstimate(faceImg, face5Pts);
		}
	}
}
 
int main()
{
    cv::Mat img = cv::imread("ldh.jpg");
    mtcnn find(img.rows, img.cols);
    std::vector<Bbox> resBox;
    find.findFace(img , resBox);
    showheadPost(img,resBox);
 
    cv::waitKey(0);
 
    return 0;
}

4. Experimental results

The method was tested on two images; the estimated angles give a usable measure of how far the face is turned from frontal (the result images are not reproduced here). Failure cases, however, produce NaN: the quaternion conversion divides by q0, which approaches zero for rotations near 180 degrees, and rounding errors can push the asin argument outside [-1, 1].
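
One way to sidestep the NaN is to skip the quaternion step and read the angles straight from the rotation matrix, for example via OpenCV's RQ decomposition. A minimal sketch that could replace the body of rot2Euler (cv::RQDecomp3x3 returns the three angles in degrees; reading them as pitch/yaw/roll assumes the usual x/y/z axis convention):

#include <iostream>
#include "opencv2/opencv.hpp"

// Sketch: Euler angles directly from the 3x3 matrix produced by cv::Rodrigues,
// avoiding the division by q0 that yields NaN in the quaternion path.
void rot2EulerRQ(const cv::Mat& rotation3_3)
{
	cv::Mat mtxR, mtxQ;
	cv::Vec3d angles = cv::RQDecomp3x3(rotation3_3, mtxR, mtxQ);  // degrees
	std::cout << "pitch:" << angles[0] << " yaw:" << angles[1]
	          << " roll:" << angles[2] << std::endl;
}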

 

C++ (computing the angles by hand):

https://blog.csdn.net/Taily_Duan/article/details/60580694

Python versions using MTCNN:

https://github.com/JuneoXIE/mtcnn-opencv_face_pose_estimation/blob/bedc3e7503190b2d468f64ef01a48d0e61ed3e3c/pose_estimate.py

https://github.com/JuneoXIE/Face_blur_detection/blob/9d3d350343b0726006d9fb98f699254158336512/mtcnn/pose_estimate.py

# encoding=utf8  
import os
import numpy as np
import cv2
from test import *  # repo-local helper module; provides get_landmarks() used below
import math
import _pickle as pickle
# from PIL import Image,ImageDraw,ImageFont

data_dir = r'.../test_datasets/'
save_dir = r'.../results/'

def drawResult(imgpath, yaw, pitch, roll,save_dir):
	img = cv2.imread(imgpath)
	draw = img.copy()
	cv2.putText(draw,"Yaw:"+str(yaw),(20,40),cv2.FONT_HERSHEY_SIMPLEX,1,(0,255,0))
	cv2.putText(draw,"Pitch:"+str(pitch),(20,80),cv2.FONT_HERSHEY_SIMPLEX,1,(0,255,0))
	cv2.putText(draw,"Roll:"+str(roll),(20,120),cv2.FONT_HERSHEY_SIMPLEX,1,(0,255,0))
	cv2.waitKey()
	cv2.imwrite(save_dir+os.path.splitext(imgpath)[0]+'_pose_estimate1.jpg',draw)



def rot2Euler(imgpath,rotation_vector,save_dir):
	# calculate rotation angles
	theta = cv2.norm(rotation_vector, cv2.NORM_L2)
	
	# transformed to quaternion
	w = math.cos(theta / 2)
	x = math.sin(theta / 2)*rotation_vector[0][0] / theta
	y = math.sin(theta / 2)*rotation_vector[1][0] / theta
	z = math.sin(theta / 2)*rotation_vector[2][0] / theta
	
	ysqr = y * y
	# pitch (x-axis rotation)
	t0 = 2.0 * (w * x + y * z)
	t1 = 1.0 - 2.0 * (x * x + ysqr)
	print('t0:{}, t1:{}'.format(t0, t1))
	pitch = math.atan2(t0, t1) - 0.8356857
	
	# yaw (y-axis rotation)
	t2 = 2.0 * (w * y - z * x)
	if t2 > 1.0:
		t2 = 1.0
	if t2 < -1.0:
		t2 = -1.0
	yaw = math.asin(t2) + 0.005409
	
	# roll (z-axis rotation)
	t3 = 2.0 * (w * z + x * y)
	t4 = 1.0 - 2.0 * (ysqr + z * z)
	roll = math.atan2(t3, t4) - 2.573345436
	
	# unit conversion: radians to degrees
	pitch_degree = int((pitch/math.pi)*180)
	yaw_degree = int((yaw/math.pi)*180)
	roll_degree = int((roll/math.pi)*180)
	
	drawResult(imgpath,yaw, pitch, roll,save_dir)
	
	print("Radians:")
	print("Yaw:",yaw)
	print("Pitch:",pitch)
	print("Roll:",roll)
	
	img = cv2.imread(imgpath)
	draw = img.copy()
	cv2.putText(draw,"Yaw:"+str(yaw),(20,40),cv2.FONT_HERSHEY_SIMPLEX,1,(0,255,0))
	cv2.putText(draw,"Pitch:"+str(pitch),(20,80),cv2.FONT_HERSHEY_SIMPLEX,1,(0,255,0))
	cv2.putText(draw,"Roll:"+str(roll),(20,120),cv2.FONT_HERSHEY_SIMPLEX,1,(0,255,0))
	cv2.waitKey()
	cv2.imwrite(os.path.splitext(imgpath)[0]+'_pose_estimate1.jpg',draw)
	
	print("Degrees:")
	draw = img.copy()
	if yaw_degree > 0:
		output_yaw = "face turns left:"+str(abs(yaw_degree))+" degrees"
		cv2.putText(draw,output_yaw,(20,40),cv2.FONT_HERSHEY_SIMPLEX,.5,(0,255,0))
		print(output_yaw)
	if yaw_degree < 0:
		output_yaw = "face turns right:"+str(abs(yaw_degree))+" degrees"
		cv2.putText(draw,output_yaw,(20,40),cv2.FONT_HERSHEY_SIMPLEX,.5,(0,255,0))
		print(output_yaw)
	if pitch_degree < 0:
		output_pitch = "face downwards:"+str(abs(pitch_degree))+" degrees"
		cv2.putText(draw,output_pitch,(20,80),cv2.FONT_HERSHEY_SIMPLEX,.5,(0,255,0))
		print(output_pitch)
	if pitch_degree > 0:
		output_pitch = "face upwards:"+str(abs(pitch_degree))+" degrees"
		cv2.putText(draw,output_pitch,(20,80),cv2.FONT_HERSHEY_SIMPLEX,.5,(0,255,0))
		print(output_pitch)
	if roll_degree < 0:
		output_roll = "face bends to the right:"+str(abs(roll_degree))+" degrees"
		cv2.putText(draw,output_roll,(20,120),cv2.FONT_HERSHEY_SIMPLEX,.5,(0,255,0))
		print(output_roll)
	if roll_degree > 0:
		output_roll = "face bends to the left:"+str(abs(roll_degree))+" degrees"
		cv2.putText(draw,output_roll,(20,120),cv2.FONT_HERSHEY_SIMPLEX,.5,(0,255,0))
		print(output_roll)
	if abs(yaw) < 0.00001 and abs(pitch) < 0.00001 and abs(roll)< 0.00001:
		cv2.putText(draw,"Initial status",(20,40),cv2.FONT_HERSHEY_SIMPLEX,.5,(0,255,0))
		print("Initial status")
	cv2.imwrite(save_dir+os.path.splitext(imgpath)[0]+'_pose_estimate2.jpg',draw)


def headPosEstimate(imgpath, landmarks, bbox,save_dir):
	# all input matrices to solvePnP must be of type double
	# 3D model points
	model_3d_points = np.array(([-165.0, 170.0, -115.0],  # Left eye 
								[165.0, 170.0, -115.0],   # Right eye
								[0.0, 0.0, 0.0],          # Nose tip
								[-150.0, -150.0, -125.0], # Left Mouth corner
								[150.0, -150.0, -125.0]), dtype=np.double) # Right Mouth corner
	landmarks = landmarks.astype(np.double)  # astype converts values; assigning to .dtype would only reinterpret the raw bytes
	# Camera internals
	img = cv2.imread(imgpath)
	img_size = img.shape
	focal_length = img_size[1]
	center =  [img_size[1]/2, img_size[0]/2]
	camera_matrix = np.array(([focal_length, 0, center[0]],
							[0, focal_length, center[1]],
							[0, 0, 1]),dtype=np.double)


	dist_coeffs = np.array([0,0,0,0], dtype=np.double)
	found, rotation_vector, translation_vector = cv2.solvePnP(model_3d_points, landmarks, camera_matrix, dist_coeffs)

	rot2Euler(imgpath,rotation_vector,save_dir)


# iterate over the test images
image_names = os.listdir(data_dir)
for index, image_name in enumerate(image_names):
    imgpath = data_dir + image_name
    bbox, landmarks = get_landmarks(imgpath)
    print("Image_index:", index)
    headPosEstimate(imgpath, landmarks, bbox, save_dir)

With dlib's 68 landmarks:

https://github.com/JuneoXIE/dlib-opencv_face_pose_estimation/blob/master/pose_estimate_dlib.py

#!/usr/bin/env python
import os
import cv2
import numpy as np
import dlib
import time
import math

data_dir = r"...\test_datasets"
save_dir = r"...\results"
detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor(r"...\shape_predictor_68_face_landmarks.dat")
POINTS_NUM_LANDMARK = 68


# pick the largest detected face
def _largest_face(dets):
    if len(dets) == 1:
        return 0

    face_areas = [ (det.right()-det.left())*(det.bottom()-det.top()) for det in dets]

    largest_area = face_areas[0]
    largest_index = 0
    for index in range(1, len(dets)):
        if face_areas[index] > largest_area :
            largest_index = index
            largest_area = face_areas[index]

    print("largest_face index is {} in {} faces".format(largest_index, len(dets)))

    return largest_index


# extract the points needed for pose estimation from dlib's landmark result
def get_image_points_from_landmark_shape(landmark_shape):
    if landmark_shape.num_parts != POINTS_NUM_LANDMARK:
        print("ERROR:landmark_shape.num_parts-{}".format(landmark_shape.num_parts))
        return -1, None

    #2D image points. If you change the image, you need to change vector

    image_points = np.array([
                                (landmark_shape.part(17).x, landmark_shape.part(17).y),  #17 left brow left corner
                                (landmark_shape.part(21).x, landmark_shape.part(21).y),  #21 left brow right corner
                                (landmark_shape.part(22).x, landmark_shape.part(22).y),  #22 right brow left corner
                                (landmark_shape.part(26).x, landmark_shape.part(26).y),  #26 right brow right corner
                                (landmark_shape.part(36).x, landmark_shape.part(36).y),  #36 left eye left corner
                                (landmark_shape.part(39).x, landmark_shape.part(39).y),  #39 left eye right corner
                                (landmark_shape.part(42).x, landmark_shape.part(42).y),  #42 right eye left corner
                                (landmark_shape.part(45).x, landmark_shape.part(45).y),  #45 right eye right corner
                                (landmark_shape.part(31).x, landmark_shape.part(31).y),  #31 nose left corner
                                (landmark_shape.part(35).x, landmark_shape.part(35).y),  #35 nose right corner
                                (landmark_shape.part(48).x, landmark_shape.part(48).y),  #48 mouth left corner
                                (landmark_shape.part(54).x, landmark_shape.part(54).y),  #54 mouth right corner
                                (landmark_shape.part(57).x, landmark_shape.part(57).y),  #57 mouth central bottom corner
                                (landmark_shape.part(8).x, landmark_shape.part(8).y),  #8 chin corner
                            ], dtype="double")
    return 0, image_points
    
    
# detect landmarks with dlib and return the points needed for pose estimation
def get_image_points(img):
                            
    gray = cv2.cvtColor( img, cv2.COLOR_BGR2GRAY )  # grayscale copy (the detector below still runs on the color image)
    dets = detector( img, 0 )

    if 0 == len( dets ):
        print( "ERROR: found no face" )
        return -1, None
    largest_index = _largest_face(dets)
    face_rectangle = dets[largest_index]

    landmark_shape = predictor(img, face_rectangle)

    return get_image_points_from_landmark_shape(landmark_shape)


# get the rotation and translation vectors
def get_pose_estimation(img_size, image_points ):
    # 3D model points.
    model_points = np.array([
                                (6.825897, 6.760612, 4.402142),     #33 left brow left corner
                                (1.330353, 7.122144, 6.903745),     #29 left brow right corner
                                (-1.330353, 7.122144, 6.903745),    #34 right brow left corner
                                (-6.825897, 6.760612, 4.402142),    #38 right brow right corner
                                (5.311432, 5.485328, 3.987654),     #13 left eye left corner
                                (1.789930, 5.393625, 4.413414),     #17 left eye right corner
                                (-1.789930, 5.393625, 4.413414),    #25 right eye left corner
                                (-5.311432, 5.485328, 3.987654),    #21 right eye right corner
                                (2.005628, 1.409845, 6.165652),     #55 nose left corner
                                (-2.005628, 1.409845, 6.165652),    #49 nose right corner
                                (2.774015, -2.080775, 5.048531),    #43 mouth left corner
                                (-2.774015, -2.080775, 5.048531),   #39 mouth right corner
                                (0.000000, -3.116408, 6.097667),    #45 mouth central bottom corner
                                (0.000000, -7.415691, 4.070434)     #6 chin corner
                            ])
    # Camera internals
     
    focal_length = img_size[1]
    center = (img_size[1]/2, img_size[0]/2)
    camera_matrix = np.array(
                             [[focal_length, 0, center[0]],
                             [0, focal_length, center[1]],
                             [0, 0, 1]], dtype = "double"
                             )

    dist_coeffs = np.array([7.0834633684407095e-002, 6.9140193737175351e-002, 0.0, 0.0, -1.3073460323689292e+000],dtype= "double") # example lens distortion coefficients
    
    (success, rotation_vector, translation_vector) = cv2.solvePnP(model_points, image_points, camera_matrix, dist_coeffs, flags=cv2.SOLVEPNP_ITERATIVE )

    # print("Rotation Vector:\n {}".format(rotation_vector))
    # print("Translation Vector:\n {}".format(translation_vector))
    return success, rotation_vector, translation_vector, camera_matrix, dist_coeffs


# convert the rotation vector to Euler angles
def get_euler_angle(rotation_vector):
    # calculate rotation angles
    theta = cv2.norm(rotation_vector, cv2.NORM_L2)

    # transformed to quaternion
    w = math.cos(theta / 2)
    x = math.sin(theta / 2)*rotation_vector[0][0] / theta
    y = math.sin(theta / 2)*rotation_vector[1][0] / theta
    z = math.sin(theta / 2)*rotation_vector[2][0] / theta

    ysqr = y * y
    # pitch (x-axis rotation)
    t0 = 2.0 * (w * x + y * z)
    t1 = 1.0 - 2.0 * (x * x + ysqr)
    
    # print('t0:{}, t1:{}'.format(t0, t1))
    pitch = math.atan2(t0, t1)

    # yaw (y-axis rotation)
    t2 = 2.0 * (w * y - z * x)
    if t2 > 1.0:
        t2 = 1.0
    if t2 < -1.0:
        t2 = -1.0
    yaw = math.asin(t2)

    # roll (z-axis rotation)
    t3 = 2.0 * (w * z + x * y)
    t4 = 1.0 - 2.0 * (ysqr + z * z)
    roll = math.atan2(t3, t4)

    print('pitch:{}, yaw:{}, roll:{}'.format(pitch, yaw, roll))

    # unit conversion: radians to degrees
    pitch_degree = int((pitch/math.pi)*180)
    yaw_degree = int((yaw/math.pi)*180)
    roll_degree = int((roll/math.pi)*180)

    return 0, pitch, yaw, roll, pitch_degree, yaw_degree, roll_degree


def get_pose_estimation_in_euler_angle(landmark_shape, im_size):
    try:
        ret, image_points = get_image_points_from_landmark_shape(landmark_shape)
        if ret != 0:
            print('get_image_points failed')
            return -1, None, None, None

        ret, rotation_vector, translation_vector, camera_matrix, dist_coeffs = get_pose_estimation(im_size, image_points)
        if ret != True:
            print('get_pose_estimation failed')
            return -1, None, None, None

        ret, pitch, yaw, roll, _, _, _ = get_euler_angle(rotation_vector)  # get_euler_angle returns 7 values
        if ret != 0:
            print('get_euler_angle failed')
            return -1, None, None, None

        euler_angle_str = 'Pitch:{}, Yaw:{}, Roll:{}'.format(pitch, yaw, roll)
        print(euler_angle_str)
        return 0, pitch, yaw, roll

    except Exception as e:
        print('get_pose_estimation_in_euler_angle exception:{}'.format(e))
        return -1, None, None, None

if __name__ == '__main__':
    # Read Image
    image_names = os.listdir(data_dir)
    for index, image_name in enumerate(image_names):
        print("Image:", image_name)
        imgpath = data_dir +'\\'+ image_name
        im = cv2.imread(imgpath)
        size = im.shape

        if size[0] > 700:
            h = size[0] / 3
            w = size[1] / 3
            im = cv2.resize( im, (int( w ), int( h )), interpolation=cv2.INTER_CUBIC )
            size = im.shape

        ret, image_points = get_image_points(im)
        if ret != 0:
            print('get_image_points failed')
            continue

        ret, rotation_vector, translation_vector, camera_matrix, dist_coeffs = get_pose_estimation(size, image_points)
        if ret != True:
            print('get_pose_estimation failed')
            continue
            
        ret, pitch, yaw, roll,pitch_degree, yaw_degree, roll_degree = get_euler_angle(rotation_vector)
        
        draw = im.copy()
        # Yaw:
        if yaw_degree < 0:
            output_yaw = "face turns left:"+str(abs(yaw_degree))+" degrees"
            # cv2.putText(draw,output_yaw,(20,40),cv2.FONT_HERSHEY_SIMPLEX,.5,(0,255,0))
            print(output_yaw)
        if yaw_degree == 0:
            print("face doesn't turn left or right")
        if yaw_degree > 0:
            output_yaw = "face turns right:"+str(abs(yaw_degree))+" degrees"
            # cv2.putText(draw,output_yaw,(20,40),cv2.FONT_HERSHEY_SIMPLEX,.5,(0,255,0))
            print(output_yaw)
        # Pitch:
        if pitch_degree > 0:
            output_pitch = "face downwards:"+str(abs(pitch_degree))+" degrees"
            # cv2.putText(draw,output_pitch,(20,80),cv2.FONT_HERSHEY_SIMPLEX,.5,(0,255,0))
            print(output_pitch)
        if pitch_degree == 0:
            print("face doesn't look downwards or upwards")
        if pitch_degree < 0:
            output_pitch = "face upwards:"+str(abs(pitch_degree))+" degrees"
            # cv2.putText(draw,output_pitch,(20,80),cv2.FONT_HERSHEY_SIMPLEX,.5,(0,255,0))
            print(output_pitch)
        # Roll:
        if roll_degree < 0:
            output_roll = "face bends to the right:"+str(abs(roll_degree))+" degrees"
            # cv2.putText(draw,output_roll,(20,120),cv2.FONT_HERSHEY_SIMPLEX,.5,(0,255,0))
            print(output_roll)
        if roll_degree == 0:
            print("face doesn't bend to the right or the left.")
        if roll_degree > 0:
            output_roll = "face bends to the left:"+str(abs(roll_degree))+" degrees"
            # cv2.putText(draw,output_roll,(20,120),cv2.FONT_HERSHEY_SIMPLEX,.5,(0,255,0))
            print(output_roll)
        # Initial status:
        if abs(yaw) < 0.00001 and abs(pitch) < 0.00001 and abs(roll) < 0.00001:
            # cv2.putText(draw,"Initial status",(20,40),cv2.FONT_HERSHEY_SIMPLEX,.5,(0,255,0))
            print("Initial status")
        # cv2.imwrite(save_dir+"\\"+os.path.splitext(imgpath)[0]+'_pose_estimate.jpg',draw)

 
