1. 借助于OpenFace生成人脸关键点和头部姿态,使用链接:https://github.com/TadasBaltrusaitis/OpenFace/wiki/Command-line-arguments

build/bin/FaceLandmarkImg.exe -fdir "images path"


2. 借助于人脸检测网络生成人脸检测结果,每张图片可得到一个txt文件,文件中每一行记录一张人脸的左上角和右下角坐标(xmin ymin xmax ymax),具体使用可参考博客


face_path x1 y1 x2 y2 x3 y3 ...... x68 y68 pitch yaw roll


import sys, os
import cv2
import numpy as np
import csv
import argparse
import math
from shutil import copyfile

def isRotationMatrix(R):
    """Return whether R is (numerically) a rotation matrix.

    The check is orthogonality only: R^T * R must equal the 3x3 identity
    within a Frobenius-norm tolerance of 1e-6.

    :param R: numpy array (its ``dtype`` is used to build the identity)
    :return: truthy value when the orthogonality residual is below 1e-6
    """
    identity = np.identity(3, dtype=R.dtype)
    residual = identity - np.dot(R.T, R)
    return np.linalg.norm(residual) < 1e-6

def vec2matrix(rvec):
    """Convert a rotation vector (axis-angle) to a 3x3 rotation matrix.

    Uses Rodrigues' formula
    ``R = cos(t) * I + (1 - cos(t)) * r * r^T + sin(t) * [r]_x``
    where ``t`` is the norm of ``rvec`` and ``r`` the unit rotation axis.

    :param rvec: three rotation-vector components; a (3, 1) column as before,
                 or any array-like holding exactly three values
    :return: 3x3 numpy array; the identity when ``rvec`` has zero length
    """
    rvec = np.asarray(rvec, dtype=np.float64).reshape(3, 1)
    theta = np.linalg.norm(rvec)
    # A zero vector means "no rotation"; dividing by theta would produce NaNs.
    if theta < np.finfo(np.float64).eps:
        return np.eye(3)
    r = rvec / theta
    rx, ry, rz = r[:, 0]
    # Skew-symmetric cross-product matrix of the unit axis.
    R_ = np.array([[0, -rz, ry],
                   [rz, 0, -rx],
                   [-ry, rx, 0]])
    cos_t = np.cos(theta)
    return cos_t * np.eye(3) + (1 - cos_t) * np.dot(r, r.T) + np.sin(theta) * R_

def rotationMatrixToAngles(R):
    """Convert a 3x3 rotation matrix to three Euler angles in degrees.

    :param R: 3x3 numpy rotation matrix (checked with isRotationMatrix)
    :return: numpy array ``[-x + 10, y, z]`` where x, y, z are the angles
             about the X, Y and Z axes converted to degrees
    :raises AssertionError: if R is not a rotation matrix
    """
    assert (isRotationMatrix(R))
    sy = math.sqrt(R[0, 0] * R[0, 0] + R[1, 0] * R[1, 0])
    singular = sy < 1e-6
    if not singular:
        x = math.atan2(R[2, 1], R[2, 2])
        y = math.atan2(-R[2, 0], sy)
        z = math.atan2(R[1, 0], R[0, 0])
    else:
        # cos(y) is ~0, so x and z are no longer independent: fix z to 0 and
        # recover x from the remaining matrix entries.
        x = math.atan2(-R[1, 2], R[1, 1])
        y = math.atan2(-R[2, 0], sy)
        z = 0

    x = math.degrees(x)
    y = math.degrees(y)
    z = math.degrees(z)
    # NOTE(review): the first angle is negated and shifted by +10 degrees, as
    # in the original code; presumably a dataset-specific convention -- confirm.
    return np.array([-1.0 * x + 10, y, z])

def compute_iou(rec1, rec2):
    """
    computing IoU
    :param rec1: (y0, x0, y1, x1), which reflects
            (top, left, bottom, right)
    :param rec2: (y0, x0, y1, x1)
    :return: scalar value of IoU; 0.0 when the rectangles do not overlap
             (rectangles that only touch along an edge count as no overlap)
    """
    # computing area of each rectangle
    S_rec1 = (rec1[2] - rec1[0]) * (rec1[3] - rec1[1])
    S_rec2 = (rec2[2] - rec2[0]) * (rec2[3] - rec2[1])

    # computing the sum_area
    sum_area = S_rec1 + S_rec2

    # find each edge of the intersection rectangle
    left_line = max(rec1[1], rec2[1])
    right_line = min(rec1[3], rec2[3])
    top_line = max(rec1[0], rec2[0])
    bottom_line = min(rec1[2], rec2[2])

    # judge if there is an intersection
    if left_line >= right_line or top_line >= bottom_line:
        return 0.0

    intersect = (right_line - left_line) * (bottom_line - top_line)
    # union = sum of both areas minus the doubly counted intersection;
    # float() keeps the ratio fractional for integer coordinates.
    return float(intersect) / (sum_area - intersect)

def _read_openface_faces(csv_file_path):
    """Read one OpenFace result CSV; return a list of (pose, bbox) per face row.

    ``pose`` is the three raw string fields at columns 293-295 and ``bbox`` is
    ``[xmin, ymin, xmax, ymax]`` spanned by the 68 landmark columns (x values at
    296..363, y values at 364..431).
    NOTE(review): these column offsets are the ones hard-coded in the original
    script; they depend on the OpenFace version/output flags -- confirm them
    against the header of your CSV files.
    """
    faces = []
    with open(csv_file_path, 'r') as csv_file:
        reader = csv.reader(csv_file)
        for item in reader:
            if reader.line_num == 1:
                # first row is the column header
                continue
            xs = [float(item[296 + i]) for i in range(68)]
            ys = [float(item[296 + i + 68]) for i in range(68)]
            pose = [item[293], item[294], item[295]]
            faces.append((pose, [min(xs), min(ys), max(xs), max(ys)]))
    return faces


def _read_pts_landmarks(pts_file_path):
    """Read the 68 ground-truth points of a .pts file as [[x, y], ...]."""
    with open(pts_file_path, 'r') as fp_pts:
        # the first three lines are skipped, the next 68 hold one "x y" pair each
        lines = fp_pts.readlines()[3:71]
    points = []
    for line in lines:
        fields = line.split()
        points.append([float(fields[0]), float(fields[1])])
    if len(points) != 68:
        raise ValueError('expected 68 landmarks in ' + pts_file_path)
    return points


def _read_best_detection(txt_file_path, pts_box, threshold):
    """Return the detection [xmin, ymin, xmax, ymax] that best overlaps pts_box.

    Each non-empty line of the file holds ``xmin ymin xmax ymax``. ``pts_box``
    is (top, left, bottom, right). Returns None when no detection reaches
    ``threshold`` IoU.
    """
    best_bbox = None
    iou_max = 0
    with open(txt_file_path, 'r') as fp_txt:
        for line in fp_txt:
            fields = line.split()
            if len(fields) < 4:
                continue
            xmin_txt, ymin_txt, xmax_txt, ymax_txt = [float(v) for v in fields[:4]]
            iou = compute_iou(pts_box, (ymin_txt, xmin_txt, ymax_txt, xmax_txt))
            if iou > iou_max and iou >= threshold:
                iou_max = iou
                best_bbox = [xmin_txt, ymin_txt, xmax_txt, ymax_txt]
    return best_bbox


def Generate_labels(img_path, csv_path, txt_path, face_path, label_path):
    """
    Crop the annotated face of every image and write one label line per face.

    For every ``.jpg``/``.png`` in ``img_path`` that has an OpenFace CSV, the
    ground-truth landmarks (``<name>.pts`` next to the image) are matched by IoU
    against the OpenFace faces (>= 0.5, to obtain the head pose) and against
    the detector boxes (>= 0.7, to obtain the crop). Images without a matching
    OpenFace face are skipped; without a matching detection the crop falls
    back to a square around the ground-truth landmarks.

    Each line written to ``<label_path>/300w_label.txt`` is
    ``face_path x1 y1 ... x68 y68 pitch yaw roll`` with landmark coordinates
    relative to the crop.

    :param img_path: directory with the images and their .pts landmark files
    :param csv_path: directory with the OpenFace results (<name>.csv)
    :param txt_path: directory with the face detection results (<name>.txt)
    :param face_path: directory the cropped faces are written to
    :param label_path: directory the label file is written to
    :raises IOError: if a required .pts or .txt file is missing
    """
    with open(os.path.join(label_path, '300w_label.txt'), 'w') as fp_label:
        # sorted() only makes the order of the label lines reproducible
        for img_file in sorted(os.listdir(img_path)):
            print('now is dealing with '+str(img_file))
            filename, extension = os.path.splitext(img_file)
            if extension[1:] not in ('jpg', 'png'):
                continue
            csv_file_path = os.path.join(csv_path, filename + '.csv')
            if not os.path.exists(csv_file_path):
                # OpenFace produced no result for this image
                continue
            img = cv2.imread(os.path.join(img_path, img_file))
            if img is None:
                print('can not read image ' + str(img_file))
                continue

            # ground-truth landmarks and their bounding box
            pts_result = _read_pts_landmarks(os.path.join(img_path, filename + '.pts'))
            xmin_pts = min(p[0] for p in pts_result)
            ymin_pts = min(p[1] for p in pts_result)
            xmax_pts = max(p[0] for p in pts_result)
            ymax_pts = max(p[1] for p in pts_result)
            pts_box = (ymin_pts, xmin_pts, ymax_pts, xmax_pts)

            # pick the OpenFace face that overlaps the annotated face best
            face_pose = []
            iou_max = 0
            for pose, (xmin_csv, ymin_csv, xmax_csv, ymax_csv) in _read_openface_faces(csv_file_path):
                iou = compute_iou(pts_box, (ymin_csv, xmin_csv, ymax_csv, xmax_csv))
                if iou > iou_max and iou >= 0.5:
                    iou_max = iou
                    face_pose = pose
            if not face_pose:
                # no head pose available for the annotated face
                continue

            # read the face detected result with txt format and get the face bbox
            face_bbox = _read_best_detection(os.path.join(txt_path, filename + '.txt'), pts_box, 0.7)

            # determine if the face is detected
            if face_bbox:
                # widen the detection symmetrically so the crop width equals its height
                xmin_txt, ymin_txt, xmax_txt, ymax_txt = face_bbox
                width = xmax_txt - xmin_txt
                height = ymax_txt - ymin_txt
                adjust_value = 1.0 * (height - width) / 2.0
                xmin = int(round(max(0, xmin_txt - adjust_value)))
                # clamp to the image: a negative start would wrap around when slicing
                ymin = int(round(max(0, ymin_txt)))
                xmax = int(round(min(img.shape[1], xmin+height)))
                ymax = int(round(min(img.shape[0], ymax_txt)))
                print(xmax-xmin, ymax-ymin)
            else:
                # fall back to a square around the ground-truth landmarks
                width = xmax_pts - xmin_pts
                height = ymax_pts - ymin_pts
                long_size = max(width, height)
                adjust_value_x = 1.0 * (long_size - width) / 2.0
                adjust_value_y = 1.0 * (long_size - height) / 2.0
                xmin = int(round(max(0, xmin_pts - adjust_value_x)))
                xmax = int(round(min(img.shape[1], xmin+long_size)))
                ymin = int(round(max(0, ymin_pts - adjust_value_y)))
                ymax = int(round(min(img.shape[0], ymin+long_size)))

            # crop face from original img
            img_face = img[ymin:ymax, xmin:xmax, :]
            if img_face.size == 0:
                print('empty face crop for ' + str(img_file))
                continue
            cropped_path = os.path.join(face_path, filename + '.jpg')
            cv2.imwrite(cropped_path, img_face)

            # head pose: rotation vector -> rotation matrix -> angles in degrees
            # NOTE(review): the three pose columns are treated as an axis-angle
            # vector here, as in the original code -- confirm this matches what
            # OpenFace writes into those columns.
            rotation_vector = np.array([[float(v)] for v in face_pose])
            R = vec2matrix(rotation_vector)
            headpose = rotationMatrixToAngles(R)

            # path of the cropped face, crop-relative landmarks, then the pose
            fields = [cropped_path]
            for i in range(68):
                fields.append(str(pts_result[i][0] - xmin))
                fields.append(str(pts_result[i][1] - ymin))
            fields.extend([str(headpose[0]), str(headpose[1]), str(headpose[2])])
            fp_label.write(' '.join(fields) + '\n')

if __name__ == '__main__':
    # (flag, default value, help text) of every directory option of the script
    cli_options = (
        ("--img_path", 'img_path', "the path to the 300w dataset"),
        ("--csv_path", 'csv_path', "the path to the result of OpenFace"),
        ("--txt_path", 'txt_path', "the path to the face detect"),
        ("--face_path", 'face_path_test', "the path to the crop face saved"),
        ("--label_path", 'label_path_test', "the path to the label of 300w dataset"),
    )
    parser = argparse.ArgumentParser()
    for flag, default_dir, description in cli_options:
        parser.add_argument(flag, type=str, default=default_dir, help=description)
    opt = parser.parse_args()
    # run the label generation with the parsed directories
    Generate_labels(
        opt.img_path,
        opt.csv_path,
        opt.txt_path,
        opt.face_path,
        opt.label_path,
    )



image   人脸

1. resize(img,(60,60)).astype('f4')
2. cv2.cvtColor(image, cv2.COLOR_BGR2GRAY).reshape(1,60,60)
3. m,s = cv2.meanStdDev(image)
4. image = (image - m) / (1.e-6 + s)
landmarks  关键点
1. 乘以scale系数  rx=60/face img width  ry=60/face img height
2. 归一化  /60
poses  头部姿态
1. 归一化  /50


import sys,os
import cv2
import numpy as np
from shutil import copyfile
caffe_root = 'caffe/python/'
sys.path.insert(0, caffe_root + 'python')
import caffe
import h5py
import argparse

IMAGE_SIZE = 60  #fixed size to all images

def Generate_hdf5(train_txt_file, train_h5_file, train_h5_list_file):
    """
    Generate hdf5 format based on the txt label

    Every image is resized to IMAGE_SIZE x IMAGE_SIZE, converted to grayscale
    and normalized with its mean/std. Landmarks are scaled to the resized
    image and divided by IMAGE_SIZE; pose angles are divided by 50.

    :param train_txt_file: label in txt format (imgpath x1 y1 x2 y2 ...... x68 y68 pitch yaw roll)
    :param train_h5_file: h5 file to be saved (datasets 'data', 'label', 'pose')
    :param train_h5_list_file: list file in txt format to be saved
    :raises IOError: if an image listed in the label file cannot be read
    :raises ValueError: if a label line has fewer than 140 fields
    """
    with open(train_txt_file, 'r') as T:
        # ignore blank lines so they do not become all-zero samples
        lines = [l for l in T.readlines() if l.strip()]

    HD5Images = np.zeros([len(lines), 1, IMAGE_SIZE, IMAGE_SIZE], dtype='float32')
    HD5Landmarks = np.zeros([len(lines), 136], dtype='float32')
    HD5Poses = np.zeros([len(lines), 3], dtype='float32')

    for i, l in enumerate(lines):
        sp = l.split()
        if len(sp) < 1 + 136 + 3:
            raise ValueError('malformed label line %d in %s' % (i + 1, train_txt_file))
        img = cv2.imread(sp[0])
        if img is None:
            raise IOError('can not read image: ' + sp[0])
        height, width = img.shape[0], img.shape[1]
        rx, ry = 1.0*IMAGE_SIZE/width, 1.0*IMAGE_SIZE/height
        # interpolation is passed by keyword; the third positional argument of
        # cv2.resize is the output array, not a scale factor
        res = cv2.resize(img, (IMAGE_SIZE, IMAGE_SIZE), interpolation=cv2.INTER_CUBIC)
        image = res.astype('f4')
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY).reshape(1, IMAGE_SIZE, IMAGE_SIZE)
        # NOTE(review): meanStdDev is applied after the reshape to
        # (1, IMAGE_SIZE, IMAGE_SIZE), exactly as in the original; confirm the
        # statistics are the ones the network's inference code expects.
        m, s = cv2.meanStdDev(image)
        image = (image - m) / (1.e-6 + s)
        HD5Images[i, :] = image

        for j in range(136):
            # even indices are x coordinates (scaled by rx), odd ones y (ry)
            if (j + 1) % 2:
                scale_factor = rx
            else:
                scale_factor = ry
            HD5Landmarks[i, j] = float(sp[j + 1]) * float(scale_factor) / (1.0*IMAGE_SIZE)

        normalize_factor = 50
        for j in range(3):
            HD5Poses[i, j] = float(sp[j + 1 + 136]) / float(normalize_factor)

    with h5py.File(train_h5_file, 'w') as H:
        H.create_dataset('data', data=HD5Images)
        H.create_dataset('label', data=HD5Landmarks)
        H.create_dataset('pose', data=HD5Poses)
    with open(train_h5_list_file, 'w') as L:
        # presumably the list file names the generated h5 file, one path per
        # line -- verify against the consumer of this list
        L.write(train_h5_file + '\n')

if __name__ == '__main__':
    # command line entry point: convert the txt label file into an hdf5 file
    parser = argparse.ArgumentParser()
    parser.add_argument("--train_txt_file",type=str,default="label_path/300w_label.txt", help="path to label in txt format")
    parser.add_argument("--train_h5_file",type=str,default="test.h5",help="path to generated h5 file")
    parser.add_argument("--train_h5_list_file",type=str,default="test_h5_list.txt",help="path to generated he list file")
    # the options must be parsed before they can be read from `opt`
    opt = parser.parse_args()
    Generate_hdf5(opt.train_txt_file, opt.train_h5_file, opt.train_h5_list_file)

5. 可视化hdf5文件是否正确


import sys,os
import cv2
import h5py
import numpy as np

# Visual sanity check of the generated hdf5 file: every inspected sample is
# rebuilt as a 3-channel image, its landmarks are drawn on top, and the result
# is saved as <index>.jpg in the result directory.
result_dir = '../data/300w_ori/result'
if not os.path.isdir(result_dir):
    os.makedirs(result_dir)

with h5py.File('../data/300w_ori/train.h5','r') as f:
    data = f['data']
    label = f['label']
    pose = f['pose']
    num = data.shape[0]
    # inspect samples 1..99 as before, but never index past the dataset end
    for i in range(1, min(num, 100)):
        img = np.array(data[i,0,:,:]*255)
        # replicate the single channel and convert to 8 bit so that the
        # landmarks can be drawn in color and the image saved as jpg
        image = np.zeros([img.shape[0],img.shape[1],3],dtype='float32')
        image[:,:,0] = img[:]
        image[:,:,1] = img[:]
        image[:,:,2] = img[:]
        image = np.clip(np.rint(image), 0, 255).astype('uint8')

        # landmarks are stored normalized by the image size
        landmark = np.array(label[i,:])
        for j in range(68):
            x = int(round(landmark[2*j]*img.shape[1]))
            y = int(round(landmark[2*j+1]*img.shape[0]))
            cv2.circle(image, (x, y), 1, (0, 0, 255), -1)

        # pose values were divided by 50 when the hdf5 file was generated
        Euler = np.array(pose[i,:])
        print('sample %d pose: %s' % (i, Euler * 50))

        # write the image rebuilt from the hdf5 data; the original re-read the
        # output path here, which fails when that file does not exist yet
        cv2.imwrite(os.path.join(result_dir, str(i)+'.jpg'), image)

