接上篇 人脸关键点检测和头部姿态估计数据集整理
1. 借助于OpenFace生成人脸关键点和头部姿态,命令行参数说明见:https://github.com/TadasBaltrusaitis/OpenFace/wiki/Command-line-arguments
build/bin/FaceLandmarkImg.exe -fdir "images path"
具体使用可参考博客,每张图片可得到一个csv文件(第一行为表头,之后每个检测到的人脸占一行),里面记录了人脸的很多信息,我们只需要68个关键点和头部姿态
2. 借助于人脸检测网络生成人脸检测结果,每张图片可得到一个txt文件,每行记录一个人脸框的左上角和右下角坐标(xmin ymin xmax ymax,以空格分隔),具体使用可参考博客
3. 结合OpenFace和人脸检测结果生成数据集label标注文件,格式为:
face_path x1 y1 x2 y2 x3 y3 ...... x68 y68 pitch yaw roll
执行代码:Generate_labels.py
import sys, os
import cv2
import numpy as np
import csv
import argparse
import math
from shutil import copyfile
# Check whether a 3x3 matrix is a rotation matrix.
def isRotationMatrix(R):
    """Return True when ``R`` is orthonormal, i.e. ``R^T R`` is (numerically) identity.

    :param R: 3x3 numpy array to test
    :return: truth value of ``||I - R^T R|| < 1e-6``
    """
    gram = np.dot(np.transpose(R), R)
    # For an orthonormal matrix the Gram matrix equals the identity, so the
    # norm of the residual measures how far R is from being a rotation.
    residual = np.identity(3, dtype=R.dtype) - gram
    return np.linalg.norm(residual) < 1e-6
# Rotation vector (axis * angle) to rotation matrix.
def vec2matrix(rvec):
    """Convert a rotation vector into a 3x3 rotation matrix (Rodrigues' formula).

    The direction of ``rvec`` is the rotation axis and its Euclidean norm is
    the rotation angle in radians.

    :param rvec: three rotation-vector components; any array-like holding
        exactly three numbers is accepted (shape (3,), (3, 1) or (1, 3))
    :return: 3x3 numpy array; the identity matrix for a zero rotation
    """
    rvec = np.asarray(rvec, dtype=np.float64).reshape(3, 1)
    theta = np.linalg.norm(rvec)
    # A zero rotation has no defined axis; normalising would divide by zero
    # and fill the result with NaN, so return the identity directly.
    if theta < np.finfo(np.float64).eps:
        return np.eye(3)
    r = rvec / theta
    # Skew-symmetric cross-product matrix of the unit axis.
    R_ = np.array([[0.0, -r[2, 0], r[1, 0]],
                   [r[2, 0], 0.0, -r[0, 0]],
                   [-r[1, 0], r[0, 0], 0.0]])
    return (np.cos(theta) * np.eye(3)
            + (1 - np.cos(theta)) * np.dot(r, r.T)
            + np.sin(theta) * R_)
# Rotation matrix to Euler angles (degrees).
def rotationMatrixToAngles(R):
    """Decompose a rotation matrix into three Euler angles expressed in degrees.

    :param R: 3x3 rotation matrix (asserted via ``isRotationMatrix``)
    :return: numpy array ``[-x + 10, y, z]`` in degrees, where x, y, z are the
        rotations extracted around the respective axes
    """
    assert (isRotationMatrix(R))
    sy = math.sqrt(R[0, 0] * R[0, 0] + R[1, 0] * R[1, 0])
    # y uses the same expression whether or not the matrix is near gimbal lock.
    y = math.atan2(-R[2, 0], sy)
    if sy < 1e-6:
        # Near-singular case: z cannot be separated from x, so it is fixed to 0.
        x = math.atan2(-R[1, 2], R[1, 1])
        z = 0
    else:
        x = math.atan2(R[2, 1], R[2, 2])
        z = math.atan2(R[1, 0], R[0, 0])
    pitch, yaw, roll = [angle * 180.0 / math.pi for angle in (x, y, z)]
    # NOTE(review): the first angle is negated and shifted by a fixed 10 degrees;
    # the reason for this offset is not visible here -- confirm before reuse.
    return np.array([-1.0 * pitch + 10, yaw, roll])
def compute_iou(rec1, rec2):
    """
    Intersection-over-union of two axis-aligned rectangles.
    :param rec1: (y0, x0, y1, x1), i.e. (top, left, bottom, right)
    :param rec2: (y0, x0, y1, x1)
    :return: scalar IoU; 0 when the rectangles do not overlap
    """
    top1, left1, bottom1, right1 = rec1[0], rec1[1], rec1[2], rec1[3]
    top2, left2, bottom2, right2 = rec2[0], rec2[1], rec2[2], rec2[3]

    # edges of the candidate intersection rectangle
    left_edge = max(left1, left2)
    right_edge = min(right1, right2)
    top_edge = max(top1, top2)
    bottom_edge = min(bottom1, bottom2)

    # guard clause: touching or disjoint rectangles have no overlap
    if left_edge >= right_edge or top_edge >= bottom_edge:
        return 0

    area1 = (bottom1 - top1) * (right1 - left1)
    area2 = (bottom2 - top2) * (right2 - left2)
    total_area = area1 + area2
    overlap = (right_edge - left_edge) * (bottom_edge - top_edge)
    # union = sum of both areas minus the part counted twice
    return (overlap / (total_area - overlap)) * 1.0
# Column layout of the OpenFace CSV rows, as indexed by this script.
_CSV_POSE_COL = 293        # three consecutive head-pose columns start here
_CSV_LANDMARK_COL = 296    # 68 x columns, immediately followed by 68 y columns
_NUM_LANDMARKS = 68


def _read_openface_faces(csv_file_path):
    """Parse one OpenFace CSV into a list of (pose, bbox) tuples, one per face row.

    ``pose`` is the list of three raw pose strings; ``bbox`` is
    (xmin, ymin, xmax, ymax) spanned by the 68 landmarks of that row.
    """
    faces = []
    with open(csv_file_path, 'r') as csv_file:
        reader = csv.reader(csv_file)
        next(reader, None)  # the first line is the column header
        for row in reader:
            if not row:
                continue
            xs = [float(row[_CSV_LANDMARK_COL + i]) for i in range(_NUM_LANDMARKS)]
            ys = [float(row[_CSV_LANDMARK_COL + _NUM_LANDMARKS + i])
                  for i in range(_NUM_LANDMARKS)]
            pose = [row[_CSV_POSE_COL], row[_CSV_POSE_COL + 1], row[_CSV_POSE_COL + 2]]
            faces.append((pose, (min(xs), min(ys), max(xs), max(ys))))
    return faces


def _read_pts_landmarks(pts_file_path):
    """Read the 68 annotated landmarks of a .pts file as a list of [x, y] pairs."""
    with open(pts_file_path, 'r') as fp_pts:
        # Lines 3..70 hold the points; the lines before them are skipped.
        lines = fp_pts.readlines()[3:3 + _NUM_LANDMARKS]
    points = []
    for line in lines:
        fields = line.split()
        points.append([float(fields[0]), float(fields[1])])
    if len(points) != _NUM_LANDMARKS:
        raise ValueError('expected %d landmarks in %s, found %d'
                         % (_NUM_LANDMARKS, pts_file_path, len(points)))
    return points


def _read_detections(txt_file_path):
    """Read face-detector boxes, one 'xmin ymin xmax ymax' line each."""
    boxes = []
    with open(txt_file_path, 'r') as fp_txt:
        for line in fp_txt:
            fields = line.split()
            if len(fields) < 4:
                continue  # blank or truncated line
            boxes.append([float(value) for value in fields[:4]])
    return boxes


def _best_overlap(reference, boxes, threshold):
    """Return the index of the box with the highest IoU >= threshold, or None.

    ``reference`` is (top, left, bottom, right); ``boxes`` hold
    (xmin, ymin, xmax, ymax) entries.
    """
    best_index = None
    best_iou = 0
    for index, (xmin, ymin, xmax, ymax) in enumerate(boxes):
        iou = compute_iou(reference, (ymin, xmin, ymax, xmax))
        if iou > best_iou and iou >= threshold:
            best_index = index
            best_iou = iou
    return best_index


def Generate_labels(img_path, csv_path, txt_path, face_path, label_path):
    """
    Crop every annotated face and write one label line per face to
    ``<label_path>/300w_label.txt`` in the format
    ``face_path x1 y1 ... x68 y68 pose0 pose1 pose2`` (pose in degrees).

    For each jpg/png image the ground-truth landmarks (.pts) are matched to
    the OpenFace row with the highest IoU (>= 0.5) to obtain the head pose,
    and to the detector box with the highest IoU (>= 0.7) to obtain the crop
    window. When no detector box matches, a square window around the
    landmarks is used instead.

    :param img_path: directory holding the images and their .pts annotations
    :param csv_path: directory holding the OpenFace results (<name>.csv)
    :param txt_path: directory holding the face detection results (<name>.txt)
    :param face_path: directory the cropped faces are written to
    :param label_path: directory the label file is written to
    :return: None
    """
    img_list = os.listdir(img_path)
    with open(os.path.join(label_path, '300w_label.txt'), 'w') as fp_label:
        for img_file in img_list:
            print('now is dealing with '+str(img_file))
            filename, extension = os.path.splitext(img_file)
            if extension[1:] not in ('jpg', 'png'):
                continue
            csv_file_path = os.path.join(csv_path, filename + '.csv')
            if not os.path.exists(csv_file_path):
                continue
            img = cv2.imread(os.path.join(img_path, img_file))
            if img is None:
                # cv2.imread signals failure by returning None instead of raising.
                print('skip unreadable image ' + str(img_file))
                continue

            openface_faces = _read_openface_faces(csv_file_path)

            # ground-truth landmarks and the box they span
            pts_result = _read_pts_landmarks(os.path.join(img_path, filename + '.pts'))
            xs = [point[0] for point in pts_result]
            ys = [point[1] for point in pts_result]
            xmin_pts, ymin_pts, xmax_pts, ymax_pts = min(xs), min(ys), max(xs), max(ys)
            pts_box = (ymin_pts, xmin_pts, ymax_pts, xmax_pts)

            # head pose comes from the OpenFace face that best overlaps the annotation
            face_index = _best_overlap(pts_box, [bbox for _, bbox in openface_faces], 0.5)
            if face_index is None:
                continue
            face_pose = openface_faces[face_index][0]

            # crop window comes from the best-overlapping detector box, if any
            detections = _read_detections(os.path.join(txt_path, filename + '.txt'))
            det_index = _best_overlap(pts_box, detections, 0.7)

            if det_index is not None:
                xmin_txt, ymin_txt, xmax_txt, ymax_txt = detections[det_index]
                width = xmax_txt - xmin_txt
                height = ymax_txt - ymin_txt
                # widen the box horizontally so its width matches its height
                adjust_value = 1.0 * (height - width) / 2.0
                xmin = int(round(max(0, xmin_txt - adjust_value)))
                ymin = int(round(max(0, ymin_txt)))
                xmax = int(round(min(img.shape[1], xmin + height)))
                ymax = int(round(min(img.shape[0], ymax_txt)))
            else:
                width = xmax_pts - xmin_pts
                height = ymax_pts - ymin_pts
                long_size = max(width, height)
                # centre a square of side long_size on the landmark box
                adjust_value_x = 1.0 * (long_size - width) / 2.0
                adjust_value_y = 1.0 * (long_size - height) / 2.0
                xmin = int(round(max(0, xmin_pts - adjust_value_x)))
                xmax = int(round(min(img.shape[1], xmin + long_size)))
                ymin = int(round(max(0, ymin_pts - adjust_value_y)))
                ymax = int(round(min(img.shape[0], ymin + long_size)))
            print(xmax-xmin, ymax-ymin)

            # crop face from original img
            img_face = img[ymin:ymax, xmin:xmax, :]
            face_file = os.path.join(face_path, filename + '.jpg')
            cv2.imwrite(face_file, img_face)

            # label line: path, landmarks relative to the crop origin, pose
            fields = [face_file]
            for point_x, point_y in pts_result:
                fields.append(str(point_x - xmin))
                fields.append(str(point_y - ymin))
            # the pose values are treated as radians and stored in degrees
            for angle in face_pose:
                fields.append(str(float(angle)/math.pi*180))
            fp_label.write(' '.join(fields) + '\n')
if __name__ == '__main__':
    # (flag, default, help) for every command line option of the script
    cli_options = (
        ("--img_path", 'img_path', "the path to the 300w dataset"),
        ("--csv_path", 'csv_path', "the path to the result of OpenFace"),
        ("--txt_path", 'txt_path', "the path to the face detect"),
        ("--face_path", 'face_path_test', "the path to the crop face saved"),
        ("--label_path", 'label_path_test', "the path to the label of 300w dataset"),
    )
    parser = argparse.ArgumentParser()
    for flag, default_value, help_text in cli_options:
        parser.add_argument(flag, type=str, default=default_value, help=help_text)
    opt = parser.parse_args()
    Generate_labels(opt.img_path, opt.csv_path, opt.txt_path, opt.face_path, opt.label_path)
4.参考github:https://github.com/lsy17096535/face-landmark/blob/master/train/mainloop.py
生成hdf5时对数据集做的预处理操作如下所示:
image 人脸
1. resize(img,(60,60)).astype('f4')
2. cvtColor(BGR2GRAY).reshape(1,60,60)
3. m,s = cv2.meanStdDev(image)
4. image = (image - m) / (1.e-6 + s)
landmarks 关键点
1. 乘以scale系数 rx=60/face img width ry=60/face img height
2. 归一化 /60
poses 头部姿态
1. 归一化 /50
执行代码:Generate_hdf5.py
import sys,os
import cv2
import numpy as np
from shutil import copyfile
caffe_root = 'caffe/python/'
sys.path.insert(0, caffe_root + 'python')
import caffe
import h5py
import argparse
IMAGE_SIZE = 60 #fixed size to all images
def Generate_hdf5(train_txt_file, train_h5_file, train_h5_list_file):
    """
    Generate hdf5 format based on the txt label.

    Every image is resized to IMAGE_SIZE x IMAGE_SIZE, converted to gray and
    standardised with its mean / standard deviation. Landmarks are scaled to
    the resized image and divided by IMAGE_SIZE; pose angles are divided by 50.

    :param train_txt_file: label in txt format (imgpath x1 y1 x2 y2 ...... x68 y68 pitch yaw roll)
    :param train_h5_file: h5 file to be saved (datasets 'data', 'label', 'pose')
    :param train_h5_list_file: list file in txt format to be saved
    :return: None
    :raises ValueError: when a label line does not hold a path plus 139 numbers
    :raises IOError: when an image listed in the label file cannot be read
    """
    num_landmark_values = 136
    num_pose_values = 3
    num_values = num_landmark_values + num_pose_values
    pose_normalize_factor = 50.0

    with open(train_txt_file, 'r') as T:
        # blank lines carry no sample and would otherwise leave all-zero rows
        lines = [line for line in T if line.strip()]

    HD5Images = np.zeros([len(lines), 1, IMAGE_SIZE, IMAGE_SIZE], dtype='float32')
    HD5Landmarks = np.zeros([len(lines), num_landmark_values], dtype='float32')
    HD5Poses = np.zeros([len(lines), num_pose_values], dtype='float32')

    for i, l in enumerate(lines):
        # Split from the right so an image path containing spaces stays intact.
        sp = l.strip().rsplit(None, num_values)
        if len(sp) != num_values + 1:
            raise ValueError('label line %d: expected an image path followed by %d values'
                             % (i + 1, num_values))
        print(sp[0])
        img = cv2.imread(sp[0])
        if img is None:
            # cv2.imread returns None instead of raising on a missing/corrupt file.
            raise IOError('cannot read image: ' + sp[0])
        height, width = img.shape[0], img.shape[1]
        rx, ry = 1.0*IMAGE_SIZE/width, 1.0*IMAGE_SIZE/height

        res = cv2.resize(img, (IMAGE_SIZE, IMAGE_SIZE), interpolation=cv2.INTER_CUBIC)
        image = res.astype('f4')
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY).reshape(1, IMAGE_SIZE, IMAGE_SIZE)
        # NOTE(review): meanStdDev receives a (1, IMAGE_SIZE, IMAGE_SIZE) array;
        # confirm OpenCV treats it as one channel so m and s are global statistics.
        m, s = cv2.meanStdDev(image)
        image = (image - m) / (1.e-6 + s)  # epsilon guards against a flat image
        HD5Images[i, :] = image

        values = np.array([float(v) for v in sp[1:]], dtype=np.float64)
        # x values are scaled by rx and y values by ry, then normalised by the image size
        points = values[:num_landmark_values].reshape(-1, 2)
        points = points * np.array([rx, ry]) / (1.0*IMAGE_SIZE)
        HD5Landmarks[i, :] = points.reshape(-1)
        HD5Poses[i, :] = values[num_landmark_values:] / pose_normalize_factor

    with h5py.File(train_h5_file, 'w') as H:
        H.create_dataset('data', data=HD5Images)
        H.create_dataset('label', data=HD5Landmarks)
        H.create_dataset('pose', data=HD5Poses)
    with open(train_h5_list_file, 'w') as L:
        L.write(train_h5_file)
if __name__ == '__main__':
    # (flag, default, help) for every command line option of the script
    cli_options = (
        ("--train_txt_file", "label_path/300w_label.txt", "path to label in txt format"),
        ("--train_h5_file", "test.h5", "path to generated h5 file"),
        ("--train_h5_list_file", "test_h5_list.txt", "path to generated he list file"),
    )
    parser = argparse.ArgumentParser()
    for flag, default_value, help_text in cli_options:
        parser.add_argument(flag, type=str, default=default_value, help=help_text)
    opt = parser.parse_args()
    Generate_hdf5(opt.train_txt_file, opt.train_h5_file, opt.train_h5_list_file)
5. 可视化hdf5文件是否正确
执行代码:visual_h5.py
import sys,os
import cv2
import h5py
import numpy as np
# Draw the stored landmarks on the stored face images to check the hdf5 content by eye.
result_dir = '../data/300w_ori/result'
if not os.path.isdir(result_dir):
    # cv2.imwrite does not create missing directories
    os.makedirs(result_dir)

with h5py.File('../data/300w_ori/train.h5', 'r') as f:
    data = f['data']
    label = f['label']
    num = data.shape[0]
    # Samples 1..99 are visualised (sample 0 is skipped); the upper bound is
    # limited to the number of stored samples so small files do not fail.
    for i in range(1, min(num, 100)):
        img = np.array(data[i, 0, :, :]*255)
        # replicate the gray plane into three channels so coloured marks can be drawn
        image = np.zeros([img.shape[0], img.shape[1], 3], dtype='float32')
        for channel in range(3):
            image[:, :, channel] = img[:]
        print(image.shape)
        landmark = np.array(label[i, :])
        print(landmark)
        out_file = os.path.join(result_dir, str(i)+'.jpg')
        # Writing and re-reading converts the float image into an 8-bit image.
        cv2.imwrite(out_file, image)
        img = cv2.imread(out_file)
        for j in range(68):
            # landmarks are stored divided by the 60 pixel image size
            x = int(round(landmark[2*j]*60))
            y = int(round(landmark[2*j+1]*60))
            cv2.circle(img, (x, y), 1, (0, 0, 255), 1)
        cv2.imwrite(out_file, img)