本文为个人随笔,记录阅读 facenet 中所用 mtcnn 代码时的一些笔记。本文使用的是 https://github.com/davidsandberg/facenet 中实现的 mtcnn 代码(该 facenet 项目内部使用 mtcnn 做人脸检测)。本文主要记录 mtcnn 返回的关键点坐标和 boxes 框坐标的顺序。
为此写了一段测试代码,如下:
import cv2
import numpy as np
import matplotlib.pyplot as plot
import detect
from scipy import misc
import facenet
import time, threading
import copy
import tensorflow as tf
import detect_face
import os
import json
import sys
def angle_between_2_points(p1, p2):
    """Return the inclination, in degrees, of the line through two points.

    The result is the arctangent of the slope ``(y2 - y1) / (x2 - x1)``, so it
    is independent of the order of the two points and lies in ``[-90, 90]``.

    Args:
        p1: ``(x, y)`` pair of the first point.
        p2: ``(x, y)`` pair of the second point.

    Returns:
        The angle in degrees. Vertically aligned points give ``90`` or ``-90``
        (sign of ``y2 - y1``), and coincident points give ``0``, instead of
        dividing by zero.
    """
    x1, y1 = p1
    x2, y2 = p2
    dx = x2 - x1
    dy = y2 - y1
    # arctan(dy / dx) is unchanged when both deltas are negated; normalising
    # to dx >= 0 lets arctan2 reproduce it exactly while also handling dx == 0.
    if dx < 0:
        dx, dy = -dx, -dy
    return np.degrees(np.arctan2(dy, dx))
def get_rotation_matrix(p1, p2, bb):
    """Build the affine matrix that rotates an image about the centre of a box.

    The rotation angle is the inclination of the line through ``p1`` and
    ``p2`` as computed by ``angle_between_2_points``; the scale factor is 1.

    Args:
        p1: ``(x, y)`` pair of the first reference point.
        p2: ``(x, y)`` pair of the second reference point.
        bb: Box indexed as ``[x_min, y_min, x_max, y_max, ...]``; only the
            first four entries are read.

    Returns:
        The matrix produced by ``cv2.getRotationMatrix2D``.
    """
    angle = angle_between_2_points(p1, p2)
    # Floor division keeps the same centre as before. The float() casts are
    # there because box entries may be NumPy integer scalars, which some
    # OpenCV Python bindings refuse inside a point tuple.
    xc = float((bb[0] + bb[2]) // 2)
    yc = float((bb[1] + bb[3]) // 2)
    return cv2.getRotationMatrix2D((xc, yc), float(angle), 1)
def det_face(image_path):
    """Detect faces in an image file and show landmarks, crops and aligned crops.

    The image is loaded, reduced to its first three channels and passed to
    ``align_data``. Each detected landmark is drawn on the image, then for
    every face the image is rotated about the face box so that the line
    through the first two landmarks becomes horizontal. The annotated image,
    the rotated images, the plain crops and the rotated crops are each shown
    in an OpenCV window that waits for a key press.

    Args:
        image_path: Path of the image file to process.

    Returns:
        A 3-tuple ``(img, img_list, bbox)``: the annotated image, the list of
        face crops taken from it, and the boxes returned by ``align_data``.
        When no face is detected the result is ``(img, [], None)``. When
        ``image_path`` does not exist the result is ``(None, None, None)``,
        so callers can always unpack three values.
    """
    if not os.path.exists(image_path):
        return None, None, None

    # NOTE(review): scipy.misc.imread is absent from recent SciPy releases;
    # confirm the pinned SciPy version before reusing this script.
    img = misc.imread(image_path)
    if img.ndim == 2:
        img = facenet.to_rgb(img)
    img = img[:, :, 0:3]
    height, width = img.shape[:2]

    bbox, landmarks = align_data(img, margin=16, score_threshold=0.9)
    if bbox is None:
        # align_data reports "no detection" as (None, None); there is nothing
        # to draw or crop, and iterating None would raise TypeError.
        return img, [], None

    # Each landmark vector stores all x coordinates first, then all y
    # coordinates, so point k is (landmark[k], landmark[k + half]).
    for landmark in landmarks:
        half = landmark.size // 2
        for k in range(half):
            center = (int(landmark[k]), int(landmark[k + half]))
            cv2.circle(img, center, 3, (0, 0, 255))
    cv2.imshow("lala", img)
    cv2.waitKey(0)

    # Rotate the whole image once per face, using that face's first two
    # landmarks, and keep every result so each face is later cropped from
    # its own rotated image rather than from the last one computed.
    rotated_images = []
    for landmark, bb in zip(landmarks, bbox):
        M = get_rotation_matrix(
            (landmark[0], landmark[5]), (landmark[1], landmark[6]), bb)
        rotated = cv2.warpAffine(img, M, (width, height), flags=cv2.INTER_CUBIC)
        cv2.imshow("rotated", rotated)
        cv2.waitKey(0)
        rotated_images.append(rotated)

    img_list = []
    for bb in bbox:
        cropped_img = img[bb[1]:bb[3], bb[0]:bb[2], :]
        cv2.imshow("cropped_img", cropped_img)
        cv2.waitKey(0)
        img_list.append(cropped_img)

    # The aligned crops are only displayed; the return value keeps its
    # original three-element shape.
    for bb, rotated in zip(bbox, rotated_images):
        rotated_cropped_img = rotated[bb[1]:bb[3], bb[0]:bb[2], :]
        cv2.imshow("rotated_cropped_img", rotated_cropped_img)
        cv2.waitKey(0)

    return img, img_list, bbox
def align_data(img, margin=32, score_threshold=0.9):
    """Run MTCNN on an image and return margin-expanded boxes with landmarks.

    A fresh TensorFlow graph and session are created for the call and are
    released before returning, so repeated calls do not accumulate sessions
    or add duplicate networks to the process-wide default graph.

    Args:
        img: Image array; its first two dimensions are used as
            ``(height, width)`` when clipping boxes.
        margin: Total number of pixels added to each box dimension; half is
            added on every side before clipping to the image bounds.
        score_threshold: Detections whose last box entry (the score) is not
            strictly greater than this value are discarded.

    Returns:
        ``(None, None)`` when the detector returns no boxes. Otherwise a pair
        ``(bbox, landmarks)`` of ``int32`` arrays with one row per kept face:
        each ``bbox`` row is ``[x_min, y_min, x_max, y_max]`` and each
        ``landmarks`` row is the matching column of the detector's point
        array. Both arrays are empty when every detection is below the
        threshold.
    """
    minsize = 20  # minimum size of face
    threshold = [0.6, 0.7, 0.7]  # thresholds of the three detection stages
    factor = 0.709  # scale factor
    img_size = np.asarray(img.shape)[0:2]

    # The networks returned by create_mtcnn are tied to `sess`, so detection
    # has to run while the session is still open.
    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=1.0)
        config = tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False)
        with tf.Session(config=config) as sess:
            pnet, rnet, onet = detect_face.create_mtcnn(sess, None)
            bounding_boxes, points = detect_face.detect_face(
                img, minsize, pnet, rnet, onet, threshold, factor)

    if len(bounding_boxes) < 1:
        return None, None

    half_margin = margin // 2
    bbox = []
    landmark_list = []
    for i in range(bounding_boxes.shape[0]):
        det = bounding_boxes[i]
        if det[-1] > score_threshold:
            landmark_list.append(points[:, i])
            # Grow the box by half the margin on each side, clipped to the image.
            bbox.append([
                np.maximum(det[0] - half_margin, 0),
                np.maximum(det[1] - half_margin, 0),
                np.minimum(det[2] + half_margin, img_size[1]),
                np.minimum(det[3] + half_margin, img_size[0]),
            ])
    return np.array(bbox, dtype=np.int32), np.array(landmark_list, dtype=np.int32)
# Demo entry point: run det_face on one of the bundled sample images.
image_path = './demo_images/twofaces.jpg'
# Alternative sample images; uncomment one to use it instead.
#image_path = './demo_images/Aaron_Patterson_0001.jpg'
#image_path = './demo_images/Aaron_Eckhart_0001.jpg'
det_face(image_path)
在 align_data() 函数中调用 detect_face.detect_face() 函数,返回 bounding_boxes 和 points:bounding_boxes 是检测到的人脸框,points 是对应的关键点。mtcnn 为每张人脸返回 5 个关键点,依次是左眼、右眼、鼻子、左嘴角、右嘴角。bounding_boxes 是一个 n*5 的数组,n 表示检测到的人脸数目,每一行的 5 个元素表示一个人脸框的信息。以 bb = bounding_boxes[0](第一个人脸框)为例:bb[0] 表示人脸框左上角的水平方向坐标(列坐标),bb[1] 表示人脸框左上角的垂直方向坐标(行坐标),bb[2] 表示人脸框右下角的水平方向坐标(列坐标),bb[3] 表示人脸框右下角的垂直方向坐标(行坐标),bb[4] 表示检测结果是人脸的 score。points 是一个 10*n 的数组,n 表示检测到的人脸数目,每一列的 10 个元素表示一张人脸的关键点信息。以 point = points[:, 0](第一张人脸的关键点)为例:point[0:5](即 point[0] 到 point[4])是五个关键点在水平方向上的坐标(列坐标),point[5:10](即 point[5] 到 point[9])是五个关键点在垂直方向上的坐标(行坐标)。即,(point[0], point[5]) 表示左眼的坐标,(point[1], point[6]) 表示右眼的坐标,(point[2], point[7]) 表示鼻子的坐标,(point[3], point[8]) 表示左嘴角的坐标,(point[4], point[9]) 表示右嘴角的坐标。
使用以上的代码和以下的图片,画出人脸的关键点,效果如下。