A rough first attempt, offered to invite better solutions:
Up/down detection is mostly reliable on a frontal face, but with a profile view the up/down classification becomes erratic and is sometimes wrong.
The approach suits a single face in frame; it does not cope with multiple faces. It is not highly precise, but it is correct most of the time.
This write-up explains the method well:
https://blog.csdn.net/u014090429/article/details/100762308
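What follows are two files: pose_estimate.py, which turns five facial landmarks into head pose angles via cv2.solvePnP, and a RetinaFace webcam demo that feeds it detections. The core idea, as a minimal standalone sketch (the 2D landmark pixel values and the 640x480 intrinsics here are made up for illustration):

import cv2
import numpy as np

# Generic 3D face model: left eye, right eye, nose tip, left/right mouth corner.
model_3d = np.array([(-225.0, 170.0, -135.0), (225.0, 170.0, -135.0),
                     (0.0, 0.0, 0.0),
                     (-150.0, -150.0, -125.0), (150.0, -150.0, -125.0)],
                    dtype=np.double)
# Matching 2D landmarks in pixels (illustrative values, not real detections).
pts_2d = np.array([(250.0, 200.0), (390.0, 200.0), (320.0, 280.0),
                   (270.0, 350.0), (370.0, 350.0)], dtype=np.double)
# Crude pinhole intrinsics: focal length ~ image width, centre of a 640x480 frame.
K = np.array([[640.0, 0, 320.0], [0, 640.0, 240.0], [0, 0, 1]], dtype=np.double)
ok, rvec, tvec = cv2.solvePnP(model_3d, pts_2d, K, np.zeros(4))
print(ok, rvec.ravel())  # rvec encodes the head rotation (axis-angle)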
# ---- pose_estimate.py ----
# encoding=utf8
import math

import cv2
import numpy as np
def drawResult(img, yaw, pitch, roll, save_dir):
    # Overlay the three angles on a copy of the frame and save it to save_dir.
    draw = img.copy()
    cv2.putText(draw, "Yaw:" + str(yaw), (20, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0))
    cv2.putText(draw, "Pitch:" + str(pitch), (20, 80), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0))
    cv2.putText(draw, "Roll:" + str(roll), (20, 120), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0))
    cv2.imwrite(save_dir, draw)

# Debounce state for the up/down classifier: "up"/"down" is only reported after
# the same status has persisted for more than two consecutive frames.
last_status = "up"
last_count = 0
def headPosEstimate(img, landmarks):
    # landmarks: 5x2 double array in the same order as model_3d_points below.
    global last_status, last_count
    # All matrices passed to solvePnP must be of dtype double.
    # Generic 3D face model, in the same order as the five RetinaFace landmarks:
    model_3d_points = np.array([(-225.0, 170.0, -135.0),   # left corner of left eye
                                (225.0, 170.0, -135.0),    # right corner of right eye
                                (0.0, 0.0, 0.0),           # nose tip
                                (-150.0, -150.0, -125.0),  # left mouth corner
                                (150.0, -150.0, -125.0)],  # right mouth corner
                               dtype=np.double)
    # Approximate pinhole intrinsics: focal length ~ image width, principal
    # point at the image centre, no lens distortion.
    img_size = img.shape
    focal_length = img_size[1]
    center = [img_size[1] / 2, img_size[0] / 2]
    camera_matrix = np.array([[focal_length, 0, center[0]],
                              [0, focal_length, center[1]],
                              [0, 0, 1]], dtype=np.double)
    dist_coeffs = np.array([0, 0, 0, 0], dtype=np.double)
    found, rotation_vector, translation_vector = cv2.solvePnP(
        model_3d_points, landmarks, camera_matrix, dist_coeffs)
    # Convert the axis-angle rotation vector to a unit quaternion (w, x, y, z).
    # (theta is assumed non-zero, which holds for real solvePnP output.)
    theta = cv2.norm(rotation_vector, cv2.NORM_L2)
    w = math.cos(theta / 2)
    x = math.sin(theta / 2) * rotation_vector[0][0] / theta
    y = math.sin(theta / 2) * rotation_vector[1][0] / theta
    z = math.sin(theta / 2) * rotation_vector[2][0] / theta
    # Quaternion -> Euler angles. The constants subtracted from each angle are
    # empirical calibration offsets for this particular 3D model and camera.
    ysqr = y * y
    # pitch (x-axis rotation)
    t0 = 2.0 * (w * x + y * z)
    t1 = 1.0 - 2.0 * (x * x + ysqr)
    pitch = math.atan2(t0, t1) - 0.8356857
    # yaw (y-axis rotation); clamp so asin stays defined
    t2 = 2.0 * (w * y - z * x)
    t2 = max(-1.0, min(1.0, t2))
    yaw = math.asin(t2) + 0.005409
    # roll (z-axis rotation)
    t3 = 2.0 * (w * z + x * y)
    t4 = 1.0 - 2.0 * (ysqr + z * z)
    roll = math.atan2(t3, t4) - 2.573345436
    # Convert radians to degrees.
    pitch_degree = int((pitch / math.pi) * 180)
    yaw_degree = int((yaw / math.pi) * 180)
    roll_degree = int((roll / math.pi) * 180)
    # Classify the pose. Note the printed order is pitch, yaw, roll.
    level = 10
    if yaw_degree > 6 * level:
        print("left  : pitch,yaw,roll", pitch_degree, yaw_degree, roll_degree)
    elif yaw_degree < -6 * level:
        print("right : pitch,yaw,roll", pitch_degree, yaw_degree, roll_degree)
    elif pitch_degree < -220:
        # With the -0.8356857 offset, pitch_degree spans roughly [-228, 132],
        # so the down/up thresholds sit near the ends of that range.
        print(last_status, last_count)
        if last_status == "up":
            last_count = 1
            last_status = "down"
        if last_status == "down":
            if last_count > 2:
                print("down  : pitch,yaw,roll", pitch_degree, yaw_degree, roll_degree)
            else:
                last_count += 1
    elif pitch_degree > 124:
        print(last_status, last_count)
        if last_status == "down":
            last_count = 1
            last_status = "up"
        if last_status == "up":
            if last_count > 2:
                print("up    : pitch,yaw,roll", pitch_degree, yaw_degree, roll_degree)
            else:
                last_count += 1
    # Roll (tilting the head left/right) could be classified the same way by
    # thresholding roll_degree; that branch is left out here.
# ---- RetinaFace webcam demo (uses headPosEstimate from pose_estimate.py) ----
from __future__ import print_function
import argparse
import time

import cv2
import numpy as np
import torch
import torch.backends.cudnn as cudnn

from nets.prior_box import PriorBox
from nets.retinaface import RetinaFace
from utils.box_utils import decode, decode_landm, py_cpu_nms
from pose_estimate import headPosEstimate
parser = argparse.ArgumentParser(description='Retinaface')
parser.add_argument('-m', '--trained_model', default='weights/mobilenet0.25_Final.pth',
                    type=str, help='Trained state_dict file path to open')
parser.add_argument('--network', default='mobile0.25', help='Backbone network mobile0.25 or resnet50')
parser.add_argument('--cpu', action="store_true", default=False, help='Use cpu inference')
parser.add_argument('--confidence_threshold', default=0.98, type=float, help='confidence_threshold')
parser.add_argument('--top_k', default=15, type=int, help='top_k')
parser.add_argument('--nms_threshold', default=0.2, type=float, help='nms_threshold')
parser.add_argument('--keep_top_k', default=12, type=int, help='keep_top_k')
parser.add_argument('-s', '--save_image', action="store_true", default=True, help='show detection results')
parser.add_argument('--vis_thres', default=0.9, type=float, help='visualization_threshold')
args = parser.parse_args()
def check_keys(model, pretrained_state_dict):
    ckpt_keys = set(pretrained_state_dict.keys())
    model_keys = set(model.state_dict().keys())
    used_pretrained_keys = model_keys & ckpt_keys
    unused_pretrained_keys = ckpt_keys - model_keys
    missing_keys = model_keys - ckpt_keys
    print('Missing keys:{}'.format(len(missing_keys)))
    print('Unused checkpoint keys:{}'.format(len(unused_pretrained_keys)))
    print('Used keys:{}'.format(len(used_pretrained_keys)))
    assert len(used_pretrained_keys) > 0, 'load NONE from pretrained checkpoint'
    return True
def remove_prefix(state_dict, prefix):
    '''Old style models store all parameter names with a common prefix 'module.'.'''
    print('remove prefix \'{}\''.format(prefix))
    f = lambda x: x.split(prefix, 1)[-1] if x.startswith(prefix) else x
    return {f(key): value for key, value in state_dict.items()}
def load_model(model, pretrained_path, load_to_cpu):
    print('Loading pretrained model from {}'.format(pretrained_path))
    if load_to_cpu:
        pretrained_dict = torch.load(pretrained_path, map_location=lambda storage, loc: storage)
    else:
        device = torch.cuda.current_device()
        pretrained_dict = torch.load(pretrained_path, map_location=lambda storage, loc: storage.cuda(device))
    if "state_dict" in pretrained_dict.keys():
        pretrained_dict = remove_prefix(pretrained_dict['state_dict'], 'module.')
    else:
        pretrained_dict = remove_prefix(pretrained_dict, 'module.')
    check_keys(model, pretrained_dict)
    model.load_state_dict(pretrained_dict, strict=False)
    return model
if __name__ == '__main__':
    torch.set_grad_enabled(False)
    # mobilenet0.25 backbone configuration
    cfg = {
        'min_sizes': [[16, 32], [64, 128], [256, 512]],
        'steps': [8, 16, 32],
        'variance': [0.1, 0.2],
        'clip': False,
        'return_layers': {'stage1': 1, 'stage2': 2, 'stage3': 3},
        'in_channel': 32,
        'out_channel': 64
    }
    # net and model
    net = RetinaFace(cfg=cfg, phase='test')
    net = load_model(net, args.trained_model, args.cpu)
    net.eval()
    print('Finished loading model!')
    cudnn.benchmark = True
    device = torch.device("cpu" if args.cpu else "cuda")
    net = net.to(device)
    resize = 1
    vc = cv2.VideoCapture(0)  # open the default webcam
    width = 640   # a smaller size such as 240x180 runs faster but detects worse
    height = 480
    # Read one frame up front so the prior boxes and the box-rescaling tensor
    # can be built once for the fixed frame size.
    rval, img_raw = vc.read()
    img_raw = cv2.resize(img_raw, (width, height))
    im_height, im_width, _ = img_raw.shape
    scale = torch.Tensor([im_width, im_height, im_width, im_height])
    scale = scale.to(device)
    priorbox = PriorBox(cfg, image_size=(im_height, im_width))
    priors = priorbox.forward()
    priors = priors.to(device)
    while True:  # loop over webcam frames
        rval, img_raw = vc.read()
        if not rval:
            break
        img_raw = cv2.resize(img_raw, (width, height))
        img = np.float32(img_raw)
        tic = time.time()
        img -= (104, 117, 123)        # subtract the BGR channel means
        img = img.transpose(2, 0, 1)  # HWC -> CHW
        img = torch.from_numpy(img).unsqueeze(0)
        img = img.to(device)
        loc, conf, landms = net(img)  # forward pass
        prior_data = priors.data
        boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
        boxes = boxes * scale / resize
        boxes = boxes.cpu().numpy()
        scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
        landms = decode_landm(landms.data.squeeze(0), prior_data, cfg['variance'])
        scale1 = torch.Tensor([img.shape[3], img.shape[2], img.shape[3], img.shape[2],
                               img.shape[3], img.shape[2], img.shape[3], img.shape[2],
                               img.shape[3], img.shape[2]])
        scale1 = scale1.to(device)
        landms = landms * scale1 / resize
        landms = landms.cpu().numpy()
        # ignore low scores
        inds = np.where(scores > args.confidence_threshold)[0]
        boxes = boxes[inds]
        landms = landms[inds]
        scores = scores[inds]
        # keep top-K before NMS
        order = scores.argsort()[::-1][:args.top_k]
        boxes = boxes[order]
        landms = landms[order]
        scores = scores[order]
        # do NMS
        dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
        keep = py_cpu_nms(dets, args.nms_threshold)
        dets = dets[keep, :]
        landms = landms[keep]
        # keep top-K after NMS
        dets = dets[:args.keep_top_k, :]
        landms = landms[:args.keep_top_k, :]
        dets = np.concatenate((dets, landms), axis=1)
        if time.time() - tic > 0.4:
            print('net forward time: {:.4f}'.format(time.time() - tic))
        # draw detections and estimate the head pose for each face
        for b in dets:
            if b[4] < args.vis_thres:
                continue
            text = "{:.4f}".format(b[4])
            b = list(map(int, b))
            cv2.rectangle(img_raw, (b[0], b[1]), (b[2], b[3]), (0, 0, 255), 2)
            cx = b[0]
            cy = b[1] + 12
            cv2.putText(img_raw, text, (cx, cy),
                        cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255))
            # the five landmarks (eyes, nose, mouth corners) as a 5x2 double array
            face_key = np.array(b[5:15], dtype=np.double).reshape(-1, 2)
            headPosEstimate(img_raw, face_key)
            # landmarks
            cv2.circle(img_raw, (b[5], b[6]), 1, (0, 0, 255), 4)
            cv2.circle(img_raw, (b[7], b[8]), 3, (0, 255, 255), 4)
            cv2.circle(img_raw, (b[9], b[10]), 5, (255, 0, 255), 4)
            cv2.circle(img_raw, (b[11], b[12]), 7, (0, 255, 0), 4)
            cv2.circle(img_raw, (b[13], b[14]), 9, (255, 0, 0), 4)
        cv2.imshow("pose", img_raw)
        if cv2.waitKey(1) & 0xFF == ord('q'):  # press q to quit
            break
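Assuming the demo above is saved as detect_video.py (the original post does not give a filename) alongside pose_estimate.py, the nets/ and utils/ modules, and the weights file, it can be run with:

python detect_video.py --trained_model weights/mobilenet0.25_Final.pth --network mobile0.25

Add --cpu to run inference without a GPU.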