RetinaFace face pose estimation

A few rough notes to start with:

Up/down detection is fairly reliable for a frontal face, but once the face turns sideways, up and down become confused and are sometimes wrong.

This is suited to a single face in frame; it does not handle multiple faces well. It is not especially precise, but it is right most of the time.

This article explains the approach well:

https://blog.csdn.net/u014090429/article/details/100762308

# pose_estimate.py  (the detection script further below imports headPosEstimate from this module)
# encoding=utf8
import os
import numpy as np
import cv2
import math


def drawResult(img, yaw, pitch, roll, save_dir):
    # draw the estimated angles onto the frame in place (save_dir is unused here)
    cv2.putText(img, "Yaw:" + str(yaw), (20, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0))
    cv2.putText(img, "Pitch:" + str(pitch), (20, 80), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0))
    cv2.putText(img, "Roll:" + str(roll), (20, 120), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0))


# debounce state shared across frames: the last reported up/down status and
# how many consecutive frames have agreed with it
last_status = "up"
last_count = 0

def headPosEstimate(img, landmarks):
    # all input matrices to cv2.solvePnP must be double precision
    # generic 3D face model points, in the same order as the RetinaFace
    # landmarks: left eye, right eye, nose tip, left/right mouth corners
    model_3d_points = np.array([(-225.0, 170.0, -135.0),   # left eye, outer corner
                                (225.0, 170.0, -135.0),    # right eye, outer corner
                                (0.0, 0.0, 0.0),           # nose tip
                                (-150.0, -150.0, -125.0),  # mouth, left corner
                                (150.0, -150.0, -125.0)],  # mouth, right corner
                               dtype=np.double)

    # approximate pinhole camera: focal length ~ image width in pixels,
    # principal point at the image center, no lens distortion
    img_size = img.shape
    focal_length = img_size[1]
    center = (img_size[1] / 2, img_size[0] / 2)
    camera_matrix = np.array([[focal_length, 0, center[0]],
                              [0, focal_length, center[1]],
                              [0, 0, 1]], dtype=np.double)

    dist_coeffs = np.zeros(4, dtype=np.double)  # assume zero distortion

    found, rotation_vector, translation_vector = cv2.solvePnP(model_3d_points, landmarks, camera_matrix, dist_coeffs)

    # Earlier approach, kept for reference: threshold the raw rotation
    # vector (converted to degrees) directly.
    # x = rotation_vector[0][0] / math.pi * 180
    # y = -rotation_vector[1][0] / math.pi * 180
    # z = -rotation_vector[2][0] / math.pi * 180
    # if z > 4:   print("up", x, y, z)
    # if z < -3:  print("down", x, y, z)
    # if y > 40:  print("right", x, y, z)
    # if y < -40: print("left", x, y, z)


    # Convert the axis-angle rotation vector to a quaternion (w, x, y, z);
    # theta is the rotation angle and rotation_vector / theta the unit axis
    # (theta is effectively never zero for a real detection).
    theta = cv2.norm(rotation_vector, cv2.NORM_L2)
    w = math.cos(theta / 2)
    x = math.sin(theta / 2) * rotation_vector[0][0] / theta
    y = math.sin(theta / 2) * rotation_vector[1][0] / theta
    z = math.sin(theta / 2) * rotation_vector[2][0] / theta

    ysqr = y * y

    # pitch (x-axis rotation); the subtracted constant is an empirical
    # offset that re-centers the raw angle around zero for a frontal face
    t0 = 2.0 * (w * x + y * z)
    t1 = 1.0 - 2.0 * (x * x + ysqr)
    pitch = math.atan2(t0, t1) - 0.8356857

    # yaw (y-axis rotation); clamp before asin to avoid domain errors
    t2 = 2.0 * (w * y - z * x)
    t2 = max(-1.0, min(1.0, t2))
    yaw = math.asin(t2) + 0.005409

    # roll (z-axis rotation), again with an empirical zero offset
    t3 = 2.0 * (w * z + x * y)
    t4 = 1.0 - 2.0 * (ysqr + z * z)
    roll = math.atan2(t3, t4) - 2.573345436

    # convert radians to degrees
    pitch_degree = int((pitch / math.pi) * 180)
    yaw_degree = int((yaw / math.pi) * 180)
    roll_degree = int((roll / math.pi) * 180)

    # drawResult(img, yaw, pitch, roll, save_dir)

    global last_status, last_count
    level = 10  # base threshold, in degrees

    if yaw_degree > 6 * level:
        print("left: pitch,yaw,roll", pitch_degree, yaw_degree, roll_degree)
    elif yaw_degree < -6 * level:
        print("right: pitch,yaw,roll", pitch_degree, yaw_degree, roll_degree)
    elif pitch_degree < -220:
        # debounce: only report "down" once it has persisted for a few frames
        print(last_status, last_count)
        if last_status == "up":
            last_count = 1
            last_status = "down"
        if last_status == "down":
            if last_count > 2:
                print("down: pitch,yaw,roll", pitch_degree, yaw_degree, roll_degree)
            else:
                last_count += 1
    elif pitch_degree > 124:
        # same debounce in the opposite direction
        print(last_status, last_count)
        if last_status == "down":
            last_count = 1
            last_status = "up"
        if last_status == "up":
            if last_count > 2:
                print("up: pitch,yaw,roll", pitch_degree, yaw_degree, roll_degree)
            else:
                last_count += 1
        # output_pitch = "face upwards: " + str(abs(pitch_degree)) + " degrees"
        # cv2.putText(draw, output_pitch, (20, 80), cv2.FONT_HERSHEY_SIMPLEX, .5, (0, 255, 0))

    # (optional) roll-based tilt reporting, kept for reference:
    # if roll_degree < -level:
    #     print("face bends to the right:", abs(roll_degree), "degrees")
    # if roll_degree > level:
    #     print("face bends to the left:", abs(roll_degree), "degrees")
    # if abs(yaw) < 1e-5 and abs(pitch) < 1e-5 and abs(roll) < 1e-5:
    #     print("Initial status")
The detection script below lives in a separate file. It imports headPosEstimate from pose_estimate.py above and runs RetinaFace detection on a webcam feed:

from __future__ import print_function
import os
import argparse
import torch
import torch.backends.cudnn as cudnn
import numpy as np
# from data import cfg_mnet, cfg_re50
# from layers.functions.prior_box import PriorBox
# from utils.nms.py_cpu_nms import py_cpu_nms
import cv2
# from models.retinaface import RetinaFace
from nets.prior_box import PriorBox
from nets.retinaface import RetinaFace
from utils.box_utils import decode, decode_landm, py_cpu_nms
import time
from pose_estimate import headPosEstimate

parser = argparse.ArgumentParser(description='Retinaface')

parser.add_argument('-m', '--trained_model', default='weights/mobilenet0.25_Final.pth',
                    type=str, help='Trained state_dict file path to open')
parser.add_argument('--network', default='mobile0.25', help='Backbone network mobile0.25 or resnet50')
parser.add_argument('--cpu', action="store_true", default=False, help='Use cpu inference')
parser.add_argument('--confidence_threshold', default=0.98, type=float, help='confidence_threshold')
parser.add_argument('--top_k', default=15, type=int, help='top_k')
parser.add_argument('--nms_threshold', default=0.2, type=float, help='nms_threshold')
parser.add_argument('--keep_top_k', default=12, type=int, help='keep_top_k')
parser.add_argument('-s', '--save_image', action="store_true", default=True, help='show detection results')
parser.add_argument('--vis_thres', default=0.9, type=float, help='visualization_threshold')
args = parser.parse_args()
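
# Example invocation (assuming this file is saved as detect.py):
#   python detect.py -m weights/mobilenet0.25_Final.pth --cpu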


def check_keys(model, pretrained_state_dict):
    ckpt_keys = set(pretrained_state_dict.keys())
    model_keys = set(model.state_dict().keys())
    used_pretrained_keys = model_keys & ckpt_keys
    unused_pretrained_keys = ckpt_keys - model_keys
    missing_keys = model_keys - ckpt_keys
    print('Missing keys:{}'.format(len(missing_keys)))
    print('Unused checkpoint keys:{}'.format(len(unused_pretrained_keys)))
    print('Used keys:{}'.format(len(used_pretrained_keys)))
    assert len(used_pretrained_keys) > 0, 'load NONE from pretrained checkpoint'
    return True


def remove_prefix(state_dict, prefix):
    '''Old-style checkpoints (saved from nn.DataParallel) prefix every parameter name with 'module.'; strip that prefix.'''
    print('remove prefix \'{}\''.format(prefix))
    f = lambda x: x.split(prefix, 1)[-1] if x.startswith(prefix) else x
    return {f(key): value for key, value in state_dict.items()}
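
# e.g. remove_prefix({'module.conv1.weight': w}, 'module.') -> {'conv1.weight': w}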


def load_model(model, pretrained_path, load_to_cpu):
    print('Loading pretrained model from {}'.format(pretrained_path))
    if load_to_cpu:
        pretrained_dict = torch.load(pretrained_path, map_location=lambda storage, loc: storage)
    else:
        device = torch.cuda.current_device()
        pretrained_dict = torch.load(pretrained_path, map_location=lambda storage, loc: storage.cuda(device))
    if "state_dict" in pretrained_dict.keys():
        pretrained_dict = remove_prefix(pretrained_dict['state_dict'], 'module.')
    else:
        pretrained_dict = remove_prefix(pretrained_dict, 'module.')
    check_keys(model, pretrained_dict)
    # strict=False tolerates model keys that are absent from the checkpoint
    model.load_state_dict(pretrained_dict, strict=False)
    return model


if __name__ == '__main__':
    torch.set_grad_enabled(False)
    # inlined config for the mobile0.25 backbone (equivalent to cfg_mnet)
    cfg = {
        'min_sizes': [[16, 32], [64, 128], [256, 512]],
        'steps': [8, 16, 32],
        'variance': [0.1, 0.2],
        'clip': False,
        'return_layers': {'stage1': 1, 'stage2': 2, 'stage3': 3},
        'in_channel': 32,
        'out_channel': 64
    }
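    # With two min_sizes per level and strides 8/16/32, the standard RetinaFace
    # PriorBox yields (80*60 + 40*30 + 20*15) * 2 = 12600 priors for a 640x480
    # input (a sketch of the math, assuming the usual implementation).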

    # net and model
    net = RetinaFace(cfg=cfg, phase='test')
    net = load_model(net, args.trained_model, args.cpu)
    net.eval()
    print('Finished loading model!')
    # print(net)
    cudnn.benchmark = True
    device = torch.device("cpu" if args.cpu else "cuda")
    net = net.to(device)

    resize = 1

    vc = cv2.VideoCapture(0)  # open the default camera (device 0)

    # capture resolution (240x180 also works if you need a lighter load)
    width = 640
    height = 480
    rval, img_raw = vc.read()
    img_raw = cv2.resize(img_raw, (width, height))
    im_height, im_width, _ = img_raw.shape
    scale = torch.Tensor([im_width, im_height, im_width, im_height])
    scale = scale.to(device)
    # priors depend only on the input size, so build them once outside the loop
    priorbox = PriorBox(cfg, image_size=(im_height, im_width))
    priors = priorbox.forward()
    priors = priors.to(device)
    while True:  # loop over camera frames
        rval, img_raw = vc.read()
        if not rval:
            break
        img_raw = cv2.resize(img_raw, (width, height))

        img = np.float32(img_raw)
        tic = time.time()
        img -= (104, 117, 123)  # subtract the training-time BGR channel means
        img = img.transpose(2, 0, 1)  # HWC -> CHW
        img = torch.from_numpy(img).unsqueeze(0)
        img = img.to(device)


        loc, conf, landms = net(img)  # forward pass

        prior_data = priors.data
        boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
        boxes = boxes * scale / resize
        boxes = boxes.cpu().numpy()
        scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
        landms = decode_landm(landms.data.squeeze(0), prior_data, cfg['variance'])
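        # decode()/decode_landm() invert the SSD-style encoding: roughly,
        # center = prior_center + offset * variance[0] * prior_size and
        # size = prior_size * exp(offset * variance[1]) (assuming the usual
        # box_utils implementation).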
        scale1 = torch.Tensor([img.shape[3], img.shape[2], img.shape[3], img.shape[2],
                               img.shape[3], img.shape[2], img.shape[3], img.shape[2],
                               img.shape[3], img.shape[2]])
        scale1 = scale1.to(device)
        landms = landms * scale1 / resize
        landms = landms.cpu().numpy()

        # ignore low scores
        inds = np.where(scores > args.confidence_threshold)[0]
        boxes = boxes[inds]
        landms = landms[inds]
        scores = scores[inds]

        # keep top-K before NMS
        order = scores.argsort()[::-1][:args.top_k]
        boxes = boxes[order]
        landms = landms[order]
        scores = scores[order]

        # do NMS
        dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
        keep = py_cpu_nms(dets, args.nms_threshold)
        # keep = nms(dets, args.nms_threshold,force_cpu=args.cpu)
        dets = dets[keep, :]
        landms = landms[keep]

        # keep top-K after NMS
        dets = dets[:args.keep_top_k, :]
        landms = landms[:args.keep_top_k, :]

        dets = np.concatenate((dets, landms), axis=1)
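        # each row of dets is now [x1, y1, x2, y2, score] followed by the five
        # landmark (x, y) pairs: left eye, right eye, nose, left mouth, right mouth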

        if time.time() - tic > 0.4:  # only log unusually slow frames
            print('net forward time: {:.4f}'.format(time.time() - tic))
        # show image

        # if args.save_image:
        for b in dets:
            if b[4] < args.vis_thres:
                continue
            text = "{:.4f}".format(b[4])
            b = list(map(int, b))
            cv2.rectangle(img_raw, (b[0], b[1]), (b[2], b[3]), (0, 0, 255), 2)
            cx = b[0]
            cy = b[1] + 12
            cv2.putText(img_raw, text, (cx, cy),
                        cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255))

            # the five landmark points as a (5, 2) double array, matching
            # the model_3d_points order expected by headPosEstimate
            face_key = np.array(b[5:15], dtype=np.double).reshape(-1, 2)
            headPosEstimate(img_raw, face_key)


            # landms
            cv2.circle(img_raw, (b[5], b[6]), 1, (0, 0, 255), 4)
            cv2.circle(img_raw, (b[7], b[8]), 3, (0, 255, 255), 4)
            cv2.circle(img_raw, (b[9], b[10]), 5, (255, 0, 255), 4)
            cv2.circle(img_raw, (b[11], b[12]), 7, (0, 255, 0), 4)
            cv2.circle(img_raw, (b[13], b[14]), 9, (255, 0, 0), 4)
        # display
        cv2.imshow("RetinaFace", img_raw)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
        # name = "test.jpg"
        # cv2.imwrite(name, img_raw)

        # landms = landms.reshape(-1,2)
        # print(landms.shape,landms.dtype)
        # if landms.shape[0] != 0:
        #     headPosEstimate(img_raw, landms[:5])
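
The fixed offsets inside headPosEstimate (0.8356857, 0.005409, 2.573345436) act as per-setup zeroing constants. A minimal sketch of how one might re-derive them, assuming you can record the raw (uncorrected) pitch/yaw/roll over a few seconds of frontal pose; calibrate_offsets is a hypothetical helper, not part of the code above:

import numpy as np

def calibrate_offsets(raw_angle_samples):
    # raw_angle_samples: list of (pitch, yaw, roll) tuples in radians,
    # collected while the subject faces the camera head-on
    arr = np.array(raw_angle_samples)
    return arr.mean(axis=0)  # subtract these means from future raw angles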

 
