YOLO8添加facial landmark和Head Pose的评价逻辑


目录

    • 目录
  • 前言
  • 一、如何在val.py中添加NME的逻辑
  • 二、在val.py中添加Angle Error的逻辑
    • 1.引入库
  • 三、将AFLW2000转为yolo格式
    • 1.参考ultralyticsFaceMark/process300LP2Yolo3D.py
  • 总结

前言

这里主要记录基于yolo8pose的改动过程.


一、如何在val.py中添加NME的逻辑

# Helper added to val.py for computing the normalized mean error (NME).
def compute_nme(lms_pred, lms_gt, norm):
    """Return the normalized mean error between two landmark sets.

    Both inputs are reshaped to ``(-1, 2)``, so they may be passed either as
    ``(N, 2)`` arrays or as flat ``[x0, y0, x1, y1, ...]`` sequences.

    Args:
        lms_pred: Predicted landmark coordinates.
        lms_gt: Ground-truth landmark coordinates, same layout as lms_pred.
        norm: Normalisation distance that the mean point error is divided by.

    Returns:
        Sum of the per-landmark Euclidean distances divided by
        ``norm * number_of_landmarks``. Returns 0.0 when there are no
        landmarks.
    """
    lms_pred = lms_pred.reshape((-1, 2))
    lms_gt = lms_gt.reshape((-1, 2))

    # Use the actual number of landmarks instead of a hard-coded 68 so the
    # metric stays a true mean for any landmark layout; for 68 points the
    # result is unchanged.
    num_points = lms_pred.shape[0]
    if num_points == 0:
        return 0.0

    point_errors = np.linalg.norm(lms_pred - lms_gt, axis=1)
    return np.sum(point_errors) / (norm * num_points)

# Helper that draws the face box and landmarks, handy for inspecting the
# cases with a large error.
def show_results(img, xyxy, conf, landmarks, class_num, color):
    """Draw one face box, its landmarks and its confidence label on img.

    Args:
        img: Image to draw on; it is modified in place by the cv2 calls.
        xyxy: Box corners as ``[x1, y1, x2, y2]``.
        conf: Confidence value; the first five characters of ``str(conf)``
            are used as the label.
        landmarks: Flat sequence of landmark triplets
            ``[x0, y0, v0, x1, y1, v1, ...]``; only x and y are used.
        class_num: Unused; kept so existing callers keep working.
        color: Index into the landmark palette; values above 7 are clamped
            to 7.

    Returns:
        The same image object that was passed in.
    """
    # Palette in OpenCV channel order (BGR): index 1 is green, index 4 is white.
    colors = [(255, 0, 0), (0, 255, 0), (0, 0, 255), (255, 255, 0),
              (255, 255, 255), (0, 255, 255), (0, 255, 255), (0, 255, 255)]
    color = min(color, len(colors) - 1)

    # Line/font thickness. The original expression `1 or round(...)` always
    # evaluated to 1, so the constant is written out directly.
    tl = 1

    x1, y1, x2, y2 = (int(v) for v in xyxy[:4])
    cv2.rectangle(img, (x1, y1), (x2, y2), colors[1], thickness=tl, lineType=cv2.LINE_AA)

    # Draw every landmark that is present. The previous hard-coded
    # range(67) silently skipped the last of the 68 points.
    for i in range(len(landmarks) // 3):
        point_x = int(landmarks[3 * i])
        point_y = int(landmarks[3 * i + 1])
        cv2.circle(img, (point_x, point_y), tl + 1, colors[color], -1)

    tf = max(tl - 1, 1)  # font thickness
    label = str(conf)[:5]
    cv2.putText(img, label, (x1, y1 - 2), 0, tl / 3, [225, 255, 255], thickness=tf, lineType=cv2.LINE_AA)
    return img

# Convert a normalized image tensor back into the layout used for cv2 drawing.
def tensor2img(input_tensor):
    """Convert a normalized RGB image tensor to an HxWx3 uint8 BGR array.

    Args:
        input_tensor: Image tensor laid out as (3, H, W), optionally with a
            leading batch dimension of size 1. Values are assumed to be
            scaled to [0, 1] (they are multiplied by 255 here).

    Returns:
        A contiguous ``numpy.uint8`` array of shape (H, W, 3) with the
        channel order reversed (RGB -> BGR). The input tensor is not
        modified.
    """
    # Work on a detached CPU copy so the in-place ops below never touch the
    # caller's tensor. The previous version ignored its argument and read
    # `batch['img'][si]`, which is not defined inside this function.
    image = input_tensor.clone().detach().to(torch.device('cpu'))
    # Drop the batch dimension.
    image = image.squeeze()
    image = image.mul_(255).add_(0.5).clamp_(0, 255).permute(1, 2, 0).type(torch.uint8).numpy()
    # Reverse the channel axis (RGB -> BGR); the copy makes the result
    # contiguous so it can be handed to cv2 drawing functions.
    return np.ascontiguousarray(image[:, :, ::-1])

                
def update_metrics(self, preds, batch):
    # Add the NME logic below inside this function. This is an excerpt: it
    # uses `si`, `pred`, `predn`, `tbox` and `tkpts`, which are not defined
    # here and presumably come from the per-image loop of the validator.
    #######################################################################
            # Ground-truth landmarks; per the original note the shape is
            # torch.Size([1, 68, 3]).
            targetcoord = tkpts.cpu()
            gt_xy = targetcoord[0, :, 0:2]  # (68, 2) x/y only

            # Normalisation distance between landmarks 36 and 45 (presumably
            # the outer eye corners of the 68-point layout - confirm).
            # Indexing the x/y slice keeps the indices on whole landmarks;
            # reshaping a 3-column tensor to (-1, 2) would mix x, y and the
            # third column and pick the wrong points.
            norm_indices = [36, 45]
            norm = np.linalg.norm(gt_xy[norm_indices[0]] - gt_xy[norm_indices[1]])

            predncoord = np.zeros((68, 2))  # x/y of one prediction

            # Keep the smallest NME over all predictions for this image.
            num_std = 10000000
            for i in range(len(pred)):
                predncoord[:, 0] = predn[i][6::3].cpu()
                predncoord[:, 1] = predn[i][7::3].cpu()
                tempNME = compute_nme(torch.from_numpy(predncoord), gt_xy, norm)
                if tempNME < num_std:
                    num_std = tempNME

            if num_std > 0.3:
                print(num_std)
                # NOTE(review): large errors (and images without any
                # prediction) are recorded as 0, which lowers the averaged
                # NME - confirm this is intended.
                num_std = 0

                # Dump a visualisation of the failure case: most confident
                # prediction plus the ground truth.
                if len(pred) > 0:
                    fileName = batch['im_file'][si]
                    orgImg = cv2.imread(fileName)
                    if orgImg is not None:
                        # Box, confidence and landmarks must all come from
                        # the same (most confident) prediction.
                        max_index = int(predn[:, 4].argmax())
                        xyxy = predn[max_index, :4].view(-1).tolist()
                        conf = predn[max_index, 4].cpu().numpy()
                        landmarks = predn[max_index, 6:].view(-1).tolist()  # 204 values
                        class_num = predn[max_index, 5].cpu().numpy()

                        # Predicted result with all points.
                        orgImg = show_results(orgImg, xyxy, conf, landmarks, class_num, color=4)

                        # Ground truth with all points. Local names are used
                        # so `tbox` / `tkpts` are not overwritten for any
                        # code that runs after this fragment.
                        gt_box = tbox[0].view(-1).tolist()
                        tkptLabel = torch.zeros(204)
                        tkptLabel[0::3] = targetcoord[0, :, 0]
                        tkptLabel[1::3] = targetcoord[0, :, 1]
                        gt_landmarks = tkptLabel.view(-1).tolist()
                        orgImg = show_results(orgImg, gt_box, 1, gt_landmarks, class_num, color=1)

                        cv2.imwrite(os.path.join(os.fspath(self.save_dir), os.path.basename(fileName)), orgImg)
                ##########################################################################

            self.nmes_std.append(num_std)
            ########################################################################

二、在val.py中添加Angle Error的逻辑

这里不是直接计算误差,而是先把图片name和对应的欧拉角都保存到txt中,以准备第二阶段分析;因为我们没有把angle的数据放入到yolo中.

1.引入库

# Output file. The directory and the file name are passed to os.path.join
# separately so the file lands inside save_dir; concatenating them first
# produced a sibling path such as "<save_dir>ypr.txt".
save_txt_path = os.path.join(os.fspath(self.save_dir), 'ypr.txt')

# Landmarks and class of the selected prediction for the current image.
landmarks = predn[max_index, 6:].view(-1).tolist()  # 204 values
class_num = predn[max_index, 5].cpu().numpy()

# Run the head-pose helper on the image and its landmarks.
rotation_vector, translation_vector = draw_headpose(orgImg, landmarks)

# Append one line per image: "<file name> <rotation values> <translation values>".
# Separate names are used for the text so the vectors themselves stay arrays.
rotation_text = ' '.join(str(e) for e in rotation_vector.flatten())
translation_text = ' '.join(str(e) for e in translation_vector.flatten())
with open(save_txt_path, 'a') as f_txt:
    f_txt.write(os.path.basename(fileName) + " " + rotation_text + " " + translation_text + '\n')

三、将AFLW2000转为yolo格式

1.参考ultralyticsFaceMark/process300LP2Yolo3D.py

处理单个文件的逻辑

def improve_process_aflw2000(root_folder, folder_name, image_name, label_name, target_size):
    """Build the YOLO-pose annotation for one AFLW2000 image.

    Args:
        root_folder: Dataset root directory.
        folder_name: Sub-folder that holds both the image and the label.
        image_name: Image file name.
        label_name: ``.mat`` label file name; its ``pt3d_68`` entry is read.
        target_size: Unused; kept so existing callers keep working.

    Returns:
        A tuple ``(image, isCrowdAndXYWH, anno2)`` where ``image`` is the
        array returned by ``cv2.imread``, ``isCrowdAndXYWH`` is
        ``[0, x_center, y_center, width, height]`` normalised by the image
        size, and ``anno2`` is a list of ``[x, y, 2]`` entries normalised
        the same way.

    Raises:
        FileNotFoundError: If the image cannot be read.
    """
    image_path = os.path.join(root_folder, folder_name, image_name)
    label_path = os.path.join(root_folder, folder_name, label_name)

    # loadmat opens the file itself; the previous extra text-mode open() of
    # the binary .mat file was unnecessary.
    anno = sio.loadmat(label_path)['pt3d_68']
    anno_x = anno[0]
    anno_y = anno[1]

    # The face box is the tight bounding box around the landmarks.
    bbox_xmin = anno_x.min()
    bbox_ymin = anno_y.min()
    bbox_xmax = anno_x.max()
    bbox_ymax = anno_y.max()

    bbox_width = bbox_xmax - bbox_xmin + 1
    bbox_height = bbox_ymax - bbox_ymin + 1

    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread returns None instead of raising on a missing or
        # unreadable file.
        raise FileNotFoundError(f"cannot read image: {image_path}")
    image_height, image_width, _ = image.shape

    bbox_xcenter = bbox_xmin + bbox_width / 2
    bbox_ycenter = bbox_ymin + bbox_height / 2

    # To check the conversion visually, draw the box and the 68 points on
    # `image` and write it out before returning.

    isCrowdAndXYWH = [0, bbox_xcenter / image_width, bbox_ycenter / image_height,
                      bbox_width / image_width, bbox_height / image_height]

    # Every keypoint gets the constant third value 2.
    anno2 = [[x / image_width, y / image_height, 2] for x, y in np.c_[anno_x, anno_y]]

    return image, isCrowdAndXYWH, anno2

处理文件夹的逻辑

if data_name == 'AFLW2000':
    # Convert every image of the AFLW2000 test folder and write one YOLO
    # label file per image.
    folders_test = ['AFLW2000']
    annos_test = {}
    for folder_test in folders_test:
        all_files = sorted(os.listdir(os.path.join(root_folder, folder_test)))
        available = set(all_files)
        image_files = [x for x in all_files if x.endswith('.jpg')]
        for image_name in image_files:
            # Derive the label name from the image name, so each image is
            # paired with its own label rather than by list position.
            label_name = os.path.splitext(image_name)[0] + '.mat'
            if label_name not in available:
                raise FileNotFoundError(
                    f"missing label {label_name} for image {image_name}")

            image_crop, isCrowdAndXYWH, anno = improve_process_aflw2000(
                root_folder, folder_test, image_name, label_name, target_size)
            image_crop_name = image_name
            # Optional: cv2.imwrite(os.path.join(root_folder, 'images', 'test', image_crop_name), image_crop)
            # (already written once, no need to write again)
            annos_test[image_crop_name] = isCrowdAndXYWH, anno

    # Step 1 (optional): write the image list file, one
    # './images/test/<image name>' line per image, to
    # os.path.join(root_folder, 'test2yolo.txt').

    # Step 2: write the per-image label files.
    labels_dir = os.path.join(root_folder, 'labels', 'test')
    os.makedirs(labels_dir, exist_ok=True)
    for image_crop_name, (box_fields, keypoints) in annos_test.items():
        base_txt = os.path.splitext(os.path.basename(image_crop_name))[0] + ".txt"
        save_txt_path = os.path.join(labels_dir, base_txt)

        # One line per image: the five box fields followed by x y v for
        # every keypoint, separated by single spaces.
        tokens = [str(value) for value in box_fields]
        for x, y, z in keypoints:
            tokens.extend((str(x), str(y), str(z)))
        with open(save_txt_path, 'w') as f_txt:
            f_txt.write(' '.join(tokens) + '\n')


总结

进一步工作,在AFLW-2000上验证.

你可能感兴趣的:(python,计算机视觉)