这里主要记录基于yolo8pose的改动过程.
#添加计算NME的函数
def compute_nme(lms_pred, lms_gt, norm):
    """Return the normalised mean error (NME) between two landmark sets.

    Both inputs are reshaped to ``(-1, 2)`` so that each row is one (x, y)
    point. The Euclidean distance of every predicted point to its
    ground-truth point is averaged over the points and divided by ``norm``.

    Args:
        lms_pred: predicted landmarks; anything supporting ``reshape`` whose
            element count is even.
        lms_gt: ground-truth landmarks with the same number of elements.
        norm: normalisation distance the mean error is divided by.

    Returns:
        The mean per-point distance divided by ``norm``.
    """
    lms_pred = lms_pred.reshape((-1, 2))
    lms_gt = lms_gt.reshape((-1, 2))
    point_errors = np.linalg.norm(lms_pred - lms_gt, axis=1)
    # Divide by the actual number of points instead of a hard-coded 68, so
    # the result is also correct for other landmark layouts. For 68 points
    # this is exactly the previous (HRNet-style) formula.
    return np.sum(point_errors) / (norm * len(point_errors))
# 添加打印关键点和人脸框的函数,方便分析误差大的case
def show_results(img, xyxy, conf, landmarks, class_num, color):
    """Draw a bounding box, its landmarks and a confidence label on ``img``.

    Args:
        img: image array that the cv2 drawing calls write into.
        xyxy: box corners as ``(x1, y1, x2, y2)``; each value is cast to int.
        conf: confidence value; the first five characters of ``str(conf)``
            are drawn above the top-left corner of the box.
        landmarks: flat sequence laid out in groups of three values per
            point; only the first two of each group (x, y) are read.
        class_num: unused; kept so existing callers keep working.
        color: index into the colour palette for the landmark dots; values
            above 7 are clamped to 7.

    Returns:
        The same ``img`` object that was passed in.
    """
    # Colour tuples are handed to cv2 unchanged, so they are in the image's
    # channel order (BGR for images loaded with cv2.imread -- TODO confirm
    # for other sources). Index 1 is green; index 4 is white.
    colors = [(255, 0, 0), (0, 255, 0), (0, 0, 255), (255, 255, 0),
              (255, 255, 255), (0, 255, 255), (0, 255, 255), (0, 255, 255)]
    color = min(color, 7)

    # Line/font thickness. The original expression
    # `1 or round(0.002 * (h + w) / 2) + 1` always short-circuits to 1.
    tl = 1

    x1, y1, x2, y2 = (int(v) for v in xyxy[:4])
    cv2.rectangle(img, (x1, y1), (x2, y2), colors[1], thickness=tl, lineType=cv2.LINE_AA)

    # Draw every landmark that is present; a fixed range(67) dropped the
    # last point of a 68-point set.
    for i in range(len(landmarks) // 3):
        point_x = int(landmarks[3 * i])
        point_y = int(landmarks[3 * i + 1])
        cv2.circle(img, (point_x, point_y), tl + 1, colors[color], -1)

    tf = max(tl - 1, 1)  # font thickness
    label = str(conf)[:5]
    cv2.putText(img, label, (x1, y1 - 2), 0, tl / 3, [225, 255, 255], thickness=tf, lineType=cv2.LINE_AA)
    return img
# 根据归一化后的tensor 重新转为cv2画图的格式
def tensor2img(input_tensor):
    """Convert an image tensor into a uint8 array that cv2 can draw on.

    The tensor is copied, moved to the CPU, squeezed, scaled by 255 (with
    0.5 added before the cast so values round instead of truncating),
    clamped to [0, 255], permuted from channel-first to channel-last and
    finally converted from RGB to BGR channel order.

    Args:
        input_tensor: image tensor; assumed to be channel-first RGB with
            values in [0, 1], optionally with a batch dimension of size 1
            -- TODO confirm against the caller.

    Returns:
        The converted image as a ``numpy`` uint8 array in BGR order.
    """
    # Work on a detached copy so the in-place ops below never touch the
    # caller's tensor. The original body ignored the argument and read
    # `batch['img'][si]`, which is not defined inside this function.
    image = input_tensor.clone().detach()
    image = image.to(torch.device('cpu'))
    # Drop the batch dimension.
    image = image.squeeze()
    image = image.mul_(255).add_(0.5).clamp_(0, 255).permute(1, 2, 0).type(torch.uint8).numpy()
    # The original computed this value but never returned it.
    return cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
def update_metrics(self, preds, batch):
# NOTE: this is an excerpt, not the whole method. It shows the NME logic that
# was added to update_metrics. The names tkpts, tbox, pred, predn and si are
# used below but are not defined in this excerpt; presumably they come from
# the surrounding per-image loop of the validator -- TODO confirm.
# NOTE(review): the original indentation is not preserved here, so the nesting
# of the statements below (loop bodies, the `num_std > 0.3` branch) has to be
# confirmed against the real source file.
# The NME computation below was added to this function.
#######################################################################
# Indices of the two points whose distance normalises the NME
# (presumably the outer eye corners of the 68-point layout -- TODO confirm).
norm_indices = [36, 45]
# NOTE(review): the comments below give tkpts the shape [1, 68, 3]; if that is
# right, reshape(-1, 2) does not keep one landmark per row, so rows 36 and 45
# would not be landmarks 36 and 45 -- confirm the shape of tkpts here.
norm = np.linalg.norm(tkpts.reshape(-1, 2)[norm_indices[0]].cpu() - tkpts.reshape(-1, 2)[norm_indices[1]].cpu())
targetcoord = tkpts.cpu() #torch.Size([1, 68, 3])
predncoord = np.zeros((68, 2)) #torch.Size([1, 68, 3])---->(68,2)
# Running minimum of the NME over the predictions; starts at a large sentinel.
num_std = 10000000
for i in range(len(pred)):
# x values are read from index 6 with stride 3, y values from index 7 with stride 3.
predncoord[:,0] = predn[i][6::3].cpu()
predncoord[:,1] = predn[i][7::3].cpu()
tempNME = compute_nme(torch.from_numpy(predncoord), targetcoord[0,:,0:2], norm)
if tempNME < num_std:
num_std = tempNME
# NOTE(review): a value above 0.3 is printed and then replaced by 0 before it
# is appended to self.nmes_std, which lowers the aggregated error. When there
# are no predictions, num_std is still the 10000000 sentinel and also takes
# this branch. Confirm that this is intended.
if num_std > 0.3:
print(num_std)
num_std = 0
# method A
# Reload the original image from disk and look for the prediction with the
# highest confidence.
fileName = batch['im_file'][si]
orgImg = cv2.imread(fileName)
conf_max = 0
for n in range(len(pred)):
# NOTE(review): xyxy and conf are overwritten on every iteration, so after the
# loop they hold the values of the last prediction, while landmarks and
# class_num below are taken from max_index -- likely a mismatch. max_index is
# only assigned when some confidence is greater than 0.
xyxy = predn[n, :4].view(-1).tolist()
conf = predn[n, 4].cpu().numpy()
if conf > conf_max:
conf_max = conf
max_index = n
landmarks = predn[max_index, 6:].view(-1).tolist() #204
# class_num = det[j, 15].cpu().numpy()
class_num = predn[max_index, 5].cpu().numpy()
#method 1: to show predicted result with all points.
orgImg = show_results(orgImg, xyxy, conf, landmarks, class_num, color = 4)
tbox = tbox[0].view(-1).tolist()
# Pack the target x/y values into a flat 204-value layout with stride 3 (every
# third slot stays 0), the same layout used for the predicted landmarks above.
tkptLabel = torch.zeros(204)
tkptLabel[0::3] = targetcoord[:,:, 0] # targetcoord torch.Size([1, 68, 3])
tkptLabel[1::3] = targetcoord[:,:, 1]
# NOTE(review): tkpts and tbox are rebound to plain lists here; any later code
# that still expects tensors under these names would break -- confirm.
tkpts = tkptLabel.view(-1).tolist()
# #method 1: to show ground-truth with all points.
orgImg = show_results(orgImg, tbox, 1, tkpts, class_num, color = 1)
# Save the annotated image into the save directory under its original file name.
cv2.imwrite(os.fspath(self.save_dir) + '/'+ fileName.split('/')[-1], orgImg)
# cv2.imshow(fileName.split('/')[-1], orgImg)
##########################################################################
# Record the value for this image.
self.nmes_std.append(num_std)
########################################################################
这里不直接计算误差,而是先把图片文件名和对应的欧拉角保存到 txt 文件中,供第二阶段分析使用;因为我们没有把角度(angle)数据放入到 YOLO 中。
# Output file for the head-pose records. The directory and the file name are
# passed to os.path.join separately; concatenating them with `+` produced
# "<save_dir>ypr.txt" next to the save directory instead of a file inside it.
save_txt_path = os.path.join(os.fspath(self.save_dir), 'ypr.txt')
# Landmarks and class of the selected prediction. max_index, predn, orgImg and
# fileName are expected to be set by the preceding code -- TODO confirm.
landmarks = predn[max_index, 6:].view(-1).tolist()  # 204
class_num = predn[max_index, 5].cpu().numpy()
# Run the head-pose helper on the image and its landmarks.
rotation_vector, translation_vector = draw_headpose(orgImg, landmarks)
# Flatten both vectors into space-separated text (the names are rebound to
# strings, as in the original snippet).
rotation_vector = ' '.join(str(e) for e in rotation_vector.flatten())
translation_vector = ' '.join(str(e) for e in translation_vector.flatten())
# Append one line per image: "<file name> <rotation values> <translation values>".
with open(save_txt_path, 'a') as f_txt:
    f_txt.write(os.path.basename(fileName) + " " + rotation_vector + " " + translation_vector + '\n')
处理单个文件的逻辑
def improve_process_aflw2000(root_folder, folder_name, image_name, label_name, target_size):
    """Load one AFLW2000 sample and normalise its box and landmarks.

    The bounding box is the tight box around the annotated landmarks. Box and
    landmark coordinates are divided by the image width/height.

    Args:
        root_folder: dataset root directory.
        folder_name: sub-folder of ``root_folder`` holding the sample.
        image_name: file name of the image.
        label_name: file name of the ``.mat`` annotation; it must contain a
            ``pt3d_68`` entry whose first two rows are the x and y values.
        target_size: unused; kept so existing callers keep working.

    Returns:
        A tuple ``(image, isCrowdAndXYWH, anno2)`` where ``image`` is the
        array returned by ``cv2.imread``, ``isCrowdAndXYWH`` is
        ``[0, x_center, y_center, width, height]`` in normalised units, and
        ``anno2`` is a list of ``[x, y, 2]`` entries, one per landmark.

    Raises:
        FileNotFoundError: if the image cannot be read.
    """
    image_path = os.path.join(root_folder, folder_name, image_name)
    label_path = os.path.join(root_folder, folder_name, label_name)

    # loadmat opens the file itself; the extra text-mode open() around it
    # only created an unused handle on a binary file.
    anno = sio.loadmat(label_path)['pt3d_68']
    anno_x = anno[0]
    anno_y = anno[1]

    # Tight bounding box around the landmarks.
    bbox_xmin, bbox_xmax = anno_x.min(), anno_x.max()
    bbox_ymin, bbox_ymax = anno_y.min(), anno_y.max()
    bbox_width = bbox_xmax - bbox_xmin + 1
    bbox_height = bbox_ymax - bbox_ymin + 1

    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals failure by returning None; fail with a clear
        # message instead of an AttributeError on `.shape`.
        raise FileNotFoundError('cannot read image: {}'.format(image_path))
    image_height, image_width, _ = image.shape

    bbox_xcenter = bbox_xmin + bbox_width / 2
    bbox_ycenter = bbox_ymin + bbox_height / 2

    # To verify the conversion visually, draw the box and the landmarks on
    # `image` and write it out (the earlier commented-out debug code did this).

    # Leading 0 is the first field of the label line (presumably the class
    # id of the YOLO label format, despite the variable name -- TODO confirm).
    isCrowdAndXYWH = [
        0,
        bbox_xcenter / image_width,
        bbox_ycenter / image_height,
        bbox_width / image_width,
        bbox_height / image_height,
    ]
    # Each landmark becomes [x, y, 2]; the constant 2 is written as the third
    # value of every keypoint (presumably the visibility flag -- TODO confirm).
    anno2 = [[x / image_width, y / image_height, 2] for x, y in zip(anno_x, anno_y)]
    return image, isCrowdAndXYWH, anno2
处理文件夹的逻辑
if data_name == 'AFLW2000':
    # Only the test split is produced for this dataset.
    folders_test = ['AFLW2000']
    annos_test = {}
    for folder_test in folders_test:
        # List the folder that is actually being processed. The original
        # listed `data_name`, which only worked because both names coincide.
        all_files = sorted(os.listdir(os.path.join(root_folder, folder_test)))
        # Match on the suffix so names that merely contain ".jpg"/".mat"
        # somewhere else are not picked up.
        image_files = [x for x in all_files if x.endswith('.jpg')]
        label_files = [x for x in all_files if x.endswith('.mat')]
        # Explicit checks instead of `assert` (which is stripped under -O).
        if len(image_files) != len(label_files):
            raise ValueError(
                'image/label count mismatch in {}: {} images, {} labels'.format(
                    folder_test, len(image_files), len(label_files)))
        for image_name, label_name in zip(image_files, label_files):
            # Equal counts alone do not guarantee that the sorted lists line up.
            if os.path.splitext(image_name)[0] != os.path.splitext(label_name)[0]:
                raise ValueError(
                    'image {} is paired with label {}'.format(image_name, label_name))
            image_crop, isCrowdAndXYWH, anno = improve_process_aflw2000(
                root_folder, folder_test, image_name, label_name, target_size)
            image_crop_name = image_name
            # The images themselves were already written to
            # <root_folder>/images/test once, so they are not written again.
            annos_test[image_crop_name] = isCrowdAndXYWH, anno

    # Step 1 (writing test2yolo.txt with one './images/test/<name>' line per
    # image) was done once already and is intentionally skipped here.

    # Step 2: write one label file per image.
    labels_dir = os.path.join(root_folder, 'labels', 'test')
    os.makedirs(labels_dir, exist_ok=True)
    for image_crop_name, (xywh_values, keypoints) in annos_test.items():
        base_txt = os.path.splitext(os.path.basename(image_crop_name))[0] + ".txt"
        save_txt_path = os.path.join(labels_dir, base_txt)
        fields = [str(value) for value in xywh_values]
        for x, y, z in keypoints:
            fields.extend((str(x), str(y), str(z)))
        with open(save_txt_path, 'w') as f_txt:
            # Same layout as before: every value is followed by a single
            # space, and the line ends with a newline.
            f_txt.write(''.join(field + ' ' for field in fields) + '\n')
进一步工作,在AFLW-2000上验证.