SimDR(现在已经改名叫SimCC,后文还是称SimDR)将姿态估计的Heatmap方法转换为分类方法,在HRNet上实现了涨点,并且减小了显存占用。作者已经在github上开源了代码,但是在MMPose上目前还没有实现,所以本篇文章就使用HRNet在MMPose上实现SimDR。
SimDR原文: Is 2D Heatmap Representation Even Necessary for Human Pose Estimation?
SimDR开源代码:SimCC
因为在MMPose上修改的部分较多,所以文章会分为以下几个部分:
在mmpose\datasets\pipelines路径下新建simDR_transform.py
仿照目录下的top_down_transform.py引用包并创建类simDR作为流水线,这里先不写simDR的具体内容:
import cv2
import numpy as np
from mmpose.datasets.builder import PIPELINES
@PIPELINES.register_module()
class simDR:
    """Data-preprocessing pipeline step for SimDR.

    Placeholder only: the class is registered with ``PIPELINES`` so that it
    can be referenced from a config file; the actual target-generation logic
    is added later.
    """

    pass
接着在mmpose\datasets\pipelines的__init__.py文件中添加新建好的流水线类:
# Copyright (c) OpenMMLab. All rights reserved.
from .bottom_up_transform import * # noqa
from .gesture_transform import * # noqa
from .hand_transform import * # noqa
from .loading import * # noqa
from .mesh_transform import * # noqa
from .pose3d_transform import * # noqa
from .shared_transform import * # noqa
from .top_down_transform import * # noqa
from .simDR_transform import * #add new pipeline
这样我们就能在配置文件中使用simDR作为pipeline了。
因为我们编写的simDR类是要替换掉TopDownGenerateTarget这个类的,所以simDR类要在TopDownGenerateTarget类的基础上修改。
对照simDR的github源码,需要从配置文件传递一个simdr_split_ratio变量,同时,simDR又分为了普通的simDR和sa-simDR,所以我们simDR类的初始化定义如下:
class simDR:
    """Data-preprocessing pipeline step for SimDR.

    Stores the target-generation settings that are passed in from the
    config file.

    Args:
        sigma: Gaussian sigma used when building targets.
        kernel: Gaussian kernel size (used by the 'Megvii' encoding branch
            of ``__call__``).
        valid_radius_factor: Radius factor (used by the 'UDP' encoding
            branch of ``__call__`` for 'CombinedTarget').
        target_type: 'GaussianHeatmap' or 'CombinedTarget' (read by the
            'UDP' encoding branch of ``__call__``).
        simdr_split_ratio: Factor by which coordinates / image size are
            multiplied when building the SimDR targets.
        encoding: Name of the encoding to use. ``__call__`` dispatches on
            'MSRA', 'Megvii', 'UDP', 'simdr' and 'sa-simdr'.
        unbiased_encoding: Stored as-is on the instance.
    """

    def __init__(self,
                 sigma=2,
                 kernel=(11, 11),
                 valid_radius_factor=0.0546875,
                 target_type='GaussianHeatmap',
                 simdr_split_ratio=1,
                 encoding='simDR',
                 unbiased_encoding=False):
        self.sigma = sigma
        self.unbiased_encoding = unbiased_encoding
        self.kernel = kernel
        self.valid_radius_factor = valid_radius_factor
        self.target_type = target_type
        # ``__call__`` compares against the lowercase names 'simdr' and
        # 'sa-simdr', so the default 'simDR' would never match. Fold the
        # case of the SimDR variants only; other names are kept verbatim.
        if isinstance(encoding, str) and \
                encoding.lower() in ('simdr', 'sa-simdr'):
            encoding = encoding.lower()
        self.encoding = encoding
        self.simdr_split_ratio = simdr_split_ratio
        self.joints_weight = 1
根据simDR源码添加三个类内函数:
class simDR:
    """Data-preprocessing pipeline step for SimDR / SA-SimDR.

    Holds the target-generation settings and the helper methods that turn
    joint coordinates into SimDR targets.

    Args:
        sigma: Gaussian sigma used when building targets.
        kernel: Gaussian kernel size (used by the 'Megvii' encoding branch
            of ``__call__``).
        valid_radius_factor: Radius factor (used by the 'UDP' encoding
            branch of ``__call__`` for 'CombinedTarget').
        target_type: 'GaussianHeatmap' or 'CombinedTarget' (read by the
            'UDP' encoding branch of ``__call__``).
        simdr_split_ratio: Factor by which coordinates / image size are
            multiplied when building the SimDR targets.
        encoding: Name of the encoding to use. ``__call__`` dispatches on
            'MSRA', 'Megvii', 'UDP', 'simdr' and 'sa-simdr'.
        unbiased_encoding: Stored as-is on the instance.
    """

    def __init__(self,
                 sigma=2,
                 kernel=(11, 11),
                 valid_radius_factor=0.0546875,
                 target_type='GaussianHeatmap',
                 simdr_split_ratio=1,
                 encoding='simDR',
                 unbiased_encoding=False):
        self.sigma = sigma
        self.unbiased_encoding = unbiased_encoding
        self.kernel = kernel
        self.valid_radius_factor = valid_radius_factor
        self.target_type = target_type
        # ``__call__`` compares against the lowercase names 'simdr' and
        # 'sa-simdr', so the default 'simDR' would never match. Fold the
        # case of the SimDR variants only; other names are kept verbatim.
        if isinstance(encoding, str) and \
                encoding.lower() in ('simdr', 'sa-simdr'):
            encoding = encoding.lower()
        self.encoding = encoding
        self.simdr_split_ratio = simdr_split_ratio
        self.joints_weight = 1

    def adjust_target_weight(self, cfg, joint, target_weight, tmp_size):
        """Zero the weight of a joint whose Gaussian lies outside the image.

        Args:
            cfg: Mapping that provides ``cfg['image_size']``, indexed as
                ``[0]`` for the x extent and ``[1]`` for the y extent.
            joint: Joint coordinates; ``joint[0]`` is x and ``joint[1]`` is y.
            target_weight: Current weight of the joint.
            tmp_size: Half-size of the Gaussian support around the joint.

        Returns:
            ``0`` if the square ``[mu - tmp_size, mu + tmp_size + 1]`` has no
            overlap with the image, otherwise ``target_weight`` unchanged.
        """
        mu_x = joint[0]
        mu_y = joint[1]
        image_size = cfg['image_size']
        # Upper-left and bottom-right corners of the Gaussian support.
        ul = [int(mu_x - tmp_size), int(mu_y - tmp_size)]
        br = [int(mu_x + tmp_size + 1), int(mu_y + tmp_size + 1)]
        out_of_bounds = (ul[0] >= image_size[0] or ul[1] >= image_size[1]
                         or br[0] < 0 or br[1] < 0)
        if out_of_bounds:
            target_weight = 0
        return target_weight

    def filter_target_simdr(self, cfg, joints, joints_vis, image_size):
        """Clamp joints into the image and zero the weight of clamped ones.

        ``joints`` is modified in place and also returned.

        Args:
            cfg: Mapping that provides ``cfg['num_joints']``.
            joints: Array ``[num_joints, 3]``; column 0 is x, column 1 is y.
            joints_vis: Array ``[num_joints, 3]``; column 0 is copied into
                the initial weights.
            image_size: ``(x_extent, y_extent)`` bounds for the coordinates.

        Returns:
            Tuple ``(target_weight, joints)`` where ``target_weight`` has
            shape ``[num_joints, 1]`` (0 for every joint that had a
            coordinate below 0 or at/above the bound) and ``joints`` holds
            the clamped coordinates.
        """
        num_joints = cfg['num_joints']
        target_weight = np.ones((num_joints, 1), dtype=np.float32)
        target_weight[:, 0] = joints_vis[:, 0].copy()

        # Detect abnormal coordinates per axis, zero the weight of the
        # affected joints and clamp the coordinate onto the valid range.
        for axis in (0, 1):
            upper = image_size[axis]
            coords = joints[:num_joints, axis]  # view: writes reach `joints`
            below = coords < 0
            above = coords >= upper
            target_weight[below | above] = 0
            coords[below] = 0
            coords[above] = upper - 1
        return target_weight, joints

    def generate_sa_simdr(self, cfg, joints, joints_vis):
        """Build the SA-SimDR targets: one 1-D Gaussian per joint and axis.

        Args:
            cfg: Mapping that provides ``cfg['num_joints']`` and
                ``cfg['image_size']``.
            joints: Array ``[num_joints, 3]``; column 0 is x, column 1 is y.
            joints_vis: Array ``[num_joints, 3]``; column 0 is used as the
                initial weight of each joint.

        Returns:
            Tuple ``(target_x, target_y, target_weight)``. ``target_x`` has
            shape ``[num_joints, int(image_size[0] * simdr_split_ratio)]``,
            ``target_y`` the same with ``image_size[1]``, and
            ``target_weight`` has shape ``[num_joints, 1]``. Rows of joints
            whose weight is not above 0.5 stay all-zero.
        """
        num_joints = cfg['num_joints']
        image_size = cfg['image_size']
        len_x = int(image_size[0] * self.simdr_split_ratio)
        len_y = int(image_size[1] * self.simdr_split_ratio)

        target_weight = np.ones((num_joints, 1), dtype=np.float32)
        target_weight[:, 0] = joints_vis[:, 0]
        target_x = np.zeros((num_joints, len_x), dtype=np.float32)
        target_y = np.zeros((num_joints, len_y), dtype=np.float32)

        # The sample positions do not depend on the joint, so build them
        # once instead of once per loop iteration.
        x = np.arange(0, len_x, 1, np.float32)
        y = np.arange(0, len_y, 1, np.float32)
        tmp_size = self.sigma * 3

        for joint_id in range(num_joints):
            target_weight[joint_id] = self.adjust_target_weight(
                cfg, joints[joint_id], target_weight[joint_id, 0], tmp_size)
            # Joints with zero (or otherwise low) weight keep a zero target.
            if not target_weight[joint_id, 0] > 0.5:
                continue

            mu_x = joints[joint_id][0] * self.simdr_split_ratio
            mu_y = joints[joint_id][1] * self.simdr_split_ratio
            target_x[joint_id] = (
                np.exp(-((x - mu_x) ** 2) / (2 * self.sigma ** 2))
            ) / (self.sigma * np.sqrt(np.pi * 2))
            target_y[joint_id] = (
                np.exp(-((y - mu_y) ** 2) / (2 * self.sigma ** 2))
            ) / (self.sigma * np.sqrt(np.pi * 2))
        return target_x, target_y, target_weight
最后修改__call__函数中的内容。因为是从TopDownGenerateTarget类修改而来,所以有很多冗余的代码,可以依情况自行删除:
def __call__(self, results):
    """Generate the training target and store it in ``results``.

    Reads ``results['joints_3d']``, ``results['joints_3d_visible']`` and
    ``results['ann_info']``, builds the target selected by
    ``self.encoding`` and writes ``results['target']`` and
    ``results['target_weight']``.

    Supported encodings:
        * 'MSRA', 'Megvii', 'UDP': heatmap targets, produced by
          ``self._msra_generate_target`` / ``self._megvii_generate_target``
          / ``self._udp_generate_target``. When the corresponding
          parameter (``sigma`` / ``kernel`` / factors) is a list, one
          target per entry is generated and stacked along a new first axis.
          NOTE(review): these three helpers are not part of the simDR
          methods shown alongside this function; the branches presumably
          only work if they are copied over from TopDownGenerateTarget --
          confirm or delete the branches.
        * 'simdr': target is the x/y columns of the joints after scaling
          by ``simdr_split_ratio``, rounding and clamping into the image.
        * 'sa-simdr': target is the tuple ``(target_x, target_y)`` returned
          by ``self.generate_sa_simdr``.

    The two SimDR names are matched case-insensitively, so 'simDR' (the
    ``__init__`` default) is accepted.

    Args:
        results: Pipeline dict; modified in place.

    Returns:
        The same ``results`` dict with 'target' and 'target_weight' set.

    Raises:
        ValueError: If ``self.encoding`` is not supported, or if
            ``self.target_type`` is invalid for the 'UDP' encoding.
    """
    joints_3d = results['joints_3d']
    joints_3d_visible = results['joints_3d_visible']

    encoding = self.encoding
    if isinstance(encoding, str) and \
            encoding.lower() in ('simdr', 'sa-simdr'):
        encoding = encoding.lower()

    def stack_levels(generate, level_params, channel_factor, *extra):
        # Generate one target per entry of `level_params` and stack the
        # results along a new leading axis.
        cfg = results['ann_info']
        num_joints = cfg['num_joints']
        W, H = cfg['heatmap_size']
        targets = [np.empty((0, channel_factor * num_joints, H, W),
                            dtype=np.float32)]
        weights = [np.empty((0, num_joints, 1), dtype=np.float32)]
        for param in level_params:
            target_i, target_weight_i = generate(
                cfg, joints_3d, joints_3d_visible, param, *extra)
            targets.append(target_i[None])
            weights.append(target_weight_i[None])
        return (np.concatenate(targets, axis=0),
                np.concatenate(weights, axis=0))

    if encoding == 'MSRA':
        if isinstance(self.sigma, list):
            target, target_weight = stack_levels(
                self._msra_generate_target, self.sigma, 1)
        else:
            target, target_weight = self._msra_generate_target(
                results['ann_info'], joints_3d, joints_3d_visible,
                self.sigma)

    elif encoding == 'Megvii':
        if isinstance(self.kernel, list):
            target, target_weight = stack_levels(
                self._megvii_generate_target, self.kernel, 1)
        else:
            target, target_weight = self._megvii_generate_target(
                results['ann_info'], joints_3d, joints_3d_visible,
                self.kernel)

    elif encoding == 'UDP':
        if self.target_type.lower() == 'CombinedTarget'.lower():
            factors = self.valid_radius_factor
            channel_factor = 3
        elif self.target_type.lower() == 'GaussianHeatmap'.lower():
            factors = self.sigma
            channel_factor = 1
        else:
            raise ValueError('target_type should be either '
                             "'GaussianHeatmap' or 'CombinedTarget'")
        if isinstance(factors, list):
            target, target_weight = stack_levels(
                self._udp_generate_target, factors, channel_factor,
                self.target_type)
        else:
            target, target_weight = self._udp_generate_target(
                results['ann_info'], joints_3d, joints_3d_visible, factors,
                self.target_type)

    elif encoding == 'simdr':
        cfg = results['ann_info']
        # np.asarray makes the scaling element-wise even if image_size is
        # a plain list (list * int would repeat it, list * float raises).
        scaled_size = np.asarray(cfg['image_size']) * self.simdr_split_ratio
        # np.around / astype already return fresh arrays, so the in-place
        # clamping in filter_target_simdr cannot touch results['joints_3d'].
        joints_split = np.around(
            joints_3d * self.simdr_split_ratio).astype(np.int64)
        target_weight, filtered_joints = self.filter_target_simdr(
            cfg, joints_split, joints_3d_visible, scaled_size)
        target = filtered_joints[:, 0:2]

    elif encoding == 'sa-simdr':
        cfg = results['ann_info']
        target_x, target_y, target_weight = self.generate_sa_simdr(
            cfg, joints_3d, joints_3d_visible)
        target = (target_x, target_y)

    else:
        # Single validation point: the former `assert` made this branch
        # unreachable and disappears under `python -O`.
        raise ValueError(
            f'Encoding approach {self.encoding} is not supported!')

    results['target'] = target
    results['target_weight'] = target_weight

    return results
这里根据源码使用if/elif语句将预处理分为simdr和sa-simdr两种编码,两者的处理方式不同,调用的函数也不同,请注意。配置文件中encoding的取值请使用小写的'simdr'或'sa-simdr',与__call__中的判断保持一致。results['ann_info']是mmpose官方代码中的变量,存储了配置文件中的各种信息,例如heatmap和input的大小等。
后续会提供完整的py文件供大家参考。由于是在官方代码基础上修改,有很多没有必要的代码,可以自行删除。文章更新完成后我会将所有代码打包上传到github供大家学习交流。
喜欢这篇文章的话请多多收藏点赞,谢谢大家的鼓励。