The idea is to convert the dataset annotations into a dataset format that mmpose supports (e.g., MS COCO) and then train in mmpose with the corresponding dataset class (e.g., TopdownCOCODataset).
Download the archive and extract it to data/coco_tiny:
wget https://download.openmmlab.com/mmpose/datasets/coco_tiny.tar -O data/coco_tiny.tar
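If you prefer to stay in Python, here is a minimal extraction sketch, assuming the archive unpacks into a coco_tiny folder as the directory name above suggests:
import tarfile

# extract data/coco_tiny.tar into data/, yielding data/coco_tiny
with tarfile.open('data/coco_tiny.tar') as tar:
    tar.extractall('data')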
In the project root, create a utils folder and inside it a scan_json.py file for inspecting the JSON annotation format:
import json
import pprint

# path to the tiny COCO training annotations
json_path = r"../data/coco_tiny/train.json"
with open(json_path, "r") as f:
    annotations = json.load(f)
# print the first entry to inspect its structure
pprint.pprint(annotations[0], compact=True)
The output is as follows:
{'bbox': [267.03, 104.32, 229.19, 320],
'image_file': '000000537548.jpg',
'image_size': [640, 480],
'keypoints': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 325, 160, 2, 398,
177, 2, 0, 0, 0, 437, 238, 2, 0, 0, 0, 477, 270, 2, 287, 255, 1,
339, 267, 2, 0, 0, 0, 423, 314, 2, 0, 0, 0, 355, 367, 2]}
Each annotation evidently contains 17 keypoints: the flat keypoints list above holds 51 values, i.e. 17 (x, y, visibility) triplets, matching the COCO keypoint definition.
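As a quick sanity check, reusing the annotations loaded in scan_json.py above:
# 17 keypoints, each stored as an (x, y, visibility) triplet
assert len(annotations[0]['keypoints']) == 17 * 3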
Reference: https://github.com/open-mmlab/mmpose/blob/main/demo/MMPose_Tutorial.ipynb
Go to mmpose/datasets/datasets/body and create tinycocodataset.py, which implements a new dataset class for loading training and validation samples. Since we are training a top-down pose estimation model, the new dataset class inherits from BaseCocoStyleDataset. The contents of tinycocodataset.py are as follows:
import json
import os.path as osp
from typing import Callable, List, Optional, Sequence, Tuple, Union

import numpy as np
from mmengine.utils import check_file_exist

from mmpose.registry import DATASETS
from mmpose.datasets.datasets.base import BaseCocoStyleDataset


@DATASETS.register_module()
class TinyCocoDataset(BaseCocoStyleDataset):
    # =======================================================================
    # The coco.py file referenced by METAINFO holds the keypoint metadata
    # (keypoint ids, names, colors, skeleton links, etc.)
    # =======================================================================
    METAINFO: dict = dict(from_file='configs/_base_/datasets/coco.py')

    def _load_annotations(self) -> Tuple[List[dict], List[dict]]:
        """Load data from an MPII-style annotation file.

        For reference, MPII defines 16 keypoints: 0 - r ankle, 1 - r knee,
        2 - r hip, 3 - l hip, 4 - l knee, 5 - l ankle, 6 - pelvis,
        7 - thorax, 8 - upper neck, 9 - head top, 10 - r wrist,
        11 - r elbow, 12 - r shoulder, 13 - l shoulder, 14 - l elbow,
        15 - l wrist. The keypoints in coco_tiny, however, follow the
        17-keypoint COCO definition from METAINFO.
        The bbox is converted to [x1, y1, x2, y2], i.e. the coordinates
        of the top-left and bottom-right corners.
        MPII download page: http://human-pose.mpi-inf.mpg.de/#download
        """
        # read the annotation file
        check_file_exist(self.ann_file)
        with open(self.ann_file) as anno_file:
            anns = json.load(anno_file)

        data_list = []
        ann_id = 0
        # iterate over all annotations
        for idx, ann in enumerate(anns):
            # image height and width
            img_h, img_w = ann['image_size']

            # the raw bbox is [x, y, w, h]: the top-left corner plus the
            # box width and height
            x, y, w, h = ann['bbox']
            # clip the box to the image with np.clip (values below the
            # minimum are raised to it, values above the maximum are
            # lowered to it) and convert it to [x1, y1, x2, y2]
            x1 = np.clip(x, 0, img_w - 1)
            y1 = np.clip(y, 0, img_h - 1)
            x2 = np.clip(x + w, 0, img_w - 1)
            y2 = np.clip(y + h, 0, img_h - 1)
            bbox = np.array([x1, y1, x2, y2], dtype=np.float32).reshape(1, 4)

            # load the keypoints as a (1, K, 2) array, where K is the
            # number of keypoints; visibility becomes a (1, K) vector.
            # joints_3d groups the flat keypoint list into triplets:
            # [[[  0   0   0]
            #   [  0   0   0]
            #   ......
            #   [  0   0   0]
            #   [355 367   2]]]
            joints_3d = np.array(ann['keypoints']).reshape(1, -1, 3)
            # number of keypoints, 17 in this dataset
            num_joints = joints_3d.shape[1]
            # drop the visibility column of joints_3d and keep only the
            # coordinates as keypoints
            keypoints = np.zeros((1, num_joints, 2), dtype=np.float32)
            keypoints[:, :, :2] = joints_3d[:, :, :2]
            # keypoints_visible is 1 for every labeled point, 0 otherwise
            keypoints_visible = np.minimum(1, joints_3d[:, :, 2:3])
            keypoints_visible = keypoints_visible.reshape(1, -1)

            # assemble a data_info dict in the format mmpose expects
            data_info = {
                'id': ann_id,
                'img_id': int(ann['image_file'].split('.')[0]),
                'img_path': osp.join(self.data_prefix['img'],
                                     ann['image_file']),
                'bbox': bbox,
                'bbox_score': np.ones(1, dtype=np.float32),
                'keypoints': keypoints,
                'keypoints_visible': keypoints_visible,
            }
            data_list.append(data_info)
            ann_id = ann_id + 1

        return data_list, None
In the folder containing mmpose/datasets/datasets/body/tinycocodataset.py, open __init__.py, import the TinyCocoDataset class, and add TinyCocoDataset to the __all__ variable. The code before and after registration is shown below.
Before registration:
# Copyright (c) OpenMMLab. All rights reserved.
from .aic_dataset import AicDataset
from .coco_dataset import CocoDataset
from .crowdpose_dataset import CrowdPoseDataset
from .jhmdb_dataset import JhmdbDataset
from .mhp_dataset import MhpDataset
from .mpii_dataset import MpiiDataset
from .mpii_trb_dataset import MpiiTrbDataset
from .ochuman_dataset import OCHumanDataset
from .posetrack18_dataset import PoseTrack18Dataset
from .posetrack18_video_dataset import PoseTrack18VideoDataset
__all__ = [
    'CocoDataset', 'MpiiDataset', 'MpiiTrbDataset', 'AicDataset',
    'CrowdPoseDataset', 'OCHumanDataset', 'MhpDataset', 'PoseTrack18Dataset',
    'JhmdbDataset', 'PoseTrack18VideoDataset'
]
After registration:
# Copyright (c) OpenMMLab. All rights reserved.
from .aic_dataset import AicDataset
from .coco_dataset import CocoDataset
from .crowdpose_dataset import CrowdPoseDataset
from .jhmdb_dataset import JhmdbDataset
from .mhp_dataset import MhpDataset
from .mpii_dataset import MpiiDataset
from .mpii_trb_dataset import MpiiTrbDataset
from .ochuman_dataset import OCHumanDataset
from .posetrack18_dataset import PoseTrack18Dataset
from .posetrack18_video_dataset import PoseTrack18VideoDataset
from .tinycocodataset import TinyCocoDataset
__all__ = [
    'CocoDataset', 'MpiiDataset', 'MpiiTrbDataset', 'AicDataset',
    'CrowdPoseDataset', 'OCHumanDataset', 'MhpDataset', 'PoseTrack18Dataset',
    'JhmdbDataset', 'PoseTrack18VideoDataset', 'TinyCocoDataset'
]
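With the import and __all__ entry in place, the class is registered under the name TinyCocoDataset. As an optional sanity check, you can build it through the registry; a minimal sketch, run from the mmpose root and assuming the data layout used in the config below:
from mmpose.registry import DATASETS

# build TinyCocoDataset from a plain config dict (paths are the same
# ones the training config uses)
dataset = DATASETS.build(
    dict(
        type='TinyCocoDataset',
        data_root='data/coco_tiny',
        data_mode='topdown',
        ann_file='train.json',
        data_prefix=dict(img='images/'),
        pipeline=[]))
print(len(dataset))  # number of training samples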
Under configs/body_2d_keypoint/topdown_heatmap/coco, create a config file hrnet_w32_coco_tiny_256x192.py that configures the model, dataset, and runtime settings. Rather than writing it from scratch, we modify the existing config for training HRNet on COCO so that it fits the TinyCoco dataset.
_base_ = ['../../../_base_/default_runtime.py']
# runtime settings
train_cfg = dict(max_epochs=40, val_interval=1)
# optimizer
optim_wrapper = dict(optimizer=dict(
    type='Adam',
    lr=5e-4,
))
# learning policy
param_scheduler = [
    dict(
        type='LinearLR', begin=0, end=500, start_factor=0.001,
        by_epoch=False),  # warm-up
    dict(
        type='MultiStepLR',
        begin=0,
        # train_cfg is a plain dict here, so index it as
        # train_cfg['max_epochs'] rather than train_cfg.max_epochs
        end=train_cfg['max_epochs'],
        milestones=[17, 35],
        gamma=0.1,
        by_epoch=True)
]
# automatically scaling LR based on the actual training batch size
auto_scale_lr = dict(base_batch_size=512)
# codec settings
codec = dict(
    type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)
# model settings
model = dict(
    type='TopdownPoseEstimator',
    data_preprocessor=dict(
        type='PoseDataPreprocessor',
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375],
        bgr_to_rgb=True),
    backbone=dict(
        type='HRNet',
        in_channels=3,
        extra=dict(
            stage1=dict(
                num_modules=1,
                num_branches=1,
                block='BOTTLENECK',
                num_blocks=(4, ),
                num_channels=(64, )),
            stage2=dict(
                num_modules=1,
                num_branches=2,
                block='BASIC',
                num_blocks=(4, 4),
                num_channels=(32, 64)),
            stage3=dict(
                num_modules=4,
                num_branches=3,
                block='BASIC',
                num_blocks=(4, 4, 4),
                num_channels=(32, 64, 128)),
            stage4=dict(
                num_modules=3,
                num_branches=4,
                block='BASIC',
                num_blocks=(4, 4, 4, 4),
                num_channels=(32, 64, 128, 256))),
        init_cfg=dict(
            type='Pretrained',
            # ImageNet-pretrained HRNet-W32 weights, downloaded from
            # https://download.openmmlab.com/mmpose/pretrain_models/hrnet_w32-36af842e.pth
            # and placed at this local path
            checkpoint='configs/body_2d_keypoint/topdown_heatmap/coco/hrnet_w32-36af842e.pth'),
    ),
    head=dict(
        type='HeatmapHead',
        in_channels=32,
        out_channels=17,
        deconv_out_channels=None,
        loss=dict(type='KeypointMSELoss', use_target_weight=True),
        decoder=codec),
    test_cfg=dict(
        flip_test=True,
        flip_mode='heatmap',
        shift_heatmap=True,
    ))
# base dataset settings
# dataset class name
dataset_type = 'TinyCocoDataset'
# data mode: 'topdown' or 'bottomup'
data_mode = 'topdown'
# dataset location
data_root = 'data/coco_tiny'
# where checkpoints and training logs are stored
work_dir = 'work_dirs/hrnet_w32_tiny_coco_256x192'
# random seed
randomness = dict(seed=0)
# pipelines
train_pipeline = [
    dict(type='LoadImage'),
    dict(type='GetBBoxCenterScale'),
    dict(type='RandomFlip', direction='horizontal'),
    dict(type='RandomHalfBody'),
    dict(type='RandomBBoxTransform'),
    dict(type='TopdownAffine', input_size=codec['input_size']),
    dict(type='GenerateTarget', target_type='heatmap', encoder=codec),
    dict(type='PackPoseInputs')
]
test_pipeline = [
    dict(type='LoadImage'),
    dict(type='GetBBoxCenterScale'),
    dict(type='TopdownAffine', input_size=codec['input_size']),
    dict(type='PackPoseInputs')
]
# data loaders
train_dataloader = dict(
    batch_size=16,
    num_workers=2,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_mode=data_mode,
        ann_file='train.json',
        data_prefix=dict(img='images/'),
        pipeline=train_pipeline,
    ))
val_dataloader = dict(
    batch_size=16,
    num_workers=2,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_mode=data_mode,
        ann_file='val.json',
        data_prefix=dict(img='images/'),
        test_mode=True,
        pipeline=test_pipeline,
    ))
test_dataloader = val_dataloader
# evaluators
val_evaluator = dict(type='PCKAccuracy')
test_evaluator = val_evaluator
# hooks
# save_best was originally 'coco/AP'; with the PCKAccuracy evaluator the
# tracked metric is named 'PCK'
default_hooks = dict(checkpoint=dict(save_best='PCK', rule='greater'))
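Before launching training, it can be worth confirming that the new config parses and the registered dataset resolves; a minimal sketch using mmengine, run from the mmpose root:
from mmengine.config import Config

# load the new config and check that key fields resolve as expected
cfg = Config.fromfile(
    'configs/body_2d_keypoint/topdown_heatmap/coco/hrnet_w32_coco_tiny_256x192.py')
print(cfg.train_dataloader.dataset.type)  # 'TinyCocoDataset'
print(cfg.train_cfg.max_epochs)  # 40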
Then run the following command from the command line:
python tools/train.py configs/body_2d_keypoint/topdown_heatmap/coco/hrnet_w32_coco_tiny_256x192.py
The trained model is saved in the work_dirs/hrnet_w32_tiny_coco_256x192 directory.
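The exact epoch number in the best-checkpoint file name varies between runs, so one way to locate it is a quick glob; the best_PCK_epoch_<N>.pth pattern comes from the checkpoint hook's save_best='PCK' setting:
import glob

# find the best-PCK checkpoint saved during training
best_ckpts = glob.glob(
    'work_dirs/hrnet_w32_tiny_coco_256x192/best_PCK_epoch_*.pth')
print(best_ckpts)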
For inference, create utils/mmpose_model_inference.py in the project root:
from mmpose.apis import MMPoseInferencer
import time

file_path = r'../data/coco_tiny/images/000000019157.jpg'
# build the inferencer from the paths (or URLs) of the model config file
# and the trained weights
inferencer = MMPoseInferencer(
    pose2d='../work_dirs/hrnet_w32_tiny_coco_256x192/hrnet_w32_coco_tiny_256x192.py',
    pose2d_weights='../work_dirs/hrnet_w32_tiny_coco_256x192/best_PCK_epoch_39.pth',
)

start = time.time()
# the inferencer returns a generator; consuming it runs inference and
# writes the visualizations to out_dir
result_generator = inferencer(file_path, show=False, out_dir='output')
results = [r for r in result_generator]
# alternatively, fetch a single result:
# result = next(result_generator)
end = time.time()
print('Running time: %s seconds' % (end - start))
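Each yielded result is a dict with 'predictions' (and, when visualization is returned, 'visualization'). A hedged sketch of reading the predicted keypoints, assuming MMPoseInferencer's documented nesting of per-image instance lists:
# print the keypoints of every detected instance in each result
for result in results:
    for instance in result['predictions'][0]:
        print(instance['keypoints'])  # 17 (x, y) pairs
        print(instance['keypoint_scores'])  # per-keypoint confidence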