I've recently been working on human pose estimation and have a dataset of upper-body keypoints. mmpose from SenseTime's OpenMMLab is a good fit, but since I couldn't find an article on training mmpose with a custom dataset, I wrote one myself.
GitHub repository for mmpose: mmpose
One catch: mmpose uses 17 whole-body keypoints, while I only need 14 upper-body keypoints, so I had to train my own version. The technical documentation is very detailed (see the documentation site) and the tutorials are fairly complete, but you still need to write a custom dataset class yourself. Since I had no prior experience with this kind of thing, I wrote one by imitating the COCO dataset class.
First, process the data directly into COCO format:
'images': [
{
'file_name': '000000001268.jpg',
'height': 427,
'width': 640,
'id': 1268
},
...
],
'annotations': [
{
'segmentation': [[426.36,
...
424.34,
223.3]],
'keypoints': [0,0,0,
0,0,0,
0,0,0,
427,220,2,
443,222,2,
414,228,2,
449,232,2,
408,248,1,
454,261,2,
0,0,0,
0,0,0,
411,287,2,
431,287,2,
0,0,0,
458,265,2,
0,0,0,
466,300,1],
'num_keypoints': 10,
'area': 3894.5826,
'iscrowd': 0,
'image_id': 1268,
'bbox': [402.34, 205.02, 65.26, 88.45],
'category_id': 1,
'id': 215218
},
...
],
'categories': [
{'id': 1, 'name': 'person'},
]
The JSON file must contain the following three keys:
images: a list of image entries, each providing file_name, height, width, id and so on.
annotations: a list of instance annotations.
categories: the category name ('person') and its corresponding ID (1).
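Once the JSON has been generated (the conversion script is below), a quick sanity check like the following can confirm that the three keys are present and that every annotation points at an existing image. This is just a minimal sketch; adjust the path to wherever your generated file lives.
import json

with open('data/custom/annotations/train.json') as f:
    coco = json.load(f)

# The three required top-level keys.
assert all(k in coco for k in ('images', 'annotations', 'categories'))

# Every annotation should reference an existing image id.
image_ids = {img['id'] for img in coco['images']}
assert all(ann['image_id'] in image_ids for ann in coco['annotations'])
print(len(coco['images']), 'images,', len(coco['annotations']), 'annotations')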
My dataset's labels look like this (the original screenshot is not reproduced here).
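Judging from the conversion code below, each image has a matching .txt file under labels/ containing one comma-separated line per labeled keypoint: the keypoint index, then x and y normalized to the image width and height, with a trailing comma. For example (made-up values):
3,0.667,0.515,
4,0.692,0.520,
7,0.637,0.581,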
To convert it into COCO format, the conversion code is as follows:
import os
import re
import cv2
import json
from tqdm import tqdm
from sklearn.model_selection import train_test_split
import argparse
parser = argparse.ArgumentParser()
parser.add_argument('--root_dir', default='data/', type=str,
help="root path of images and labels, include ./images and ./labels and classes.txt")
parser.add_argument('--save_path', type=str, default='data/train.json',
help="if not split the dataset, give a path to a json file")
parser.add_argument('--random_split', default=True, action='store_true', help="randomly split the dataset into train/val/test; the default ratio is 8:1:1")
parser.add_argument('--split_by_file', action='store_true',
help="define how to split the dataset, include ./train.txt ./val.txt ./test.txt ")
arg = parser.parse_args()
def train_test_val_split_random(img_paths, ratio_train=0.8, ratio_test=0.1, ratio_val=0.1):
# Adjust the dataset split ratios here.
assert abs(ratio_train + ratio_test + ratio_val - 1.0) < 1e-6
train_img, middle_img = train_test_split(img_paths, test_size=1 - ratio_train, random_state=233)
ratio = ratio_val / (1 - ratio_train)
val_img, test_img = train_test_split(middle_img, test_size=ratio, random_state=233)
print("NUMS of train:val:test = {}:{}:{}".format(len(train_img), len(val_img), len(test_img)))
return train_img, val_img, test_img
def train_test_val_split_by_files(img_paths, root_dir):
# Define the train/val/test splits from train.txt, val.txt and test.txt (each file lists the image names of that split).
phases = ['train', 'val', 'test']
img_split = []
for p in phases:
define_path = os.path.join(root_dir, f'{p}.txt')
print(f'Read {p} dataset definition from {define_path}')
assert os.path.exists(define_path)
with open(define_path, 'r') as f:
img_paths = f.readlines()
# img_paths = [os.path.split(img_path.strip())[1] for img_path in img_paths] # NOTE: uncomment this line if the txt files contain full paths (it keeps only the file names).
img_split.append(img_paths)
return img_split[0], img_split[1], img_split[2]
def yolo2coco(arg):
root_path = arg.root_dir
print("Loading data from ", root_path)
assert os.path.exists(root_path)
originLabelsDir = os.path.join(root_path, 'labels')
originImagesDir = os.path.join(root_path, 'images')
f = open(os.path.join(root_path, 'classes.txt'), 'r', encoding='UTF-8')
classes = f.read().strip().split()
print(classes)
# images dir name
indexes = os.listdir(originImagesDir)
if arg.random_split or arg.split_by_file:
# Dicts holding the image info and annotations for each split.
train_dataset = {'categories': [], 'annotations': [], 'images': []}
val_dataset = {'categories': [], 'annotations': [], 'images': []}
test_dataset = {'categories': [], 'annotations': [], 'images': []}
# Map the category name to a numeric id (person -> 1).
train_dataset['categories'].append({'id': 1, 'name': classes[0]})
val_dataset['categories'].append({'id': 1, 'name': classes[0]})
test_dataset['categories'].append({'id': 1, 'name': classes[0]})
if arg.random_split:
print("spliting mode: random split")
train_img, val_img, test_img = train_test_val_split_random(indexes, 0.8, 0.1, 0.1)
elif arg.split_by_file:
print("spliting mode: split by files")
train_img, val_img, test_img = train_test_val_split_by_files(indexes, root_path)
# annotation id counter
print(len(indexes))
ann_id_cnt = 0
for k, index in enumerate(tqdm(indexes)):
# Supports .jpg and .png images.
txtFile = index.replace('images', 'txt').replace('.jpg', '.txt').replace('.png', '.txt')
# Read the image width and height.
im = cv2.imread(os.path.join(root_path, 'images/') + index)
height, width, _ = im.shape
if arg.random_split or arg.split_by_file:
# Point `dataset` at the split this image belongs to.
if index in train_img:
dataset = train_dataset
elif index in val_img:
dataset = val_dataset
elif index in test_img:
dataset = test_dataset
# Add the image info.
dataset['images'].append({'file_name': index,
'id': k,
'width': width,
'height': height})
if not os.path.exists(os.path.join(originLabelsDir, txtFile)):
# If there is no label file, skip the annotation and keep only the image info.
continue
with open(os.path.join(originLabelsDir, txtFile), 'r') as fr:
labelList = fr.readlines()
res = [0] * 42  # 14 keypoints x 3 values (x, y, visibility)
for label in labelList:
label = label[:-2]  # drop the trailing comma and newline
label = re.sub(',', ' ', label)
label = label.strip().split()
label.extend('2')  # append visibility flag 2 (labeled and visible)
a = int(label[0])  # keypoint index
aa = a * 3
bb = a * 3 + 1
cc = a * 3 + 2
x, y = int(float(label[1]) * width), int(float(label[2]) * height)
res[aa] = x
res[bb] = y
res[cc] = int(label[3])
# Use the whole image as the person bbox (one person per image).
H, W, _ = im.shape
dataset['annotations'].append({
'keypoints': res,
'num_keypoints': int(len(labelList)),
'area': W * H,
'bbox': [0, 0, W, H],
'id': ann_id_cnt,
'image_id': k,
'category_id': 1,
'iscrowd': 0,
})
ann_id_cnt += 1
if index in train_img:
train_dataset = dataset
elif index in val_img:
val_dataset = dataset
elif index in test_img:
test_dataset = dataset
# Save the results.
folder = os.path.join(root_path, 'annotations')
if not os.path.exists(folder):
os.makedirs(folder)
if arg.random_split or arg.split_by_file:
for phase in ['train', 'val', 'test']:
json_name = os.path.join(root_path, 'annotations/{}.json'.format(phase))
with open(json_name, 'w') as f:
if phase == 'train':
json.dump(train_dataset, f)
elif phase == 'val':
json.dump(val_dataset, f)
elif phase == 'test':
json.dump(test_dataset, f)
print('Save annotation to {}'.format(json_name))
else:
json_name = os.path.join(root_path, 'annotations/{}'.format(arg.save_path))
with open(json_name, 'w') as f:
json.dump(dataset, f)
print('Save annotation to {}'.format(json_name))
if __name__ == "__main__":
yolo2coco(arg)
Adapt this to your own dataset layout. By default it splits the data randomly at 8:1:1; if you have already split your data yourself, just change that part.
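For reference, assuming the script is saved as yolo2coco.py (the filename is my choice) and the data follows the layout expected by the argparse defaults (data/images, data/labels, data/classes.txt), a typical invocation would be:
python yolo2coco.py --root_dir data/ --random_split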
First, create your own dataset description file custom.py under configs/_base_/datasets/; it defines the 14 keypoints and how they are linked.
Mine is shown below; change dataset_name on the second line to your own dataset's name.
dataset_info = dict(
dataset_name='custom',
paper_info=dict(
author='Lin, Tsung-Yi and Maire, Michael and '
'Belongie, Serge and Hays, James and '
'Perona, Pietro and Ramanan, Deva and '
r'Doll{\'a}r, Piotr and Zitnick, C Lawrence',
title='Microsoft coco: Common objects in context',
container='European conference on computer vision',
year='2014',
homepage='http://cocodataset.org/',
),
keypoint_info={
0:
dict(name='head', id=0, color=[51, 153, 255], type='upper', swap=''),
1:
dict(
name='left_eye',
id=1,
color=[51, 153, 255],
type='upper',
swap='right_eye'),
2:
dict(
name='right_eye',
id=2,
color=[51, 153, 255],
type='upper',
swap='left_eye'),
3:
dict(
name='left_ear',
id=3,
color=[51, 153, 255],
type='upper',
swap='right_ear'),
4:
dict(
name='right_ear',
id=4,
color=[51, 153, 255],
type='upper',
swap='left_ear'),
5:
dict(
name='nose',
id=5,
color=[0, 255, 0],
type='upper',
swap=''),
6:
dict(
name='chin',
id=6,
color=[255, 128, 0],
type='upper',
swap=''),
7:
dict(
name='neck',
id=7,
color=[0, 255, 0],
type='upper',
swap=''),
8:
dict(
name='left_shoulder',
id=8,
color=[255, 128, 0],
type='upper',
swap='right_shoulder'),
9:
dict(
name='right_shoulder',
id=9,
color=[0, 255, 0],
type='upper',
swap='left_shoulder'),
10:
dict(
name='left_elbow',
id=10,
color=[255, 128, 0],
type='upper',
swap='right_elbow'),
11:
dict(
name='right_elbow',
id=11,
color=[0, 255, 0],
type='lower',
swap='left_elbow'),
12:
dict(
name='left_wrist',
id=12,
color=[255, 128, 0],
type='lower',
swap='right_wrist'),
13:
dict(
name='right_wrist',
id=13,
color=[0, 255, 0],
type='lower',
swap='left_wrist'),
},
skeleton_info={
0:
dict(link=('chin', 'neck'), id=0, color=[0, 255, 0]),
1:
dict(link=('chin', 'nose'), id=1, color=[0, 255, 0]),
2:
dict(link=('neck', 'left_shoulder'), id=2, color=[255, 128, 0]),
3:
dict(link=('neck', 'right_shoulder'), id=3, color=[255, 128, 0]),
4:
dict(link=('head', 'nose'), id=6, color=[51, 153, 255]),
5:
dict(link=('left_shoulder', 'left_elbow'), id=8, color=[0, 255, 0]),
6:
dict(
link=('right_shoulder', 'right_elbow'), id=9, color=[255, 128, 0]),
7:
dict(link=('left_elbow', 'left_wrist'), id=10, color=[0, 255, 0]),
8:
dict(link=('right_elbow', 'right_wrist'), id=11, color=[255, 128, 0]),
9:
dict(link=('nose', 'left_eye'), id=13, color=[51, 153, 255]),
10:
dict(link=('nose', 'right_eye'), id=14, color=[51, 153, 255]),
11:
dict(link=('left_eye', 'left_ear'), id=15, color=[51, 153, 255]),
12:
dict(link=('right_eye', 'right_ear'), id=16, color=[51, 153, 255]),
},
joint_weights=[
1., 1., 1., 1., 1., 1., 1., 1., 1.2, 1.2, 1.2, 1.2, 1.5, 1.5
],
sigmas=[
0.026, 0.025, 0.025, 0.035, 0.035, 0.029, 0.029, 0.072, 0.079, 0.079,
0.072, 0.072, 0.062, 0.062
])
The colors and so on don't really matter. The last two fields, joint_weights and sigmas, should be set according to your own use case; the technical documentation covers them in detail.
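For context, the per-keypoint sigmas feed into the OKS (object keypoint similarity) metric used for evaluation and NMS. Below is a minimal sketch of how OKS uses them; it is illustrative only, the actual implementation mmpose relies on lives in xtcocotools.
import numpy as np

def oks(pred, gt, visible, area, sigmas):
    """pred, gt: (K, 2) keypoint arrays; visible: (K,) flags; area: GT box area."""
    variances = (2 * np.asarray(sigmas)) ** 2
    d2 = np.sum((pred - gt) ** 2, axis=1)               # squared distance per keypoint
    e = d2 / (2 * variances * (area + np.spacing(1)))   # error normalized by sigma and area
    mask = visible > 0
    return float(np.sum(np.exp(-e)[mask]) / max(mask.sum(), 1))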
Next, add a topdown_custom_dataset.py file under mmpose/datasets/datasets/top_down/; its contents can simply mirror the COCO dataset class.
# Copyright (c) OpenMMLab. All rights reserved.
import os.path as osp
import tempfile
import warnings
from collections import OrderedDict, defaultdict
import json_tricks as json
import numpy as np
from mmcv import Config, deprecated_api_warning
from xtcocotools.cocoeval import COCOeval
from ....core.post_processing import oks_nms, soft_oks_nms
from ...builder import DATASETS
from ..base import Kpt2dSviewRgbImgTopDownDataset
@DATASETS.register_module()
class TopDownCustomDataset(Kpt2dSviewRgbImgTopDownDataset):
"""CocoDataset dataset for top-down pose estimation.
"Microsoft COCO: Common Objects in Context", ECCV'2014.
More details can be found in the `paper
`__ .
The dataset loads raw features and apply specified transforms
to return a dict containing the image tensors and other information.
COCO keypoint indexes::
0: 'nose',
1: 'left_eye',
2: 'right_eye',
3: 'left_ear',
4: 'right_ear',
5: 'left_shoulder',
6: 'right_shoulder',
7: 'left_elbow',
8: 'right_elbow',
9: 'left_wrist',
10: 'right_wrist',
11: 'left_hip',
12: 'right_hip',
13: 'left_knee',
14: 'right_knee',
15: 'left_ankle',
16: 'right_ankle'
Args:
ann_file (str): Path to the annotation file.
img_prefix (str): Path to a directory where images are held.
Default: None.
data_cfg (dict): config
pipeline (list[dict | callable]): A sequence of data transforms.
dataset_info (DatasetInfo): A class containing all dataset info.
test_mode (bool): Store True when building test or
validation dataset. Default: False.
"""
def __init__(self,
ann_file,
img_prefix,
data_cfg,
pipeline,
dataset_info=None,
test_mode=False):
if dataset_info is None:
warnings.warn(
'dataset_info is missing. '
'Check https://github.com/open-mmlab/mmpose/pull/663 '
'for details.', DeprecationWarning)
cfg = Config.fromfile('configs/_base_/datasets/custom.py')
dataset_info = cfg._cfg_dict['dataset_info']
super().__init__(
ann_file,
img_prefix,
data_cfg,
pipeline,
dataset_info=dataset_info,
test_mode=test_mode)
self.use_gt_bbox = data_cfg['use_gt_bbox']
self.bbox_file = data_cfg['bbox_file']
self.det_bbox_thr = data_cfg.get('det_bbox_thr', 0.0)
self.use_nms = data_cfg.get('use_nms', True)
self.soft_nms = data_cfg['soft_nms']
self.nms_thr = data_cfg['nms_thr']
self.oks_thr = data_cfg['oks_thr']
self.vis_thr = data_cfg['vis_thr']
self.db = self._get_db()
print(f'=> num_images: {self.num_images}')
print(f'=> load {len(self.db)} samples')
def _get_db(self):
"""Load dataset."""
# if (not self.test_mode) or self.use_gt_bbox:
# use ground truth bbox
gt_db = self._load_coco_keypoint_annotations()
# else:
# use bbox from detection
# gt_db = self._load_coco_person_detection_results()
return gt_db
def _load_coco_keypoint_annotations(self):
"""Ground truth bbox and keypoints."""
gt_db = []
for img_id in self.img_ids:
gt_db.extend(self._load_coco_keypoint_annotation_kernel(img_id))
return gt_db
def _load_coco_keypoint_annotation_kernel(self, img_id):
"""load annotation from COCOAPI.
Note:
bbox:[x1, y1, w, h]
Args:
img_id: coco image id
Returns:
dict: db entry
"""
img_ann = self.coco.loadImgs(img_id)[0]
width = img_ann['width']
height = img_ann['height']
num_joints = self.ann_info['num_joints']
ann_ids = self.coco.getAnnIds(imgIds=img_id, iscrowd=False)
objs = self.coco.loadAnns(ann_ids)
# sanitize bboxes
valid_objs = []
for obj in objs:
if 'bbox' not in obj:
continue
x, y, w, h = obj['bbox']
x1 = max(0, x)
y1 = max(0, y)
x2 = min(width - 1, x1 + max(0, w - 1))
y2 = min(height - 1, y1 + max(0, h - 1))
if ('area' not in obj or obj['area'] > 0) and x2 > x1 and y2 > y1:
obj['clean_bbox'] = [x1, y1, x2 - x1, y2 - y1]
valid_objs.append(obj)
objs = valid_objs
bbox_id = 0
rec = []
for obj in objs:
if 'keypoints' not in obj:
continue
if max(obj['keypoints']) == 0:
continue
if 'num_keypoints' in obj and obj['num_keypoints'] == 0:
continue
joints_3d = np.zeros((num_joints, 3), dtype=np.float32)
joints_3d_visible = np.zeros((num_joints, 3), dtype=np.float32)
keypoints = np.array(obj['keypoints']).reshape(-1, 3)
joints_3d[:, :2] = keypoints[:, :2]
joints_3d_visible[:, :2] = np.minimum(1, keypoints[:, 2:3])
center, scale = self._xywh2cs(*obj['clean_bbox'][:4])
image_file = osp.join(self.img_prefix, self.id2name[img_id])
rec.append({
'image_file': image_file,
'center': center,
'scale': scale,
'bbox': obj['clean_bbox'][:4],
'rotation': 0,
'joints_3d': joints_3d,
'joints_3d_visible': joints_3d_visible,
'dataset': self.dataset_name,
'bbox_score': 1,
'bbox_id': bbox_id
})
bbox_id = bbox_id + 1
return rec
'''
def _load_coco_person_detection_results(self):
"""Load coco person detection results."""
num_joints = self.ann_info['num_joints']
all_boxes = None
with open(self.bbox_file, 'r') as f:
all_boxes = json.load(f)
if not all_boxes:
raise ValueError('=> Load %s fail!' % self.bbox_file)
print(f'=> Total boxes: {len(all_boxes)}')
kpt_db = []
bbox_id = 0
for det_res in all_boxes:
if det_res['category_id'] != 1:
continue
image_file = osp.join(self.img_prefix,
self.id2name[det_res['image_id']])
box = det_res['bbox']
score = det_res['score']
if score < self.det_bbox_thr:
continue
center, scale = self._xywh2cs(*box[:4])
joints_3d = np.zeros((num_joints, 3), dtype=np.float32)
joints_3d_visible = np.ones((num_joints, 3), dtype=np.float32)
kpt_db.append({
'image_file': image_file,
'center': center,
'scale': scale,
'rotation': 0,
'bbox': box[:4],
'bbox_score': score,
'dataset': self.dataset_name,
'joints_3d': joints_3d,
'joints_3d_visible': joints_3d_visible,
'bbox_id': bbox_id
})
bbox_id = bbox_id + 1
print(f'=> Total boxes after filter '
f'low score@{self.det_bbox_thr}: {bbox_id}')
return kpt_db
'''
@deprecated_api_warning(name_dict=dict(outputs='results'))
def evaluate(self, results, res_folder=None, metric='mAP', **kwargs):
"""Evaluate coco keypoint results. The pose prediction results will be
saved in ``${res_folder}/result_keypoints.json``.
Note:
- batch_size: N
- num_keypoints: K
- heatmap height: H
- heatmap width: W
Args:
results (list[dict]): Testing results containing the following
items:
- preds (np.ndarray[N,K,3]): The first two dimensions are \
coordinates, score is the third dimension of the array.
- boxes (np.ndarray[N,6]): [center[0], center[1], scale[0], \
scale[1],area, score]
- image_paths (list[str]): For example, ['data/coco/val2017\
/000000393226.jpg']
- heatmap (np.ndarray[N, K, H, W]): model output heatmap
- bbox_id (list(int)).
res_folder (str, optional): The folder to save the testing
results. If not specified, a temp folder will be created.
Default: None.
metric (str | list[str]): Metric to be performed. Defaults: 'mAP'.
Returns:
dict: Evaluation results for evaluation metric.
"""
metrics = metric if isinstance(metric, list) else [metric]
allowed_metrics = ['mAP']
for metric in metrics:
if metric not in allowed_metrics:
raise KeyError(f'metric {metric} is not supported')
if res_folder is not None:
tmp_folder = None
res_file = osp.join(res_folder, 'result_keypoints.json')
else:
tmp_folder = tempfile.TemporaryDirectory()
res_file = osp.join(tmp_folder.name, 'result_keypoints.json')
kpts = defaultdict(list)
for result in results:
preds = result['preds']
boxes = result['boxes']
image_paths = result['image_paths']
bbox_ids = result['bbox_ids']
batch_size = len(image_paths)
for i in range(batch_size):
image_id = self.name2id[image_paths[i][len(self.img_prefix):]]
kpts[image_id].append({
'keypoints': preds[i],
'center': boxes[i][0:2],
'scale': boxes[i][2:4],
'area': boxes[i][4],
'score': boxes[i][5],
'image_id': image_id,
'bbox_id': bbox_ids[i]
})
kpts = self._sort_and_unique_bboxes(kpts)
# rescoring and oks nms
num_joints = self.ann_info['num_joints']
vis_thr = self.vis_thr
oks_thr = self.oks_thr
valid_kpts = []
for image_id in kpts.keys():
img_kpts = kpts[image_id]
for n_p in img_kpts:
box_score = n_p['score']
kpt_score = 0
valid_num = 0
for n_jt in range(0, num_joints):
t_s = n_p['keypoints'][n_jt][2]
if t_s > vis_thr:
kpt_score = kpt_score + t_s
valid_num = valid_num + 1
if valid_num != 0:
kpt_score = kpt_score / valid_num
# rescoring
n_p['score'] = kpt_score * box_score
if self.use_nms:
nms = soft_oks_nms if self.soft_nms else oks_nms
keep = nms(img_kpts, oks_thr, sigmas=self.sigmas)
valid_kpts.append([img_kpts[_keep] for _keep in keep])
else:
valid_kpts.append(img_kpts)
self._write_coco_keypoint_results(valid_kpts, res_file)
info_str = self._do_python_keypoint_eval(res_file)
name_value = OrderedDict(info_str)
if tmp_folder is not None:
tmp_folder.cleanup()
return name_value
def _write_coco_keypoint_results(self, keypoints, res_file):
"""Write results into a json file."""
data_pack = [{
'cat_id': self._class_to_coco_ind[cls],
'cls_ind': cls_ind,
'cls': cls,
'ann_type': 'keypoints',
'keypoints': keypoints
} for cls_ind, cls in enumerate(self.classes)
if not cls == '__background__']
results = self._coco_keypoint_results_one_category_kernel(data_pack[0])
with open(res_file, 'w') as f:
json.dump(results, f, sort_keys=True, indent=4)
def _coco_keypoint_results_one_category_kernel(self, data_pack):
"""Get coco keypoint results."""
cat_id = data_pack['cat_id']
keypoints = data_pack['keypoints']
cat_results = []
for img_kpts in keypoints:
if len(img_kpts) == 0:
continue
_key_points = np.array(
[img_kpt['keypoints'] for img_kpt in img_kpts])
key_points = _key_points.reshape(-1,
self.ann_info['num_joints'] * 3)
result = [{
'image_id': img_kpt['image_id'],
'category_id': cat_id,
'keypoints': key_point.tolist(),
'score': float(img_kpt['score']),
'center': img_kpt['center'].tolist(),
'scale': img_kpt['scale'].tolist()
} for img_kpt, key_point in zip(img_kpts, key_points)]
cat_results.extend(result)
return cat_results
def _do_python_keypoint_eval(self, res_file):
"""Keypoint evaluation using COCOAPI."""
coco_det = self.coco.loadRes(res_file)
coco_eval = COCOeval(self.coco, coco_det, 'keypoints', self.sigmas)
coco_eval.params.useSegm = None
coco_eval.evaluate()
coco_eval.accumulate()
coco_eval.summarize()
stats_names = [
'AP', 'AP .5', 'AP .75', 'AP (M)', 'AP (L)', 'AR', 'AR .5',
'AR .75', 'AR (M)', 'AR (L)'
]
info_str = list(zip(stats_names, coco_eval.stats))
return info_str
def _sort_and_unique_bboxes(self, kpts, key='bbox_id'):
"""sort kpts and remove the repeated ones."""
for img_id, persons in kpts.items():
num = len(persons)
kpts[img_id] = sorted(kpts[img_id], key=lambda x: x[key])
for i in range(num - 1, 0, -1):
if kpts[img_id][i][key] == kpts[img_id][i - 1][key]:
del kpts[img_id][i]
return kpts
Then create and modify a training config file under configs/ so that it uses the custom dataset.
_base_ = [
'_base_/default_runtime.py',
'_base_/datasets/custom.py'
]
evaluation = dict(interval=10, metric='mAP', save_best='AP')
optimizer = dict(
type='Adam',
lr=5e-4,
)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=500,
warmup_ratio=0.001,
step=[170, 200])
total_epochs = 210
channel_cfg = dict(
num_output_channels=14,
dataset_joints=14,
dataset_channel=[
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
# [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
],
inference_channel=[
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13
# 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13
])
# model settings
model = dict(
type='TopDown',
pretrained='https://download.openmmlab.com/mmpose/'
'pretrain_models/hrnet_w32-36af842e.pth',
backbone=dict(
type='HRNet',
in_channels=3,
extra=dict(
stage1=dict(
num_modules=1,
num_branches=1,
block='BOTTLENECK',
num_blocks=(4, ),
num_channels=(64, )),
stage2=dict(
num_modules=1,
num_branches=2,
block='BASIC',
num_blocks=(4, 4),
num_channels=(32, 64)),
stage3=dict(
num_modules=4,
num_branches=3,
block='BASIC',
num_blocks=(4, 4, 4),
num_channels=(32, 64, 128)),
stage4=dict(
num_modules=3,
num_branches=4,
block='BASIC',
num_blocks=(4, 4, 4, 4),
num_channels=(32, 64, 128, 256))),
),
keypoint_head=dict(
type='TopdownHeatmapSimpleHead',
in_channels=32,
out_channels=channel_cfg['num_output_channels'],
num_deconv_layers=0,
extra=dict(final_conv_kernel=1, ),
loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
train_cfg=dict(),
test_cfg=dict(
flip_test=True,
post_process='default',
shift_heatmap=True,
modulate_kernel=11))
data_cfg = dict(
# image_size=[192, 256],
# heatmap_size=[48, 64],
image_size=[128, 160],
heatmap_size=[32, 40],
num_output_channels=channel_cfg['num_output_channels'],
num_joints=channel_cfg['dataset_joints'],
dataset_channel=channel_cfg['dataset_channel'],
inference_channel=channel_cfg['inference_channel'],
soft_nms=False,
nms_thr=1.0,
oks_thr=0.9,
vis_thr=0.2,
use_gt_bbox=True,
det_bbox_thr=0.0,
bbox_file=None,
)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='TopDownRandomFlip', flip_prob=0.5),
dict(
type='TopDownHalfBodyTransform',
num_joints_half_body=8,
prob_half_body=0.3),
dict(
type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
dict(type='TopDownAffine'),
dict(type='ToTensor'),
dict(
type='NormalizeTensor',
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]),
dict(type='TopDownGenerateTarget', sigma=2),
dict(
type='Collect',
keys=['img', 'target', 'target_weight'],
meta_keys=[
'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
'rotation', 'bbox_score', 'flip_pairs'
]),
]
val_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='TopDownAffine'),
dict(type='ToTensor'),
dict(
type='NormalizeTensor',
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]),
dict(
type='Collect',
keys=['img'],
meta_keys=[
'image_file', 'center', 'scale', 'bbox_score', 'rotation',
'flip_pairs'
]),
]
test_pipeline = val_pipeline
data_root = 'data/custom'
data = dict(
samples_per_gpu=128,
workers_per_gpu=2,
val_dataloader=dict(samples_per_gpu=32),
test_dataloader=dict(samples_per_gpu=32),
train=dict(
type='TopDownCustomDataset',
ann_file=f'{data_root}/annotations/train.json',
img_prefix=f'{data_root}/image/',
data_cfg=data_cfg,
pipeline=train_pipeline,
dataset_info={{_base_.dataset_info}}),
val=dict(
type='TopDownCustomDataset',
ann_file=f'{data_root}/annotations/val.json',
img_prefix=f'{data_root}/image/',
data_cfg=data_cfg,
pipeline=val_pipeline,
dataset_info={{_base_.dataset_info}}),
test=dict(
type='TopDownCustomDataset',
ann_file=f'{data_root}/annotations/val.json',
img_prefix=f'{data_root}/image/',
data_cfg=data_cfg,
pipeline=test_pipeline,
dataset_info={{_base_.dataset_info}}),
)
For all of the above, you also need to register the custom class in the __init__.py files at each level, again following the existing COCO entries; everything here uses the top_down approach.
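As a rough sketch (the exact import lists differ between mmpose versions), the registration looks like this:
# mmpose/datasets/datasets/top_down/__init__.py
from .topdown_custom_dataset import TopDownCustomDataset

__all__ = [
    # ...keep the existing dataset classes here...
    'TopDownCustomDataset',
]

# Also re-export it one level up, e.g. in mmpose/datasets/datasets/__init__.py,
# and add 'TopDownCustomDataset' to that file's __all__ as well.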
Once that's done you can train directly. Some parameters can be tweaked if you need to; otherwise the defaults are fine.
For single-GPU training, simply run:
python tools/train.py configs/my_custom_config.py
For multiple GPUs (I used three cards):
./tools/dist_train.sh configs/my_custom_config.py 3
After training, you can test it:
python demo/top_down_img_demo.py configs/my_custom_config.py work_dir/my_custom_config/epoch_210.pth --img-root tests/data/coco/ --json-file tests/data/coco/test_coco.json --out-img-root vis_results
To test on your own images, you need to draw the person boxes yourself and prepare them in the COCO-style format; see tests/data/coco/test_coco.json for reference.
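A minimal sketch of such a file is shown below. All values here are made up; for the top-down demo, essentially only the image entries and the person bboxes in [x, y, w, h] form are used.
{
  "images": [
    {"file_name": "my_image.jpg", "height": 480, "width": 640, "id": 1}
  ],
  "annotations": [
    {"id": 1, "image_id": 1, "category_id": 1, "iscrowd": 0, "bbox": [100, 50, 200, 400]}
  ],
  "categories": [
    {"id": 1, "name": "person"}
  ]
}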
To deploy, the trained model needs to be converted to an ONNX model and then run for inference.
mmpose ships with a pytorch2onnx conversion script (see the conversion tutorial in the docs). Pay attention to the training image size.
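Assuming the script path and flags of the mmpose version I used (they may differ in other versions, so check tools/ or tools/deployment/), and remembering that image_size in the config is [width, height] while --shape is N C H W, the conversion would look something like:
python tools/deployment/pytorch2onnx.py configs/my_custom_config.py work_dir/my_custom_config/epoch_210.pth --output-file tmp.onnx --shape 1 3 160 128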
Next is the ONNX inference code. There is no official version, so I wrote one by following the training-time preprocessing. It pulls in helpers from quite a few modules, so I consolidated everything that is used into one file.
# Copyright (c) OpenMMLab. All rights reserved.
from time import time
from torchvision.transforms import functional as F
import numpy as np
import cv2
import warnings
import onnxruntime
#from mmpose.core.evaluation.top_down_eval import keypoints_from_heatmaps
def keypoints_from_heatmaps(heatmaps,
center,
scale,
unbiased=False,
post_process='default',
kernel=11,
valid_radius_factor=0.0546875,
use_udp=False,
target_type='GaussianHeatmap'):
"""Get final keypoint predictions from heatmaps and transform them back to
the image.
Note:
- batch size: N
- num keypoints: K
- heatmap height: H
- heatmap width: W
Args:
heatmaps (np.ndarray[N, K, H, W]): model predicted heatmaps.
center (np.ndarray[N, 2]): Center of the bounding box (x, y).
scale (np.ndarray[N, 2]): Scale of the bounding box
wrt height/width.
post_process (str/None): Choice of methods to post-process
heatmaps. Currently supported: None, 'default', 'unbiased',
'megvii'.
unbiased (bool): Option to use unbiased decoding. Mutually
exclusive with megvii.
Note: this arg is deprecated and unbiased=True can be replaced
by post_process='unbiased'
Paper ref: Zhang et al. Distribution-Aware Coordinate
Representation for Human Pose Estimation (CVPR 2020).
kernel (int): Gaussian kernel size (K) for modulation, which should
match the heatmap gaussian sigma when training.
K=17 for sigma=3 and k=11 for sigma=2.
valid_radius_factor (float): The radius factor of the positive area
in classification heatmap for UDP.
use_udp (bool): Use unbiased data processing.
target_type (str): 'GaussianHeatmap' or 'CombinedTarget'.
GaussianHeatmap: Classification target with gaussian distribution.
CombinedTarget: The combination of classification target
(response map) and regression target (offset map).
Paper ref: Huang et al. The Devil is in the Details: Delving into
Unbiased Data Processing for Human Pose Estimation (CVPR 2020).
Returns:
tuple: A tuple containing keypoint predictions and scores.
- preds (np.ndarray[N, K, 2]): Predicted keypoint location in images.
- maxvals (np.ndarray[N, K, 1]): Scores (confidence) of the keypoints.
"""
# Avoid being affected
heatmaps = heatmaps.copy()
# detect conflicts
if unbiased:
assert post_process not in [False, None, 'megvii']
if post_process in ['megvii', 'unbiased']:
assert kernel > 0
if use_udp:
assert not post_process == 'megvii'
# normalize configs
if post_process is False:
warnings.warn(
'post_process=False is deprecated, '
'please use post_process=None instead', DeprecationWarning)
post_process = None
elif post_process is True:
if unbiased is True:
warnings.warn(
'post_process=True, unbiased=True is deprecated,'
" please use post_process='unbiased' instead",
DeprecationWarning)
post_process = 'unbiased'
else:
warnings.warn(
'post_process=True, unbiased=False is deprecated, '
"please use post_process='default' instead",
DeprecationWarning)
post_process = 'default'
elif post_process == 'default':
if unbiased is True:
warnings.warn(
'unbiased=True is deprecated, please use '
"post_process='unbiased' instead", DeprecationWarning)
post_process = 'unbiased'
# start processing
if post_process == 'megvii':
heatmaps = gaussian_blur(heatmaps, kernel=kernel)
N, K, H, W = heatmaps.shape
if use_udp:
if target_type.lower() == 'GaussianHeatMap'.lower():
preds, maxvals = get_max_preds(heatmaps)
preds = post_dark_udp(preds, heatmaps, kernel=kernel)
elif target_type.lower() == 'CombinedTarget'.lower():
for person_heatmaps in heatmaps:
for i, heatmap in enumerate(person_heatmaps):
kt = 2 * kernel + 1 if i % 3 == 0 else kernel
cv2.GaussianBlur(heatmap, (kt, kt), 0, heatmap)
# valid radius is in direct proportion to the height of heatmap.
valid_radius = valid_radius_factor * H
offset_x = heatmaps[:, 1::3, :].flatten() * valid_radius
offset_y = heatmaps[:, 2::3, :].flatten() * valid_radius
heatmaps = heatmaps[:, ::3, :]
preds, maxvals = get_max_preds(heatmaps)
index = preds[..., 0] + preds[..., 1] * W
index += W * H * np.arange(0, N * K / 3)
index = index.astype(int).reshape(N, K // 3, 1)
preds += np.concatenate((offset_x[index], offset_y[index]), axis=2)
else:
raise ValueError('target_type should be either '
"'GaussianHeatmap' or 'CombinedTarget'")
else:
preds, maxvals = get_max_preds(heatmaps)
if post_process == 'unbiased': # alleviate biased coordinate
# apply Gaussian distribution modulation.
heatmaps = np.log(
np.maximum(gaussian_blur(heatmaps, kernel), 1e-10))
for n in range(N):
for k in range(K):
preds[n][k] = taylor(heatmaps[n][k], preds[n][k])
elif post_process is not None:
# add +/-0.25 shift to the predicted locations for higher acc.
for n in range(N):
for k in range(K):
heatmap = heatmaps[n][k]
px = int(preds[n][k][0])
py = int(preds[n][k][1])
if 1 < px < W - 1 and 1 < py < H - 1:
diff = np.array([
heatmap[py][px + 1] - heatmap[py][px - 1],
heatmap[py + 1][px] - heatmap[py - 1][px]
])
preds[n][k] += np.sign(diff) * .25
if post_process == 'megvii':
preds[n][k] += 0.5
# Transform back to the image
for i in range(N):
preds[i] = transform_preds(
preds[i], center[i], scale[i], [W, H], use_udp=use_udp)
if post_process == 'megvii':
maxvals = maxvals / 255.0 + 0.5
return preds, maxvals
def taylor(heatmap, coord):
"""Distribution aware coordinate decoding method.
Note:
- heatmap height: H
- heatmap width: W
Args:
heatmap (np.ndarray[H, W]): Heatmap of a particular joint type.
coord (np.ndarray[2,]): Coordinates of the predicted keypoints.
Returns:
np.ndarray[2,]: Updated coordinates.
"""
H, W = heatmap.shape[:2]
px, py = int(coord[0]), int(coord[1])
if 1 < px < W - 2 and 1 < py < H - 2:
dx = 0.5 * (heatmap[py][px + 1] - heatmap[py][px - 1])
dy = 0.5 * (heatmap[py + 1][px] - heatmap[py - 1][px])
dxx = 0.25 * (
heatmap[py][px + 2] - 2 * heatmap[py][px] + heatmap[py][px - 2])
dxy = 0.25 * (
heatmap[py + 1][px + 1] - heatmap[py - 1][px + 1] -
heatmap[py + 1][px - 1] + heatmap[py - 1][px - 1])
dyy = 0.25 * (
heatmap[py + 2 * 1][px] - 2 * heatmap[py][px] +
heatmap[py - 2 * 1][px])
derivative = np.array([[dx], [dy]])
hessian = np.array([[dxx, dxy], [dxy, dyy]])
if dxx * dyy - dxy**2 != 0:
hessianinv = np.linalg.inv(hessian)
offset = -hessianinv @ derivative
offset = np.squeeze(np.array(offset.T), axis=0)
coord += offset
return coord
def transform_preds(coords, center, scale, output_size, use_udp=False):
"""Get final keypoint predictions from heatmaps and apply scaling and
translation to map them back to the image.
Note:
num_keypoints: K
Args:
coords (np.ndarray[K, ndims]):
* If ndims=2, corrds are predicted keypoint location.
* If ndims=4, corrds are composed of (x, y, scores, tags)
* If ndims=5, corrds are composed of (x, y, scores, tags,
flipped_tags)
center (np.ndarray[2, ]): Center of the bounding box (x, y).
scale (np.ndarray[2, ]): Scale of the bounding box
wrt [width, height].
output_size (np.ndarray[2, ] | list(2,)): Size of the
destination heatmaps.
use_udp (bool): Use unbiased data processing
Returns:
np.ndarray: Predicted coordinates in the images.
"""
assert coords.shape[1] in (2, 4, 5)
assert len(center) == 2
assert len(scale) == 2
assert len(output_size) == 2
# Recover the scale which is normalized by a factor of 200.
scale = scale * 200.0
if use_udp:
scale_x = scale[0] / (output_size[0] - 1.0)
scale_y = scale[1] / (output_size[1] - 1.0)
else:
scale_x = scale[0] / output_size[0]
scale_y = scale[1] / output_size[1]
target_coords = np.ones_like(coords)
target_coords[:, 0] = coords[:, 0] * scale_x + center[0] - scale[0] * 0.5
target_coords[:, 1] = coords[:, 1] * scale_y + center[1] - scale[1] * 0.5
return target_coords
def post_dark_udp(coords, batch_heatmaps, kernel=3):
"""DARK post-pocessing. Implemented by udp. Paper ref: Huang et al. The
Devil is in the Details: Delving into Unbiased Data Processing for Human
Pose Estimation (CVPR 2020). Zhang et al. Distribution-Aware Coordinate
Representation for Human Pose Estimation (CVPR 2020).
Note:
- batch size: B
- num keypoints: K
- num persons: N
- height of heatmaps: H
- width of heatmaps: W
B=1 for bottom_up paradigm where all persons share the same heatmap.
B=N for top_down paradigm where each person has its own heatmaps.
Args:
coords (np.ndarray[N, K, 2]): Initial coordinates of human pose.
batch_heatmaps (np.ndarray[B, K, H, W]): batch_heatmaps
kernel (int): Gaussian kernel size (K) for modulation.
Returns:
np.ndarray([N, K, 2]): Refined coordinates.
"""
if not isinstance(batch_heatmaps, np.ndarray):
batch_heatmaps = batch_heatmaps.cpu().numpy()
B, K, H, W = batch_heatmaps.shape
N = coords.shape[0]
assert (B == 1 or B == N)
for heatmaps in batch_heatmaps:
for heatmap in heatmaps:
cv2.GaussianBlur(heatmap, (kernel, kernel), 0, heatmap)
np.clip(batch_heatmaps, 0.001, 50, batch_heatmaps)
np.log(batch_heatmaps, batch_heatmaps)
batch_heatmaps_pad = np.pad(
batch_heatmaps, ((0, 0), (0, 0), (1, 1), (1, 1)),
mode='edge').flatten()
index = coords[..., 0] + 1 + (coords[..., 1] + 1) * (W + 2)
index += (W + 2) * (H + 2) * np.arange(0, B * K).reshape(-1, K)
index = index.astype(int).reshape(-1, 1)
i_ = batch_heatmaps_pad[index]
ix1 = batch_heatmaps_pad[index + 1]
iy1 = batch_heatmaps_pad[index + W + 2]
ix1y1 = batch_heatmaps_pad[index + W + 3]
ix1_y1_ = batch_heatmaps_pad[index - W - 3]
ix1_ = batch_heatmaps_pad[index - 1]
iy1_ = batch_heatmaps_pad[index - 2 - W]
dx = 0.5 * (ix1 - ix1_)
dy = 0.5 * (iy1 - iy1_)
derivative = np.concatenate([dx, dy], axis=1)
derivative = derivative.reshape(N, K, 2, 1)
dxx = ix1 - 2 * i_ + ix1_
dyy = iy1 - 2 * i_ + iy1_
dxy = 0.5 * (ix1y1 - ix1 - iy1 + i_ + i_ - ix1_ - iy1_ + ix1_y1_)
hessian = np.concatenate([dxx, dxy, dxy, dyy], axis=1)
hessian = hessian.reshape(N, K, 2, 2)
hessian = np.linalg.inv(hessian + np.finfo(np.float32).eps * np.eye(2))
coords -= np.einsum('ijmn,ijnk->ijmk', hessian, derivative).squeeze()
return coords
def get_max_preds(heatmaps):
"""Get keypoint predictions from score maps.
Note:
batch_size: N
num_keypoints: K
heatmap height: H
heatmap width: W
Args:
heatmaps (np.ndarray[N, K, H, W]): model predicted heatmaps.
Returns:
tuple: A tuple containing aggregated results.
- preds (np.ndarray[N, K, 2]): Predicted keypoint location.
- maxvals (np.ndarray[N, K, 1]): Scores (confidence) of the keypoints.
"""
assert isinstance(heatmaps,
np.ndarray), ('heatmaps should be numpy.ndarray')
assert heatmaps.ndim == 4, 'batch_images should be 4-ndim'
N, K, _, W = heatmaps.shape
heatmaps_reshaped = heatmaps.reshape((N, K, -1))
idx = np.argmax(heatmaps_reshaped, 2).reshape((N, K, 1))
maxvals = np.amax(heatmaps_reshaped, 2).reshape((N, K, 1))
preds = np.tile(idx, (1, 1, 2)).astype(np.float32)
preds[:, :, 0] = preds[:, :, 0] % W
preds[:, :, 1] = preds[:, :, 1] // W
preds = np.where(np.tile(maxvals, (1, 1, 2)) > 0.0, preds, -1)
return preds, maxvals
def get_max_preds_3d(heatmaps):
"""Get keypoint predictions from 3D score maps.
Note:
batch size: N
num keypoints: K
heatmap depth size: D
heatmap height: H
heatmap width: W
Args:
heatmaps (np.ndarray[N, K, D, H, W]): model predicted heatmaps.
Returns:
tuple: A tuple containing aggregated results.
- preds (np.ndarray[N, K, 3]): Predicted keypoint location.
- maxvals (np.ndarray[N, K, 1]): Scores (confidence) of the keypoints.
"""
assert isinstance(heatmaps, np.ndarray), \
('heatmaps should be numpy.ndarray')
assert heatmaps.ndim == 5, 'heatmaps should be 5-ndim'
N, K, D, H, W = heatmaps.shape
heatmaps_reshaped = heatmaps.reshape((N, K, -1))
idx = np.argmax(heatmaps_reshaped, 2).reshape((N, K, 1))
maxvals = np.amax(heatmaps_reshaped, 2).reshape((N, K, 1))
preds = np.zeros((N, K, 3), dtype=np.float32)
_idx = idx[..., 0]
preds[..., 2] = _idx // (H * W)
preds[..., 1] = (_idx // W) % H
preds[..., 0] = _idx % W
preds = np.where(maxvals > 0.0, preds, -1)
return preds, maxvals
def get_3rd_point(a, b):
"""To calculate the affine matrix, three pairs of points are required. This
function is used to get the 3rd point, given 2D points a & b.
The 3rd point is defined by rotating vector `a - b` by 90 degrees
anticlockwise, using b as the rotation center.
Args:
a (np.ndarray): point(x,y)
b (np.ndarray): point(x,y)
Returns:
np.ndarray: The 3rd point.
"""
assert len(a) == 2
assert len(b) == 2
direction = a - b
third_pt = b + np.array([-direction[1], direction[0]], dtype=np.float32)
return third_pt
def gaussian_blur(heatmaps, kernel=11):
"""Modulate heatmap distribution with Gaussian.
sigma = 0.3*((kernel_size-1)*0.5-1)+0.8
sigma~=3 if k=17
sigma=2 if k=11;
sigma~=1.5 if k=7;
sigma~=1 if k=3;
Note:
- batch_size: N
- num_keypoints: K
- heatmap height: H
- heatmap width: W
Args:
heatmaps (np.ndarray[N, K, H, W]): model predicted heatmaps.
kernel (int): Gaussian kernel size (K) for modulation, which should
match the heatmap gaussian sigma when training.
K=17 for sigma=3 and k=11 for sigma=2.
Returns:
np.ndarray ([N, K, H, W]): Modulated heatmap distribution.
"""
assert kernel % 2 == 1
border = (kernel - 1) // 2
batch_size = heatmaps.shape[0]
num_joints = heatmaps.shape[1]
height = heatmaps.shape[2]
width = heatmaps.shape[3]
for i in range(batch_size):
for j in range(num_joints):
origin_max = np.max(heatmaps[i, j])
dr = np.zeros((height + 2 * border, width + 2 * border),
dtype=np.float32)
dr[border:-border, border:-border] = heatmaps[i, j].copy()
dr = cv2.GaussianBlur(dr, (kernel, kernel), 0)
heatmaps[i, j] = dr[border:-border, border:-border].copy()
heatmaps[i, j] *= origin_max / np.max(heatmaps[i, j])
return heatmaps
def rotate_point(pt, angle_rad):
"""Rotate a point by an angle.
Args:
pt (list[float]): 2 dimensional point to be rotated
angle_rad (float): rotation angle by radian
Returns:
list[float]: Rotated point.
"""
assert len(pt) == 2
sn, cs = np.sin(angle_rad), np.cos(angle_rad)
new_x = pt[0] * cs - pt[1] * sn
new_y = pt[0] * sn + pt[1] * cs
rotated_pt = [new_x, new_y]
return rotated_pt
def get_affine_transform(center,
scale,
rot,
output_size,
shift=(0., 0.),
inv=False):
"""Get the affine transform matrix, given the center/scale/rot/output_size.
Args:
center (np.ndarray[2, ]): Center of the bounding box (x, y).
scale (np.ndarray[2, ]): Scale of the bounding box
wrt [width, height].
rot (float): Rotation angle (degree).
output_size (np.ndarray[2, ] | list(2,)): Size of the
destination heatmaps.
shift (0-100%): Shift translation ratio wrt the width/height.
Default (0., 0.).
inv (bool): Option to inverse the affine transform direction.
(inv=False: src->dst or inv=True: dst->src)
Returns:
np.ndarray: The transform matrix.
"""
assert len(center) == 2
assert len(scale) == 2
assert len(output_size) == 2
assert len(shift) == 2
# pixel_std is 200.
scale_tmp = scale * 200.0
shift = np.array(shift)
src_w = scale_tmp[0]
dst_w = output_size[0]
dst_h = output_size[1]
rot_rad = np.pi * rot / 180
src_dir = rotate_point([0., src_w * -0.5], rot_rad)
dst_dir = np.array([0., dst_w * -0.5])
src = np.zeros((3, 2), dtype=np.float32)
src[0, :] = center + scale_tmp * shift
src[1, :] = center + src_dir + scale_tmp * shift
src[2, :] = get_3rd_point(src[0, :], src[1, :])
dst = np.zeros((3, 2), dtype=np.float32)
dst[0, :] = [dst_w * 0.5, dst_h * 0.5]
dst[1, :] = np.array([dst_w * 0.5, dst_h * 0.5]) + dst_dir
dst[2, :] = get_3rd_point(dst[0, :], dst[1, :])
if inv:
trans = cv2.getAffineTransform(np.float32(dst), np.float32(src))
else:
trans = cv2.getAffineTransform(np.float32(src), np.float32(dst))
return trans
def inference_with_onnx(config, img):
onnx_model = config['modelPaths']
session = onnxruntime.InferenceSession(onnx_model)
output_tensor = [node.name for node in session.get_outputs()]
input_tensor = session.get_inputs()
output_result = session.run(output_tensor, input_feed={input_tensor[0].name: img})
return output_result
def vis_pose(img, points):
for i, point in enumerate(points):
x, y = point
x = int(x)
y = int(y)
cv2.circle(img, (x, y), 4, (0, 0, 255), thickness=-1, lineType=cv2.FILLED)
cv2.putText(img, '{}'.format(i), (x, y), fontFace=cv2.FONT_HERSHEY_SIMPLEX, fontScale=0.5,
color=(255, 255, 255),
thickness=1, lineType=cv2.LINE_AA)
return img
if __name__ == '__main__':
config = {
'modelPaths': 'tmp.onnx',
'image_file': 'data/custom/image/1.jpg'
}
#read image
img = cv2.imread(config['image_file'])
#data process
# cv2 images are (height, width, channels)
height, width, _ = img.shape
x, y, w, h = 0, 0, width, height
center = np.array([[x + w * 0.5, y + h * 0.5]], dtype=np.float32)
input_size = [128, 160]
aspect_ratio = input_size[0] / input_size[1]
if w > aspect_ratio * h:
h = w * 1.0 / aspect_ratio
elif w < aspect_ratio * h:
w = h * aspect_ratio
scale = np.array([[w / 200, h / 200]], dtype=np.float32)
scale = scale * 1.25
#Get the affine transform matrix, given the center/scale/rot/output_size.
trans = get_affine_transform(center[0], scale[0], 0, input_size)
#Affine transformation
image = cv2.warpAffine(
img,
trans, (int(input_size[0]), int(input_size[1])),
flags=cv2.INTER_LINEAR)
#NormalizeTensor
image = F.to_tensor(image)
image = F.normalize(image, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
image = image.unsqueeze(0)
image = image.numpy()
#get heatmap
heatmap = inference_with_onnx(config, image)[0]
# get keypoints from heatmaps
res = keypoints_from_heatmaps(heatmap, center, scale)[0]
res = res.tolist()
#vis pose
img = vis_pose(img, res[0])
#save
cv2.imwrite('result.png', img)
Adapt this to your own situation, and be careful with the image width/height ordering (cv2's img.shape gives height first).
Apologies if this writeup is a bit messy…