HRNet model source code: https://github.com/HRNet/HRNet-Human-Pose-Estimation
Install PyTorch (CUDA version: 10.0):
#conda install pytorch==1.0.0 torchvision==0.2.1 cuda100 -c pytorch
conda install pytorch==1.2.0 torchvision==0.4.0 cudatoolkit=10.0 -c pytorch
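A quick sanity check that the install picked up CUDA (run inside the activated env):
python -c "import torch; print(torch.__version__, torch.cuda.is_available())"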
Install the HRNet dependencies (requirements.txt):
EasyDict==1.7
opencv-python
shapely
Cython
scipy
pandas
pyyaml
json_tricks
scikit-image
yacs>=0.1.5
tensorboardX>=1.6
# optional: install through the Tsinghua PyPI mirror
#pip install -i https://pypi.tuna.tsinghua.edu.cn/simple --trusted-host pypi.tuna.tsinghua.edu.cn
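If the list above is saved as requirements.txt (file name is my own choice here), everything can be installed in one go through the mirror:
pip install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple --trusted-host pypi.tuna.tsinghua.edu.cn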
Changes made:
1. main() --line93 --load the model onto the GPU
Before:
model = eval('models.' + cfg.MODEL.NAME + '.get_pose_net')(
    cfg, is_train=True
)
After:
model = eval('models.' + cfg.MODEL.NAME + '.get_pose_net')(
    cfg, is_train=True
).cuda()
2. main() --line110 --move the dummy input tensor onto the GPU
Before:
dump_input = torch.rand(
    (1, 3, cfg.MODEL.IMAGE_SIZE[1], cfg.MODEL.IMAGE_SIZE[0])
)
After:
dump_input = torch.rand(
    (1, 3, cfg.MODEL.IMAGE_SIZE[1], cfg.MODEL.IMAGE_SIZE[0])
).cuda()
3. main() --line111 --this line raises an error (likely the tensorboard add_graph call); comment it out
4. main() --line115 --the multi-GPU DataParallel wrap; comment it out for a single GPU
5. lib/core/function.py --line39 --move the input batch onto the GPU
Add:
input = input.cuda()
6. lib/core/function.py --line43 --keep the output on the GPU
Before:
outputs = model(input)
After:
outputs = model(input).cuda()
(Since the model and input already live on the GPU, the extra .cuda() is a no-op for a single tensor output; note it would fail if the model returned a list of heatmaps.)
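For orientation, a paraphrased sketch of where changes 5 and 6 sit in train() of lib/core/function.py (loop structure taken from the upstream repo; only the two marked lines are the actual edits):

for i, (input, target, target_weight, meta) in enumerate(train_loader):
    input = input.cuda()    # change 5: move the batch onto the GPU
    outputs = model(input)  # change 6: forward pass; the output is already on the GPU
    ...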
7. lib/dataset/coco.py --line113 --path to the training annotations
8. lib/dataset/coco.py --line232 --path to the training images
9. lib/dataset/coco.py --line136 --the full training set is too large, so shrink it (keeping every 25th image cuts COCO train2017's ~118k images down to roughly 4.7k)
Before:
def _load_coco_keypoint_annotations(self):
    """ ground truth bbox and keypoints """
    gt_db = []
    for index in self.image_set_index:
        gt_db.extend(self._load_coco_keypoint_annotation_kernal(index))
    return gt_db
After:
def _load_coco_keypoint_annotations(self):
    """ ground truth bbox and keypoints """
    gt_db = []
    i = 0
    for index in self.image_set_index:
        if i % 25 == 0:  # keep only every 25th image
            gt_db.extend(self._load_coco_keypoint_annotation_kernal(index))
        i += 1
    return gt_db
10. lib/nms/nms.py --line13,14 --the compiled cpu_nms/gpu_nms extension modules cannot be imported
Comment out lines 13 and 14 and add a pure-Python cpu_nms function (numpy must be imported as np in nms.py):
def cpu_nms(dets, thresh):
    # dets rows: [x1, y1, x2, y2, score]
    x1 = dets[:, 0]
    y1 = dets[:, 1]
    x2 = dets[:, 2]
    y2 = dets[:, 3]
    areas = (y2 - y1 + 1) * (x2 - x1 + 1)
    scores = dets[:, 4]
    keep = []
    index = scores.argsort()[::-1]
    while index.size > 0:
        i = index[0]  # the first is always the highest-scoring box; keep it
        keep.append(i)
        # corners of the intersection with the remaining boxes
        x11 = np.maximum(x1[i], x1[index[1:]])
        y11 = np.maximum(y1[i], y1[index[1:]])
        x22 = np.minimum(x2[i], x2[index[1:]])
        y22 = np.minimum(y2[i], y2[index[1:]])
        w = np.maximum(0, x22 - x11 + 1)  # width of the overlap
        h = np.maximum(0, y22 - y11 + 1)  # height of the overlap
        overlaps = w * h
        ious = overlaps / (areas[i] + areas[index[1:]] - overlaps)
        idx = np.where(ious <= thresh)[0]
        index = index[idx + 1]  # +1 because ious was computed against index[1:]
    return keep
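A minimal check of cpu_nms on toy boxes ([x1, y1, x2, y2, score] rows; values made up for illustration):

import numpy as np
dets = np.array([
    [10., 10., 50., 50., 0.9],
    [12., 12., 48., 48., 0.8],      # IoU ~0.81 with the first box
    [100., 100., 150., 150., 0.7],
])
print(cpu_nms(dets, thresh=0.5))    # [0, 2]: the overlapping lower-score box is suppressed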
11. Modify the w32_384x288_adam_lr1e-3.yaml file:
AUTO_RESUME: true
CUDNN:
  BENCHMARK: false
  DETERMINISTIC: true
  ENABLED: true
DATA_DIR: ''
# GPUS: (0,1,2,3)
GPUS: (0,)
OUTPUT_DIR: 'output'
LOG_DIR: 'log'
WORKERS: 0
PRINT_FREQ: 1

DATASET:
  COLOR_RGB: true
  DATASET: 'coco'
  DATA_FORMAT: jpg
  FLIP: true
  NUM_JOINTS_HALF_BODY: 8
  PROB_HALF_BODY: 0.3
  ROOT: 'E:/MS_COCO/'
  # ROOT: 'data/coco/'
  ROT_FACTOR: 45
  SCALE_FACTOR: 0.35
  TEST_SET: 'val2017'
  TRAIN_SET: 'train2017'
MODEL:
  INIT_WEIGHTS: False
  NAME: pose_hrnet
  NUM_JOINTS: 17
  PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth'
  TARGET_TYPE: gaussian
  IMAGE_SIZE:
  - 288
  - 384
  HEATMAP_SIZE:
  - 72
  - 96
  SIGMA: 3
  EXTRA:
    PRETRAINED_LAYERS:
    - 'conv1'
    - 'bn1'
    - 'conv2'
    - 'bn2'
    - 'layer1'
    - 'transition1'
    - 'stage2'
    - 'transition2'
    - 'stage3'
    - 'transition3'
    - 'stage4'
    FINAL_CONV_KERNEL: 1
    STAGE2:
      NUM_MODULES: 1
      NUM_BRANCHES: 2
      BLOCK: BASIC
      NUM_BLOCKS:
      - 4
      - 4
      NUM_CHANNELS:
      - 32
      - 64
      FUSE_METHOD: SUM
    STAGE3:
      NUM_MODULES: 4
      NUM_BRANCHES: 3
      BLOCK: BASIC
      NUM_BLOCKS:
      - 4
      - 4
      - 4
      NUM_CHANNELS:
      - 32
      - 64
      - 128
      FUSE_METHOD: SUM
    STAGE4:
      NUM_MODULES: 3
      NUM_BRANCHES: 4
      BLOCK: BASIC
      NUM_BLOCKS:
      - 4
      - 4
      - 4
      - 4
      NUM_CHANNELS:
      - 32
      - 64
      - 128
      - 256
      FUSE_METHOD: SUM
LOSS:
  USE_TARGET_WEIGHT: true
TRAIN:
  BATCH_SIZE_PER_GPU: 4
  SHUFFLE: true
  BEGIN_EPOCH: 0
  END_EPOCH: 210
  OPTIMIZER: adam
  LR: 0.001
  LR_FACTOR: 0.1
  LR_STEP:
  - 170
  - 200
  WD: 0.0001
  GAMMA1: 0.99
  GAMMA2: 0.0
  MOMENTUM: 0.9
  NESTEROV: false
TEST:
  BATCH_SIZE_PER_GPU: 16
  COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json'
  BBOX_THRE: 1.0
  IMAGE_THRE: 0.0
  IN_VIS_THRE: 0.2
  MODEL_FILE: ''
  NMS_THRE: 1.0
  OKS_THRE: 0.9
  USE_GT_BBOX: true
  FLIP_TEST: true
  POST_PROCESS: true
  SHIFT_HEATMAP: true
DEBUG:
  DEBUG: true
  SAVE_BATCH_IMAGES_GT: true
  SAVE_BATCH_IMAGES_PRED: true
  SAVE_HEATMAPS_GT: true
  SAVE_HEATMAPS_PRED: true
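With the yaml in place, training is launched from the repo root the same way as in the upstream README (assuming the file lives under experiments/coco/hrnet/):
python tools/train.py --cfg experiments/coco/hrnet/w32_384x288_adam_lr1e-3.yaml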
12. Generate pre_image and pre_heatmap outputs for a single person
The batch size must be set to 1 (the per-joint image files below are overwritten for each sample, and the image-name slicing assumes one image per batch).
1> Modify validate in E:/HRNet-Human-Pose-Estimation-master/lib/core/function.py:
def validate(config, val_loader, val_dataset, model, criterion, output_dir,
             tb_log_dir, all_accuracy, writer_dict=None):
    batch_time = AverageMeter()
    losses = AverageMeter()
    acc = AverageMeter()

    # switch to evaluate mode
    model.eval()

    num_samples = len(val_dataset)
    all_preds = np.zeros(
        (num_samples, config.MODEL.NUM_JOINTS, 3),
        dtype=np.float32
    )
    all_boxes = np.zeros((num_samples, 6))
    image_path = []
    filenames = []
    imgnums = []
    idx = 0
    with torch.no_grad():
        end = time.time()
        for i, (input, target, target_weight, meta) in enumerate(val_loader):
            # compute output
            # (with the single-GPU changes above, input may also need .cuda() here)
            outputs = model(input)
            if isinstance(outputs, list):
                output = outputs[-1]
            else:
                output = outputs

            if config.TEST.FLIP_TEST:
                # this part is ugly, because pytorch has not supported negative index
                # input_flipped = model(input[:, :, :, ::-1])
                input_flipped = np.flip(input.cpu().numpy(), 3).copy()
                input_flipped = torch.from_numpy(input_flipped).cuda()
                outputs_flipped = model(input_flipped)

                if isinstance(outputs_flipped, list):
                    output_flipped = outputs_flipped[-1]
                else:
                    output_flipped = outputs_flipped

                output_flipped = flip_back(output_flipped.cpu().numpy(),
                                           val_dataset.flip_pairs)
                output_flipped = torch.from_numpy(output_flipped.copy()).cuda()

                # feature is not aligned, shift flipped heatmap for higher accuracy
                if config.TEST.SHIFT_HEATMAP:
                    output_flipped[:, :, :, 1:] = \
                        output_flipped.clone()[:, :, :, 0:-1]

                output = (output + output_flipped) * 0.5

            target = target.cuda(non_blocking=True)
            target_weight = target_weight.cuda(non_blocking=True)

            loss = criterion(output, target, target_weight)

            num_images = input.size(0)
            # measure accuracy and record loss
            losses.update(loss.item(), num_images)
            _, avg_acc, cnt, pred = accuracy(output.cpu().numpy(),
                                             target.cpu().numpy())
            acc.update(avg_acc, cnt)

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            c = meta['center'].numpy()
            s = meta['scale'].numpy()
            score = meta['score'].numpy()

            preds, maxvals = get_final_preds(
                config, output.clone().cpu().numpy(), c, s)

            all_preds[idx:idx + num_images, :, 0:2] = preds[:, :, 0:2]
            all_preds[idx:idx + num_images, :, 2:3] = maxvals
            # double check this all_boxes parts
            all_boxes[idx:idx + num_images, 0:2] = c[:, 0:2]
            all_boxes[idx:idx + num_images, 2:4] = s[:, 0:2]
            all_boxes[idx:idx + num_images, 4] = np.prod(s * 200, 1)
            all_boxes[idx:idx + num_images, 5] = score
            # image_path.extend(meta['image'])
            # slice the 12-digit COCO image id out of the image path
            image_name = str(meta['image'])[-18:-6]
            idx += num_images

            # per-batch logging removed:
            # if i % config.PRINT_FREQ != 0:
            #     msg = 'Test: [{0}/{1}]\t' \
            #           'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' \
            #           'Loss {loss.val:.4f} ({loss.avg:.4f})\t' \
            #           'Accuracy {acc.val:.3f} ({acc.avg:.3f})'.format(
            #               i, len(val_loader), batch_time=batch_time,
            #               loss=losses, acc=acc)
            #     logger.info(msg)
            #     all_accuracy.append((loss.item(), avg_acc))

            prefix = '{}_{}_{}'.format(
                os.path.join(output_dir, 'val'), i, image_name
            )
            # pred is in heatmap coordinates; *4 maps it back to input-image coordinates
            save_debug_images(config, input, meta, target, pred * 4, output, prefix)

    # COCO evaluation and tensorboard logging removed:
    # name_values, perf_indicator = val_dataset.evaluate(
    #     config, all_preds, output_dir, all_boxes, image_path,
    #     filenames, imgnums
    # )
    # model_name = config.MODEL.NAME
    # if isinstance(name_values, list):
    #     for name_value in name_values:
    #         _print_name_value(name_value, model_name)
    # else:
    #     _print_name_value(name_values, model_name)
    # if writer_dict:
    #     writer = writer_dict['writer']
    #     global_steps = writer_dict['valid_global_steps']
    #     writer.add_scalar('valid_loss', losses.avg, global_steps)
    #     writer.add_scalar('valid_acc', acc.avg, global_steps)
    #     if isinstance(name_values, list):
    #         for name_value in name_values:
    #             writer.add_scalars('valid', dict(name_value), global_steps)
    #     else:
    #         writer.add_scalars('valid', dict(name_values), global_steps)
    #     writer_dict['valid_global_steps'] = global_steps + 1
    # return perf_indicator
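This validate is reached through tools/test.py, invoked as in the upstream README (checkpoint path is a placeholder; the call site in tools/test.py must also be updated to pass the extra all_accuracy argument added to the signature above):
python tools/test.py --cfg experiments/coco/hrnet/w32_384x288_adam_lr1e-3.yaml TEST.MODEL_FILE <path_to_checkpoint.pth>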
2> Modify E:/HRNet-Human-Pose-Estimation-master/lib/utils/vis.py:
# ------------------------------------------------------------------------------
# Copyright (c) Microsoft
# Licensed under the MIT License.
# Written by Bin Xiao ([email protected])
# ------------------------------------------------------------------------------

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import math

import numpy as np
import torchvision
import cv2

from core.inference import get_max_preds


def save_batch_image_with_joints(batch_image, batch_joints, batch_joints_vis,
                                 file_name, nrow=8, padding=2):
    '''
    batch_image: [batch_size, channel, height, width]
    batch_joints: [batch_size, num_joints, 3],
    batch_joints_vis: [batch_size, num_joints, 1],
    '''
    grid = torchvision.utils.make_grid(batch_image, nrow, padding, True)
    ndarr = grid.mul(255).clamp(0, 255).byte().permute(1, 2, 0).cpu().numpy()
    ndarr = ndarr.copy()

    nmaps = batch_image.size(0)
    xmaps = min(nrow, nmaps)
    ymaps = int(math.ceil(float(nmaps) / xmaps))
    height = int(batch_image.size(2) + padding)
    width = int(batch_image.size(3) + padding)
    k = 0
    for y in range(ymaps):
        for x in range(xmaps):
            if k >= nmaps:
                break
            joints = batch_joints[k]
            joints_vis = batch_joints_vis[k]

            for joint, joint_vis in zip(joints, joints_vis):
                joint[0] = x * width + padding + joint[0]
                joint[1] = y * height + padding + joint[1]
                if joint_vis[0]:
                    cv2.circle(ndarr, (int(joint[0]), int(joint[1])), 2,
                               [255, 0, 0], 2)
            k = k + 1
    cv2.imwrite(file_name, ndarr)
def save_batch_heatmaps(batch_image, batch_heatmaps, file_name,
                        normalize=True):
    '''
    batch_image: [batch_size, channel, height, width]
    batch_heatmaps: [batch_size, num_joints, height, width]
    file_name: saved file name
    '''
    if normalize:
        batch_image = batch_image.clone()
        min = float(batch_image.min())
        max = float(batch_image.max())

        batch_image.add_(-min).div_(max - min + 1e-5)

    batch_size = batch_heatmaps.size(0)
    num_joints = batch_heatmaps.size(1)
    heatmap_height = batch_heatmaps.size(2)
    heatmap_width = batch_heatmaps.size(3)

    grid_image = np.zeros((batch_size * heatmap_height,
                           (num_joints + 1) * heatmap_width,
                           3),
                          dtype=np.uint8)

    preds, maxvals = get_max_preds(batch_heatmaps.detach().cpu().numpy())
    # earlier variant: per-joint overlays at native heatmap resolution (96x72)
    # for i in range(batch_size):
    #     image = batch_image[i].mul(255)\
    #         .clamp(0, 255)\
    #         .byte()\
    #         .permute(1, 2, 0)\
    #         .cpu().numpy()
    #     heatmaps = batch_heatmaps[i].mul(255)\
    #         .clamp(0, 255)\
    #         .byte()\
    #         .cpu().numpy()
    #     resized_image = cv2.resize(image,
    #                                (int(heatmap_width), int(heatmap_height)))
    #     height_begin = heatmap_height * i
    #     height_end = heatmap_height * (i + 1)
    #     for j in range(num_joints):
    #         per_grid_image = np.zeros((batch_size*heatmap_height,
    #                                    1*heatmap_width, 3), dtype=np.uint8)
    #         per_resized_image = cv2.resize(image,
    #                                        (int(heatmap_width), int(heatmap_height)))
    #         cv2.circle(per_resized_image,
    #                    (int(preds[i][j][0]), int(preds[i][j][1])),
    #                    1, [0, 0, 255], 1)
    #         heatmap = heatmaps[j, :, :]
    #         colored_heatmap = cv2.applyColorMap(heatmap, cv2.COLORMAP_JET)
    #         per_masked_image = colored_heatmap*0.7 + per_resized_image*0.3
    #         cv2.circle(per_masked_image,
    #                    (int(preds[i][j][0]), int(preds[i][j][1])),
    #                    1, [0, 0, 255], 1)
    #         per_width_begin = heatmap_width * 0
    #         per_width_end = heatmap_width * 1
    #         per_grid_image[height_begin:height_end, per_width_begin:per_width_end, :] = \
    #             per_masked_image
    #         name = 'E:/HRNet-Human-Pose-Estimation-master/output/coco/new_pose_hrnet/test/' + 'pre_' + str(j) + '.jpg'
    #         # name = 'E:/HRNet-Human-Pose-Estimation-master/output/coco/new_pose_hrnet/test/' + 'gt_' + str(j+16) + '.jpg'
    #         cv2.imwrite(name, per_masked_image)
    # active version: per-joint overlays upsampled 4x from heatmap resolution
    for i in range(batch_size):
        image = batch_image[i].mul(255)\
            .clamp(0, 255)\
            .byte()\
            .permute(1, 2, 0)\
            .cpu().numpy()
        heatmaps = batch_heatmaps[i].mul(255)\
            .clamp(0, 255)\
            .byte()\
            .cpu().numpy()

        resized_image = cv2.resize(image,
                                   (int(heatmap_width), int(heatmap_height)))

        height_begin = heatmap_height * i
        height_end = heatmap_height * (i + 1)
        for j in range(num_joints):
            # leftover from the grid variant above; unused in this per-joint loop
            per_grid_image = np.zeros((batch_size * heatmap_height,
                                       1 * heatmap_width, 3), dtype=np.uint8)
            # upsample image and heatmap by 4, so joint coordinates must be scaled by 4 too
            per_resized_image = cv2.resize(image,
                                           (int(heatmap_width * 4), int(heatmap_height * 4)))
            cv2.circle(per_resized_image,
                       (int(preds[i][j][0] * 4), int(preds[i][j][1] * 4)),
                       1, [0, 0, 255], 1)
            heatmap = heatmaps[j, :, :]
            heatmap = cv2.resize(heatmap,
                                 (int(heatmap_width * 4), int(heatmap_height * 4)))
            colored_heatmap = cv2.applyColorMap(heatmap, cv2.COLORMAP_JET)
            per_masked_image = colored_heatmap * 0.7 + per_resized_image * 0.3
            cv2.circle(per_masked_image,
                       (int(preds[i][j][0] * 4), int(preds[i][j][1] * 4)),  # also scaled by 4 to match
                       1, [0, 0, 255], 1)
            # per_width_begin = heatmap_width * 0
            # per_width_end = heatmap_width * 1
            # per_grid_image[height_begin:height_end, per_width_begin:per_width_end, :] = \
            #     per_masked_image
            name = 'E:/HRNet-Human-Pose-Estimation-master/output/coco/new_pose_hrnet/test/' + 'pre_' + str(j) + '.jpg'
            # name = 'E:/HRNet-Human-Pose-Estimation-master/output/coco/new_pose_hrnet/test/' + 'gt_' + str(j+16) + '.jpg'
            cv2.imwrite(name, per_masked_image)
    # original grid version:
    # for i in range(batch_size):
    #     image = batch_image[i].mul(255)\
    #         .clamp(0, 255)\
    #         .byte()\
    #         .permute(1, 2, 0)\
    #         .cpu().numpy()
    #     heatmaps = batch_heatmaps[i].mul(255)\
    #         .clamp(0, 255)\
    #         .byte()\
    #         .cpu().numpy()
    #     resized_image = cv2.resize(image,
    #                                (int(heatmap_width), int(heatmap_height)))
    #     height_begin = heatmap_height * i
    #     height_end = heatmap_height * (i + 1)
    #     for j in range(num_joints):
    #         cv2.circle(resized_image,
    #                    (int(preds[i][j][0]), int(preds[i][j][1])),
    #                    1, [0, 0, 255], 1)
    #         heatmap = heatmaps[j, :, :]
    #         colored_heatmap = cv2.applyColorMap(heatmap, cv2.COLORMAP_JET)
    #         masked_image = colored_heatmap*0.7 + resized_image*0.3
    #         cv2.circle(masked_image,
    #                    (int(preds[i][j][0]), int(preds[i][j][1])),
    #                    1, [0, 0, 255], 1)
    #         width_begin = heatmap_width * (j+1)
    #         width_end = heatmap_width * (j+2)
    #         grid_image[height_begin:height_end, width_begin:width_end, :] = \
    #             masked_image
    #         # grid_image[height_begin:height_end, width_begin:width_end, :] = \
    #         #     colored_heatmap*0.7 + resized_image*0.3
    #     grid_image[height_begin:height_end, 0:heatmap_width, :] = resized_image
    # cv2.imwrite(file_name, grid_image)
def save_debug_images(config, input, meta, target, joints_pred, output,
                      prefix):
    if not config.DEBUG.DEBUG:
        return

    if config.DEBUG.SAVE_BATCH_IMAGES_GT:
        save_batch_image_with_joints(
            input, meta['joints'], meta['joints_vis'],
            '{}_gt.jpg'.format(prefix)
        )
    if config.DEBUG.SAVE_BATCH_IMAGES_PRED:
        save_batch_image_with_joints(
            input, joints_pred, meta['joints_vis'],
            '{}_pred.jpg'.format(prefix)
        )
    # if config.DEBUG.SAVE_HEATMAPS_GT:
    #     save_batch_heatmaps(
    #         input, target, '{}_hm_gt.jpg'.format(prefix)
    #     )
    # if config.DEBUG.SAVE_HEATMAPS_PRED:
    #     save_batch_heatmaps(
    #         input, output, '{}_hm_pred.jpg'.format(prefix)
    #     )
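The recurring factor of 4 in validate (pred*4) and in the resizes above is the heatmap stride of this config; a quick check:

image_w, image_h = 288, 384      # MODEL.IMAGE_SIZE
heatmap_w, heatmap_h = 72, 96    # MODEL.HEATMAP_SIZE
stride = image_w // heatmap_w    # = 4, same in both dimensions
# multiplying heatmap-space joint coordinates by 4 maps them back to input-image pixels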