windows10:HRNet+环境配置(pytorch)+网络改动

HRNet模型源代码:https://github.com/HRNet/HRNet-Human-Pose-Estimation
安装pytorch(CUDA版本:10.0):

#conda install pytorch==1.0.0 torchvision==0.2.1 cuda100 -c pytorch
conda install pytorch==1.2.0 torchvision==0.4.0 cudatoolkit=10.0

安装HRNet环境:

EasyDict==1.7
opencv-python
shapely
Cython
scipy
pandas
pyyaml
json_tricks
scikit-image
yacs>=0.1.5
tensorboardX>=1.6
#使用清华镜像逐个安装以上依赖:pip install -i https://pypi.tuna.tsinghua.edu.cn/simple --trusted-host pypi.tuna.tsinghua.edu.cn <包名>

改动地方:
1.main() --line93 --将模型加载到gpu
原:

model = eval('models.'+cfg.MODEL.NAME+'.get_pose_net')(
        cfg, is_train=True
        )

改:

# Build the network through the `get_pose_net` factory of the module named by
# cfg.MODEL.NAME, then move the resulting model to the GPU.
# NOTE(review): eval() runs whatever expression the config name produces; if
# the config can come from an untrusted source, prefer
# getattr(models, cfg.MODEL.NAME).get_pose_net instead.
model = eval('models.'+cfg.MODEL.NAME+'.get_pose_net')(
        cfg, is_train=True
    ).cuda()

2.main() --line110 --将输入张量加载到gpu
原:

dump_input = torch.rand(
        (1, 3, cfg.MODEL.IMAGE_SIZE[1], cfg.MODEL.IMAGE_SIZE[0])
    )

改:

# Random dummy input of shape (1, 3, IMAGE_SIZE[1], IMAGE_SIZE[0]), created and
# then moved to the GPU.
# NOTE(review): presumably IMAGE_SIZE is stored as [width, height], which
# would make this a (batch, channel, height, width) tensor - confirm.
dump_input = torch.rand(
        (1, 3, cfg.MODEL.IMAGE_SIZE[1], cfg.MODEL.IMAGE_SIZE[0])
    ).cuda()

3.main() --line111 --报错
注释掉
4.main() --line115 --单gpu
注释掉
5. lib/core/function.py --line39 --将输入数据加载到gpu
增加:

# Move the input batch to the GPU.
input = input.cuda()

6.lib/core/function.py --line43 --将输出数据加载到gpu
原:

outputs = model(input)

改:

# Run the forward pass and place the result on the GPU.
# NOTE(review): calling .cuda() on the result assumes model(input) returns a
# single tensor; a list of tensors has no .cuda() method - confirm.
outputs = model(input).cuda()

7.lib/dataset/coco.py --line113 --训练标签路径

8.lib/dataset/coco.py --line232 --训练图片路径

9.lib/dataset/coco.py --line136 --原训练集太大,进行训练集缩小
原:

def _load_coco_keypoint_annotations(self):
        """Collect ground-truth bbox and keypoint records for every image index."""
        gt_db = []
        # The per-index loader returns an iterable of records; extend() merges
        # them into one flat list.
        for index in self.image_set_index:
            gt_db.extend(self._load_coco_keypoint_annotation_kernal(index))
        return gt_db

改:

def _load_coco_keypoint_annotations(self, stride=25):
        """Load ground-truth bbox and keypoint records for a subsample of images.

        Only the entries of ``self.image_set_index`` at positions
        0, stride, 2*stride, ... are loaded, which shrinks the training set.

        Args:
            stride: Sampling step. The default of 25 keeps one image out of
                every 25; a value of 1 loads every image.

        Returns:
            A flat list of the records returned for the selected indices.

        Raises:
            ValueError: If ``stride`` is smaller than 1.
        """
        if stride < 1:
            raise ValueError('stride must be >= 1, got {}'.format(stride))
        gt_db = []
        # enumerate() supplies the running position, so no manual counter is
        # needed to decide which entries are kept.
        for position, index in enumerate(self.image_set_index):
            if position % stride == 0:
                gt_db.extend(self._load_coco_keypoint_annotation_kernal(index))
        return gt_db

10.lib/nms/nms.py --line13,14无法导入模块
注释13,14行,增加cpu_nms函数:

def cpu_nms(dets, thresh):
    """Greedy non-maximum suppression implemented with NumPy.

    Args:
        dets: 2-D array whose first five columns are x1, y1, x2, y2, score.
        thresh: Boxes whose IoU with an already kept box exceeds this value
            are discarded.

    Returns:
        List of row indices into ``dets`` (NumPy integers), ordered from the
        highest to the lowest score among the kept boxes.
    """
    left, top, right, bottom, scores = (dets[:, col] for col in range(5))
    # Box coordinates are treated as inclusive, hence the +1.
    areas = (bottom - top + 1) * (right - left + 1)

    keep = []
    order = scores.argsort()[::-1]
    while order.size > 0:
        # The first entry always has the highest remaining score: keep it.
        best, rest = order[0], order[1:]
        keep.append(best)

        # Corners of the intersection between the kept box and the others.
        inter_left = np.maximum(left[best], left[rest])
        inter_top = np.maximum(top[best], top[rest])
        inter_right = np.minimum(right[best], right[rest])
        inter_bottom = np.minimum(bottom[best], bottom[rest])

        # Width and height of the overlap, clamped at zero when disjoint.
        inter_w = np.maximum(0, inter_right - inter_left + 1)
        inter_h = np.maximum(0, inter_bottom - inter_top + 1)

        overlaps = inter_w * inter_h
        ious = overlaps / (areas[best] + areas[rest] - overlaps)

        # Only boxes that overlap little enough survive to the next round.
        order = rest[ious <= thresh]

    return keep

11.修改w32_384x288_adam_lr1e-3.yaml文件

# Experiment configuration w32_384x288_adam_lr1e-3.yaml, adjusted for a single
# GPU on a Windows 10 machine.
AUTO_RESUME: true
CUDNN:
  BENCHMARK: false
  DETERMINISTIC: true
  ENABLED: true
DATA_DIR: ''
# Multi-GPU setting kept for reference: GPUS: (0,1,2,3)
# Only GPU 0 is used.
GPUS: (0,)
OUTPUT_DIR: 'output'
LOG_DIR: 'log'
# NOTE(review): presumably the data-loader worker count; 0 would keep loading
# in the main process - confirm.
WORKERS: 0
PRINT_FREQ: 1

DATASET:
  COLOR_RGB: true
  DATASET: 'coco'
  DATA_FORMAT: jpg
  FLIP: true
  NUM_JOINTS_HALF_BODY: 8
  PROB_HALF_BODY: 0.3
  # Absolute path of the local COCO copy.
  ROOT: 'E:/MS_COCO/'
  # Repository-relative alternative kept for reference: ROOT: 'data/coco/'
  ROT_FACTOR: 45
  SCALE_FACTOR: 0.35
  TEST_SET: 'val2017'
  TRAIN_SET: 'train2017'
MODEL:
  INIT_WEIGHTS: False
  NAME: pose_hrnet
  NUM_JOINTS: 17
  PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth'
  TARGET_TYPE: gaussian
  # The training script builds its dummy input as
  # (1, 3, IMAGE_SIZE[1], IMAGE_SIZE[0]); presumably [width, height] - confirm.
  IMAGE_SIZE:
  - 288
  - 384
  # One quarter of IMAGE_SIZE in each dimension.
  HEATMAP_SIZE:
  - 72
  - 96
  SIGMA: 3
  EXTRA:
    PRETRAINED_LAYERS:
    - 'conv1'
    - 'bn1'
    - 'conv2'
    - 'bn2'
    - 'layer1'
    - 'transition1'
    - 'stage2'
    - 'transition2'
    - 'stage3'
    - 'transition3'
    - 'stage4'
    FINAL_CONV_KERNEL: 1
    STAGE2:
      NUM_MODULES: 1
      NUM_BRANCHES: 2
      BLOCK: BASIC
      NUM_BLOCKS:
      - 4
      - 4
      NUM_CHANNELS:
      - 32
      - 64
      FUSE_METHOD: SUM
    STAGE3:
      NUM_MODULES: 4
      NUM_BRANCHES: 3
      BLOCK: BASIC
      NUM_BLOCKS:
      - 4
      - 4
      - 4
      NUM_CHANNELS:
      - 32
      - 64
      - 128
      FUSE_METHOD: SUM
    STAGE4:
      NUM_MODULES: 3
      NUM_BRANCHES: 4
      BLOCK: BASIC
      NUM_BLOCKS:
      - 4
      - 4
      - 4
      - 4
      NUM_CHANNELS:
      - 32
      - 64
      - 128
      - 256
      FUSE_METHOD: SUM
LOSS:
  USE_TARGET_WEIGHT: true
TRAIN:
  BATCH_SIZE_PER_GPU: 4
  SHUFFLE: true
  BEGIN_EPOCH: 0
  END_EPOCH: 210
  OPTIMIZER: adam
  LR: 0.001
  LR_FACTOR: 0.1
  LR_STEP:
  - 170
  - 200
  WD: 0.0001
  GAMMA1: 0.99
  GAMMA2: 0.0
  MOMENTUM: 0.9
  NESTEROV: false
TEST:
  # NOTE(review): step 12 of these notes requires a batch size of 1 for the
  # per-person image export; presumably this is the value to lower - confirm.
  BATCH_SIZE_PER_GPU: 16
  COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json'
  BBOX_THRE: 1.0
  IMAGE_THRE: 0.0
  IN_VIS_THRE: 0.2
  MODEL_FILE: ''
  NMS_THRE: 1.0
  OKS_THRE: 0.9
  USE_GT_BBOX: true
  # When true, validate() also runs the horizontally flipped input and
  # averages both outputs.
  FLIP_TEST: true
  POST_PROCESS: true
  # When true, validate() shifts the flipped heat-map by one column before
  # averaging.
  SHIFT_HEATMAP: true
DEBUG:
  # save_debug_images() returns immediately unless this flag is true.
  DEBUG: true
  SAVE_BATCH_IMAGES_GT: true
  SAVE_BATCH_IMAGES_PRED: true
  # NOTE: the modified save_debug_images() in step 12 has both heat-map dumps
  # commented out, so the two flags below have no effect there.
  SAVE_HEATMAPS_GT: true
  SAVE_HEATMAPS_PRED: true

12.生成单个人的pre_image和pre_heatmap
须设置batchsize=1(每个关节的输出文件名只与关节编号有关,batchsize>1时后面样本的输出会覆盖前面样本的输出)
1>修改E:/HRNet-Human-Pose-Estimation-master/lib/core/function.py文件中的validate

def validate(config, val_loader, val_dataset, model, criterion, output_dir,
             tb_log_dir, all_accuracy, writer_dict=None):
    """Run the model over ``val_loader`` and dump debug images for each batch.

    This variant only produces visualisations. The per-batch log message,
    the ``val_dataset.evaluate`` call, the ``_print_name_value`` reporting,
    the TensorBoard ``valid_loss`` / ``valid_acc`` / ``valid`` scalars and the
    final ``return perf_indicator`` are disabled, so nothing is returned.

    The loader is expected to use a batch size of 1: the output prefix is
    built from the first image path of every batch.

    Args:
        config: Configuration node; ``MODEL.NUM_JOINTS``, ``TEST.FLIP_TEST``
            and ``TEST.SHIFT_HEATMAP`` are read here.
        val_loader: Iterable yielding ``(input, target, target_weight, meta)``.
        val_dataset: Dataset; its length and ``flip_pairs`` are used.
        model: Network to run; it is switched to eval mode.
        criterion: Loss callable invoked as
            ``criterion(output, target, target_weight)``.
        output_dir: Directory that receives the ``val_<i>_<image>`` files.
        tb_log_dir: Unused.
        all_accuracy: Unused in this variant.
        writer_dict: Unused in this variant.

    Returns:
        None.
    """
    batch_time = AverageMeter()
    losses = AverageMeter()
    acc = AverageMeter()

    # switch to evaluate mode
    model.eval()

    # NOTE: these accumulators are still filled, but nothing consumes them
    # while the dataset evaluation is disabled.
    num_samples = len(val_dataset)
    all_preds = np.zeros(
        (num_samples, config.MODEL.NUM_JOINTS, 3),
        dtype=np.float32
    )
    all_boxes = np.zeros((num_samples, 6))
    idx = 0
    with torch.no_grad():
        end = time.time()
        for i, (input, target, target_weight, meta) in enumerate(val_loader):
            # compute output; the batch is moved to the GPU explicitly, like
            # the flipped batch below, so the call does not depend on a
            # wrapper doing the transfer
            outputs = model(input.cuda(non_blocking=True))
            if isinstance(outputs, list):
                output = outputs[-1]
            else:
                output = outputs

            if config.TEST.FLIP_TEST:
                # this part is ugly, because pytorch has not supported negative index
                # input_flipped = model(input[:, :, :, ::-1])
                input_flipped = np.flip(input.cpu().numpy(), 3).copy()
                input_flipped = torch.from_numpy(input_flipped).cuda()
                outputs_flipped = model(input_flipped)

                if isinstance(outputs_flipped, list):
                    output_flipped = outputs_flipped[-1]
                else:
                    output_flipped = outputs_flipped

                output_flipped = flip_back(output_flipped.cpu().numpy(),
                                           val_dataset.flip_pairs)
                output_flipped = torch.from_numpy(output_flipped.copy()).cuda()

                # feature is not aligned, shift flipped heatmap for higher accuracy
                if config.TEST.SHIFT_HEATMAP:
                    output_flipped[:, :, :, 1:] = \
                        output_flipped.clone()[:, :, :, 0:-1]

                output = (output + output_flipped) * 0.5

            target = target.cuda(non_blocking=True)
            target_weight = target_weight.cuda(non_blocking=True)

            loss = criterion(output, target, target_weight)

            num_images = input.size(0)
            # measure accuracy and record loss
            losses.update(loss.item(), num_images)
            _, avg_acc, cnt, pred = accuracy(output.cpu().numpy(),
                                             target.cpu().numpy())

            acc.update(avg_acc, cnt)

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            c = meta['center'].numpy()
            s = meta['scale'].numpy()
            score = meta['score'].numpy()

            preds, maxvals = get_final_preds(
                config, output.clone().cpu().numpy(), c, s)

            all_preds[idx:idx + num_images, :, 0:2] = preds[:, :, 0:2]
            all_preds[idx:idx + num_images, :, 2:3] = maxvals
            # double check this all_boxes parts
            all_boxes[idx:idx + num_images, 0:2] = c[:, 0:2]
            all_boxes[idx:idx + num_images, 2:4] = s[:, 0:2]
            all_boxes[idx:idx + num_images, 4] = np.prod(s*200, 1)
            all_boxes[idx:idx + num_images, 5] = score
            idx += num_images

            # meta['image'] holds one path per sample. Take the file stem of
            # the first path instead of slicing the list's string form, which
            # only works for 12-character names ending in a 4-character
            # extension.
            image_name = os.path.splitext(
                os.path.basename(meta['image'][0]))[0]

            prefix = '{}_{}_{}'.format(
                os.path.join(output_dir, 'val'), i, image_name
            )
            # NOTE(review): pred * 4 presumably maps heat-map coordinates to
            # input-image pixels (heat-map is 1/4 of the image size) - confirm.
            save_debug_images(config, input, meta, target, pred*4, output,
                              prefix)

2>修改E:/HRNet-Human-Pose-Estimation-master/lib/utils/vis.py文件

# ------------------------------------------------------------------------------
# Copyright (c) Microsoft
# Licensed under the MIT License.
# Written by Bin Xiao (Bin.Xiao@microsoft.com)
# ------------------------------------------------------------------------------

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import math

import numpy as np
import torchvision
import cv2

from core.inference import get_max_preds


def save_batch_image_with_joints(batch_image, batch_joints, batch_joints_vis,
                                 file_name, nrow=8, padding=2):
    '''
    Tile a batch of images into one grid, mark the visible joints and save it.

    batch_image: [batch_size, channel, height, width]
    batch_joints: [batch_size, num_joints, 3]; left unmodified by this call
    batch_joints_vis: [batch_size, num_joints, 1]
    file_name: path the annotated grid is written to
    nrow: forwarded to torchvision.utils.make_grid; also caps the number of
          grid columns used when placing the joints
    padding: forwarded to torchvision.utils.make_grid; also used as the
             offset of every tile inside the grid
    '''
    grid = torchvision.utils.make_grid(batch_image, nrow, padding, True)
    ndarr = grid.mul(255).clamp(0, 255).byte().permute(1, 2, 0).cpu().numpy()
    # cv2 draws in place, so work on a private copy of the grid
    ndarr = ndarr.copy()

    nmaps = batch_image.size(0)
    xmaps = min(nrow, nmaps)
    height = int(batch_image.size(2) + padding)
    width = int(batch_image.size(3) + padding)
    for k in range(nmaps):
        # row-major position of image k inside the grid
        y, x = divmod(k, xmaps)
        for joint, joint_vis in zip(batch_joints[k], batch_joints_vis[k]):
            if not joint_vis[0]:
                continue
            # Compute the grid position locally. Writing it back into `joint`
            # would silently shift the caller's joint coordinates.
            px = int(x * width + padding + joint[0])
            py = int(y * height + padding + joint[1])
            cv2.circle(ndarr, (px, py), 2, [255, 0, 0], 2)
    cv2.imwrite(file_name, ndarr)


def save_batch_heatmaps(batch_image, batch_heatmaps, file_name,
                        normalize=True, scale=4,
                        output_dir='E:/HRNet-Human-Pose-Estimation-master/output/coco/new_pose_hrnet/test/'):
    '''
    Write one heat-map overlay image per joint.

    For every sample and joint, the input image and the joint's heat-map are
    resized to `scale` times the heat-map resolution, blended (70% coloured
    heat-map, 30% image), marked at the heat-map maximum and written to
    `output_dir` as pre_<joint index>.jpg.

    The file name depends on the joint index only, so each sample overwrites
    the files of the previous one; use a batch size of 1.

    batch_image: [batch_size, channel, height, width]
    batch_heatmaps: [batch_size, num_joints, height, width]
    file_name: unused; kept so existing callers keep working
    normalize: rescale batch_image to roughly [0, 1] before drawing
    scale: enlargement factor of the saved overlays relative to the heat-map
           size; 1 saves them at heat-map resolution
    output_dir: prefix the output file names are appended to; it must end
                with a path separator
    '''
    if normalize:
        batch_image = batch_image.clone()
        lowest = float(batch_image.min())
        highest = float(batch_image.max())

        # the small epsilon avoids a division by zero for constant images
        batch_image.add_(-lowest).div_(highest - lowest + 1e-5)

    batch_size = batch_heatmaps.size(0)
    num_joints = batch_heatmaps.size(1)
    heatmap_height = batch_heatmaps.size(2)
    heatmap_width = batch_heatmaps.size(3)
    # cv2.resize expects the target size as (width, height)
    out_size = (int(heatmap_width * scale), int(heatmap_height * scale))

    preds, _ = get_max_preds(batch_heatmaps.detach().cpu().numpy())

    for i in range(batch_size):
        image = batch_image[i].mul(255)\
                              .clamp(0, 255)\
                              .byte()\
                              .permute(1, 2, 0)\
                              .cpu().numpy()
        heatmaps = batch_heatmaps[i].mul(255)\
                                    .clamp(0, 255)\
                                    .byte()\
                                    .cpu().numpy()

        for j in range(num_joints):
            # The maximum is located in heat-map coordinates; both markers
            # must use the same up-scaled position as the enlarged images.
            center = (int(preds[i][j][0] * scale),
                      int(preds[i][j][1] * scale))

            # resize a fresh copy per joint so markers do not accumulate
            enlarged_image = cv2.resize(image, out_size)
            cv2.circle(enlarged_image, center, 1, [0, 0, 255], 1)

            enlarged_heatmap = cv2.resize(heatmaps[j, :, :], out_size)
            colored_heatmap = cv2.applyColorMap(enlarged_heatmap,
                                                cv2.COLORMAP_JET)
            overlay = colored_heatmap*0.7 + enlarged_image*0.3
            cv2.circle(overlay, center, 1, [0, 0, 255], 1)

            # The author's alternative name for ground-truth dumps was
            # 'gt_' + str(j+16) + '.jpg'.
            name = output_dir + 'pre_' + str(j) + '.jpg'
            cv2.imwrite(name, overlay)


def save_debug_images(config, input, meta, target, joints_pred, output,
                      prefix):
    '''
    Save the joint visualisations enabled in config.DEBUG.

    Nothing is written unless config.DEBUG.DEBUG is set. Depending on the
    SAVE_BATCH_IMAGES_GT / SAVE_BATCH_IMAGES_PRED flags, the batch is saved
    with the ground-truth joints from meta and/or with joints_pred, to
    <prefix>_gt.jpg and <prefix>_pred.jpg.

    The heat-map dumps (SAVE_HEATMAPS_GT / SAVE_HEATMAPS_PRED, files
    <prefix>_hm_gt.jpg and <prefix>_hm_pred.jpg) are disabled in this
    variant, so target and output are not used.
    '''
    debug_flags = config.DEBUG
    if not debug_flags.DEBUG:
        return

    if debug_flags.SAVE_BATCH_IMAGES_GT:
        gt_file = '{}_gt.jpg'.format(prefix)
        save_batch_image_with_joints(
            input, meta['joints'], meta['joints_vis'], gt_file)

    if debug_flags.SAVE_BATCH_IMAGES_PRED:
        pred_file = '{}_pred.jpg'.format(prefix)
        save_batch_image_with_joints(
            input, joints_pred, meta['joints_vis'], pred_file)

你可能感兴趣的:(深度学习,pytorch)