CPN-Cascaded Pyramid Network for Multi-Person Pose Estimation 源码分析

CPN 和 Stacked Hourglass network 一样都是 top-down形式

也就是需要先将一个个人检测出来,再经过 CPN 对每个人逐一进行姿态估计。

论文地址:

https://arxiv.org/pdf/1711.07319.pdf

代码地址:

https://github.com/GengDavid/pytorch-cpn

首先看下网络结构

class CPN(nn.Module):
    """Cascaded Pyramid Network: ResNet backbone -> GlobalNet -> RefineNet.

    GlobalNet produces coarse keypoint heatmaps at every pyramid level;
    RefineNet fuses the GlobalNet feature maps to refine the hard keypoints.
    """

    def __init__(self, resnet, output_shape, num_class, pretrained=True):
        super(CPN, self).__init__()
        # Channel widths of the backbone feature maps, deepest level first.
        feature_channels = [2048, 1024, 512, 256]
        self.resnet = resnet
        self.global_net = globalNet(feature_channels, output_shape, num_class)
        self.refine_net = refineNet(feature_channels[-1], output_shape, num_class)
        # NOTE(review): `pretrained` is accepted but unused here; the backbone
        # is already constructed (and possibly pretrained) by the caller.

    def forward(self, x):
        # x: (B, 3, H, W) image batch.
        backbone_features = self.resnet(x)
        # GlobalNet returns both its intermediate feature maps (for RefineNet)
        # and its per-level heatmap predictions for the "easy" keypoints.
        global_fms, global_outs = self.global_net(backbone_features)
        # RefineNet targets the hard-to-localize keypoints.
        refine_out = self.refine_net(global_fms)
        # global_outs: list of 4 tensors, each (B, num_joints, H // 4, W // 4)
        # refine_out:  single tensor     (B, num_joints, H // 4, W // 4)
        return global_outs, refine_out

# 可以选择不同的 backbone
def CPN50(out_size, num_class, pretrained=True):
    """Build a CPN whose backbone is ResNet-50."""
    backbone = resnet50(pretrained=pretrained)
    return CPN(backbone, output_shape=out_size, num_class=num_class, pretrained=pretrained)

def CPN101(out_size, num_class, pretrained=True):
    """Build a CPN whose backbone is ResNet-101."""
    backbone = resnet101(pretrained=pretrained)
    return CPN(backbone, output_shape=out_size, num_class=num_class, pretrained=pretrained)

接下来进入 训练代码

def main(args):
    """Train CPN: build the model, optimizer and data loader, then run epochs.

    args: parsed command-line namespace (workers, num_gpus, epochs,
    start_epoch, checkpoint, resume).
    """
    # create checkpoint dir
    if not isdir(args.checkpoint):
        mkdir_p(args.checkpoint)

    # Instantiate the model named by cfg.model (e.g. 'CPN50') from the
    # network module's namespace.
    model = network.__dict__[cfg.model](cfg.output_shape, cfg.num_class, pretrained = True)
    # Wrap for (multi-)GPU training.
    model = torch.nn.DataParallel(model).cuda()

    # Two losses: plain MSE for GlobalNet; element-wise MSE (reduce=False,
    # old-PyTorch spelling of reduction='none') for RefineNet, so that
    # online hard keypoint mining can later select the hardest joints.
    criterion1 = torch.nn.MSELoss().cuda() # for Global loss
    criterion2 = torch.nn.MSELoss(reduce=False).cuda() # for refine loss

    optimizer = torch.optim.Adam(model.parameters(),
                                lr = cfg.lr,
                                weight_decay=cfg.weight_decay)

    # BUGFIX: previously `logger` stayed undefined when --resume pointed at a
    # missing file, crashing later at logger.append(). Ensure a logger exists
    # on every path: resume the old log when a checkpoint loads, otherwise
    # start a fresh one.
    logger = None
    if args.resume:
        if isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            pretrained_dict = checkpoint['state_dict']
            model.load_state_dict(pretrained_dict)
            args.start_epoch = checkpoint['epoch']
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})"
                  .format(args.resume, checkpoint['epoch']))
            logger = Logger(join(args.checkpoint, 'log.txt'), resume=True)
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))
    if logger is None:
        logger = Logger(join(args.checkpoint, 'log.txt'))
        logger.set_names(['Epoch', 'LR', 'Train Loss'])

    # Let cuDNN benchmark conv algorithms once up front; pays off because the
    # input dimensions stay fixed across iterations.
    cudnn.benchmark = True
    # numel() per parameter, * 4 bytes (float32), / 1024^2 -> size in MB.
    print('    Total params: %.2fMB' % (sum(p.numel() for p in model.parameters())/(1024*1024)*4))

    # Training data loader; total batch size scales with the GPU count.
    train_loader = torch.utils.data.DataLoader(
        MscocoMulti(cfg),
        batch_size=cfg.batch_size*args.num_gpus, shuffle=True,
        num_workers=args.workers, pin_memory=True)

    for epoch in range(args.start_epoch, args.epochs):
        # Step the learning rate per cfg.lr_dec_epoch / cfg.lr_gamma schedule.
        lr = adjust_learning_rate(optimizer, epoch, cfg.lr_dec_epoch, cfg.lr_gamma)
        print('\nEpoch: %d | LR: %.8f' % (epoch + 1, lr))

        # train for one epoch
        train_loss = train(train_loader, model, [criterion1, criterion2], optimizer)
        print('train_loss: ',train_loss)

        # append logger file
        logger.append([epoch + 1, lr, train_loss])
        # Persist a checkpoint after every epoch so training can resume.
        save_model({
            'epoch': epoch + 1,
            'state_dict': model.state_dict(),
            'optimizer' : optimizer.state_dict(),
        }, checkpoint=args.checkpoint)

    logger.close()

if __name__ == '__main__':
    # Command-line configuration for CPN training.
    arg_parser = argparse.ArgumentParser(description='PyTorch CPN Training')
    arg_parser.add_argument('-j', '--workers', default=12, type=int, metavar='N',
                            help='number of data loading workers (default: 12)')
    arg_parser.add_argument('-g', '--num_gpus', default=1, type=int, metavar='N',
                            help='number of GPU to use (default: 1)')
    arg_parser.add_argument('--epochs', default=32, type=int, metavar='N',
                            help='number of total epochs to run (default: 32)')
    arg_parser.add_argument('--start-epoch', default=0, type=int, metavar='N',
                            help='manual epoch number (useful on restarts)')
    arg_parser.add_argument('-c', '--checkpoint', default='checkpoint', type=str, metavar='PATH',
                            help='path to save checkpoint (default: checkpoint)')
    arg_parser.add_argument('--resume', default='', type=str, metavar='PATH',
                            help='path to latest checkpoint')

    # Parse and hand off to the training entry point.
    main(arg_parser.parse_args())

下面进入 train(训练每个 epoch)函数

def train(train_loader, model, criterions, optimizer):
    """Run one training epoch over `train_loader`; return the average loss.

    criterions: [global MSE loss, element-wise MSE loss for RefineNet].
    """
    # prepare for refine loss
    def ohkm(loss, top_k):
        # Online Hard Keypoint Mining: for each sample, keep only the top_k
        # largest per-joint losses and average them, then average over the
        # batch. `loss` is (batch, num_joints).
        ohkm_loss = 0.
        for i in range(loss.size()[0]):
            sub_loss = loss[i]
            # Pick the top_k hardest (largest-loss) keypoints of this sample.
            topk_val, topk_idx = torch.topk(sub_loss, k=top_k, dim=0, sorted=False)
            tmp_loss = torch.gather(sub_loss, 0, topk_idx)
            ohkm_loss += torch.sum(tmp_loss) / top_k
        ohkm_loss /= loss.size()[0]
        return ohkm_loss
    
    # Both criterions are MSE; criterion2 is element-wise (no reduction).
    criterion1, criterion2 = criterions
    
    # Running meters for timing and loss (batch_time/data_time unused below).
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()

    # switch to train mode
    model.train()
    
    # Iterate over the training batches.
    for i, (inputs, targets, valid, meta) in enumerate(train_loader):
        # Input image batch, moved to GPU (old-PyTorch Variable wrapper).
        input_var = torch.autograd.Variable(inputs.cuda())
        
        # The model emits 5 outputs: a list of 4 GlobalNet heatmaps plus one
        # RefineNet heatmap, so there are 4 global targets and 1 refine
        # target. target7 (sharpest Gaussian) doubles as the refine target.
        target15, target11, target9, target7 = targets
        refine_target_var = torch.autograd.Variable(target7.cuda())
        # Per-keypoint visibility flags, used below as a loss mask
        # (see the dataset's __getitem__ for how `valid` is produced).
        valid_var = torch.autograd.Variable(valid.cuda())

        # compute output
        global_outputs, refine_output = model(input_var)
        score_map = refine_output.data.cpu()
        
        # Accumulate total loss plus separate records for logging.
        loss = 0.
        global_loss_record = 0.
        refine_loss_record = 0.
        # compute global loss and refine loss
        # One loss term per FPN level of GlobalNet.
        for global_output, label in zip(global_outputs, targets):
            num_points = global_output.size()[1]
            # Mask the target: valid > 1.1 keeps only keypoints flagged
            # strictly above 1 (presumably the "visible" class -- see dataset).
            global_label = label * (valid > 1.1).type(torch.FloatTensor).view(-1, num_points, 1, 1)
            global_loss = criterion1(global_output, torch.autograd.Variable(global_label.cuda())) / 2.0
            loss += global_loss
            global_loss_record += global_loss.data.item()
        # RefineNet loss: element-wise MSE reduced to one value per joint,
        # then masked (valid > 0.1 keeps every labeled keypoint) and mined.
        refine_loss = criterion2(refine_output, refine_target_var)
        refine_loss = refine_loss.mean(dim=3).mean(dim=2)
        refine_loss *= (valid_var > 0.1).type(torch.cuda.FloatTensor)
        # Keep only the 8 hardest keypoints per sample (OHKM).
        refine_loss = ohkm(refine_loss, 8)
        loss += refine_loss
        refine_loss_record = refine_loss.data.item()

        # record loss
        losses.update(loss.data.item(), inputs.size(0))

        # compute gradient and do Optimization step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Periodic progress report (skips iteration 0).
        if(i%100==0 and i!=0):
            print('iteration {} | loss: {}, global loss: {}, refine loss: {}, avg loss: {}'
                .format(i, loss.data.item(), global_loss_record,
                    refine_loss_record, losses.avg))

    return losses.avg

最后分析下 dataset,也就是 ground truth 是怎么得到的

    def __getitem__(self, index):
        """Build one training sample: heatmap targets + visibility flags.

        NOTE(review): this excerpt is truncated -- image loading, the
        augmentation that produces `pts`, and the final return statement
        are not shown here.
        """
        # (image loading / augmentation omitted in this excerpt)

        if self.is_train:
            # Four ground-truth heatmap sets, one per GlobalNet output level,
            # all at the same output resolution.
            target15 = np.zeros((self.num_class, self.out_res[0], self.out_res[1]))
            target11 = np.zeros((self.num_class, self.out_res[0], self.out_res[1]))
            target9 = np.zeros((self.num_class, self.out_res[0], self.out_res[1]))
            target7 = np.zeros((self.num_class, self.out_res[0], self.out_res[1]))
            for i in range(self.num_class):
                if pts[i, 2] > 0: # COCO visible: 0-no label, 1-label + invisible, 2-label + visible
                    # Different Gaussian kernel sizes per level (from cfg):
                    # gk15 = (23, 23), gk11 = (17, 17), gk9 = (13, 13),
                    # gk7 = (9, 9). Larger kernels yield smoother (easier)
                    # targets for the shallower supervision levels.
                    target15[i] = generate_heatmap(target15[i], pts[i], self.cfg.gk15)
                    target11[i] = generate_heatmap(target11[i], pts[i], self.cfg.gk11)
                    target9[i] = generate_heatmap(target9[i], pts[i], self.cfg.gk9)
                    target7[i] = generate_heatmap(target7[i], pts[i], self.cfg.gk7)
                    
            targets = [torch.Tensor(target15), torch.Tensor(target11), torch.Tensor(target9), torch.Tensor(target7)]
            # Keypoint visibility column (0/1/2), later used as a loss mask.
            valid = pts[:, 2]

你可能感兴趣的:(源码分析,论文解析,计算机视觉)