CPN 和 Stacked Hourglass network 一样都是 top-down形式
也就是需要先将一个个人先检测出来,再经过CPN来一个个人来进行姿态估计。
论文地址:
https://arxiv.org/pdf/1711.07319.pdf
代码地址:
https://github.com/GengDavid/pytorch-cpn
首先看下网络结构
class CPN(nn.Module):
    """Cascaded Pyramid Network: ResNet backbone -> GlobalNet -> RefineNet.

    A top-down pose estimator: the backbone extracts a feature pyramid,
    GlobalNet predicts keypoint heatmaps at every pyramid level, and
    RefineNet produces a single refined heatmap from the pyramid features.
    """

    def __init__(self, resnet, output_shape, num_class, pretrained=True):
        super(CPN, self).__init__()
        # Channel widths of the four backbone feature levels, deepest first.
        channel_settings = [2048, 1024, 512, 256]
        self.resnet = resnet
        self.global_net = globalNet(channel_settings, output_shape, num_class)
        self.refine_net = refineNet(channel_settings[-1], output_shape, num_class)

    def forward(self, x):
        # x: (B, 3, H, W) image batch.
        # Feature pyramid from the ResNet backbone.
        res_out = self.resnet(x)
        # GlobalNet handles the "easy" keypoints at each pyramid level.
        global_fms, global_outs = self.global_net(res_out)
        # RefineNet targets the hard keypoints using the pyramid features.
        refine_out = self.refine_net(global_fms)
        # global_outs: list of 4 tensors, each (B, num_joints, H // 4, W // 4)
        # refine_out:  (B, num_joints, H // 4, W // 4)
        return global_outs, refine_out
# 可以选择不同的backbone
def CPN50(out_size, num_class, pretrained=True):
    """Build a CPN whose backbone is ResNet-50."""
    backbone = resnet50(pretrained=pretrained)
    return CPN(backbone, output_shape=out_size, num_class=num_class, pretrained=pretrained)
def CPN101(out_size, num_class, pretrained=True):
    """Build a CPN whose backbone is ResNet-101."""
    backbone = resnet101(pretrained=pretrained)
    return CPN(backbone, output_shape=out_size, num_class=num_class, pretrained=pretrained)
接下来进入 训练代码
def main(args):
    """Set up the model, losses, optimizer and data, then run the epoch loop.

    Fixes over the original:
      * ``MSELoss(reduce=False)`` used a deprecated (and since removed) kwarg;
        ``reduction='none'`` is the exact equivalent.
      * ``logger`` was unbound (NameError at the epoch loop) when ``--resume``
        pointed at a nonexistent file; a fallback now creates a fresh logger.
    """
    # Create checkpoint dir if missing.
    if not isdir(args.checkpoint):
        mkdir_p(args.checkpoint)

    # Build the model named in the config (e.g. CPN50 / CPN101).
    model = network.__dict__[cfg.model](cfg.output_shape, cfg.num_class, pretrained=True)
    # Multi-GPU training via DataParallel.
    model = torch.nn.DataParallel(model).cuda()

    # Two MSE criteria: a mean-reduced one for GlobalNet, and an element-wise
    # (unreduced) one for RefineNet so OHKM can pick the hardest keypoints.
    criterion1 = torch.nn.MSELoss().cuda()  # for Global loss
    criterion2 = torch.nn.MSELoss(reduction='none').cuda()  # for refine loss

    # Optimizer.
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=cfg.lr,
                                 weight_decay=cfg.weight_decay)

    logger = None
    if args.resume:
        # Optionally resume model/optimizer state from a checkpoint.
        if isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            pretrained_dict = checkpoint['state_dict']
            model.load_state_dict(pretrained_dict)
            args.start_epoch = checkpoint['epoch']
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})"
                  .format(args.resume, checkpoint['epoch']))
            logger = Logger(join(args.checkpoint, 'log.txt'), resume=True)
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))
    if logger is None:
        # Fresh run (or resume file missing): start a new log.
        logger = Logger(join(args.checkpoint, 'log.txt'))
        logger.set_names(['Epoch', 'LR', 'Train Loss'])

    # Let cuDNN benchmark all conv algorithms once and pick the fastest;
    # pays off when input shapes/types are mostly static across iterations.
    cudnn.benchmark = True
    print('    Total params: %.2fMB' % (sum(p.numel() for p in model.parameters())/(1024*1024)*4))

    # Training data loader.
    train_loader = torch.utils.data.DataLoader(
        MscocoMulti(cfg),
        batch_size=cfg.batch_size*args.num_gpus, shuffle=True,
        num_workers=args.workers, pin_memory=True)

    # Epoch loop.
    for epoch in range(args.start_epoch, args.epochs):
        # Step the learning-rate schedule (cfg.lr_dec_epoch, cfg.lr_gamma).
        lr = adjust_learning_rate(optimizer, epoch, cfg.lr_dec_epoch, cfg.lr_gamma)
        print('\nEpoch: %d | LR: %.8f' % (epoch + 1, lr))

        # Train for one epoch.
        train_loss = train(train_loader, model, [criterion1, criterion2], optimizer)
        print('train_loss: ', train_loss)

        # Append to the logger file.
        logger.append([epoch + 1, lr, train_loss])

        # Persist model + optimizer state.
        save_model({
            'epoch': epoch + 1,
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict(),
        }, checkpoint=args.checkpoint)

    logger.close()
if __name__ == '__main__':
    # Command-line configuration for training.
    parser = argparse.ArgumentParser(description='PyTorch CPN Training')
    parser.add_argument('-j', '--workers', default=12, type=int, metavar='N',
                        help='number of data loading workers (default: 12)')
    parser.add_argument('-g', '--num_gpus', default=1, type=int, metavar='N',
                        help='number of GPU to use (default: 1)')
    parser.add_argument('--epochs', default=32, type=int, metavar='N',
                        help='number of total epochs to run (default: 32)')
    parser.add_argument('--start-epoch', default=0, type=int, metavar='N',
                        help='manual epoch number (useful on restarts)')
    parser.add_argument('-c', '--checkpoint', default='checkpoint', type=str, metavar='PATH',
                        help='path to save checkpoint (default: checkpoint)')
    parser.add_argument('--resume', default='', type=str, metavar='PATH',
                        help='path to latest checkpoint')
    # Entry point: parse CLI args and launch training.
    main(parser.parse_args())
下面进入 train(训练每个 epoch)函数
def train(train_loader, model, criterions, optimizer):
    """Run one training epoch and return the running-average total loss.

    ``criterions`` is ``[global_criterion, refine_criterion]`` where the
    refine criterion is element-wise (unreduced) MSE so OHKM can select
    the hardest keypoints.

    Fix over the original: in ``ohkm`` the ``torch.gather`` call was
    redundant — ``torch.topk`` already returns the top-k *values*, so they
    are summed directly.
    """
    def ohkm(loss, top_k):
        # Online Hard Keypoint Mining: per sample, keep only the top_k
        # largest per-joint losses and average them. `loss` is (B, num_joints).
        ohkm_loss = 0.
        for i in range(loss.size()[0]):
            sub_loss = loss[i]
            # topk already yields the hardest values; no gather needed.
            topk_val, topk_idx = torch.topk(sub_loss, k=top_k, dim=0, sorted=False)
            ohkm_loss += torch.sum(topk_val) / top_k
        ohkm_loss /= loss.size()[0]
        return ohkm_loss

    # Both are MSE; criterion2 is unreduced (see main()).
    criterion1, criterion2 = criterions

    # Metric trackers (time meters kept for parity with the original code;
    # they are not updated in this excerpt).
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()

    # Switch the model to training mode.
    model.train()

    # Iterate over the training data.
    for i, (inputs, targets, valid, meta) in enumerate(train_loader):
        # NOTE(review): torch.autograd.Variable is a deprecated no-op wrapper
        # on modern PyTorch; kept here to match the file's style.
        input_var = torch.autograd.Variable(inputs.cuda())
        # The model has 5 outputs (4 GlobalNet levels + 1 RefineNet), so there
        # are 5 supervision targets: target15/11/9/7 for GlobalNet levels and
        # target7 doubling as the RefineNet target.
        target15, target11, target9, target7 = targets
        refine_target_var = torch.autograd.Variable(target7.cuda())
        # Per-joint visibility flags (produced by the dataset's __getitem__).
        valid_var = torch.autograd.Variable(valid.cuda())

        # Forward pass.
        global_outputs, refine_output = model(input_var)
        # Appears unused within this function; presumably kept for evaluation
        # code elided from this excerpt — TODO confirm against the full repo.
        score_map = refine_output.data.cpu()

        loss = 0.
        global_loss_record = 0.
        refine_loss_record = 0.

        # GlobalNet loss: one term per FPN level.
        for global_output, label in zip(global_outputs, targets):
            num_points = global_output.size()[1]
            # Mask out joints without a "visible" label (valid > 1.1 keeps
            # only COCO visibility == 2).
            global_label = label * (valid > 1.1).type(torch.FloatTensor).view(-1, num_points, 1, 1)
            global_loss = criterion1(global_output, torch.autograd.Variable(global_label.cuda())) / 2.0
            loss += global_loss
            global_loss_record += global_loss.data.item()

        # RefineNet loss: element-wise MSE reduced to per-joint means, masked
        # by visibility, then OHKM keeps the 8 hardest keypoints per sample.
        refine_loss = criterion2(refine_output, refine_target_var)
        refine_loss = refine_loss.mean(dim=3).mean(dim=2)
        refine_loss *= (valid_var > 0.1).type(torch.cuda.FloatTensor)
        refine_loss = ohkm(refine_loss, 8)
        loss += refine_loss
        refine_loss_record = refine_loss.data.item()

        # Record the combined loss.
        losses.update(loss.data.item(), inputs.size(0))

        # Backward pass and optimizer step.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (i % 100 == 0 and i != 0):
            print('iteration {} | loss: {}, global loss: {}, refine loss: {}, avg loss: {}'
                  .format(i, loss.data.item(), global_loss_record,
                          refine_loss_record, losses.avg))

    return losses.avg
最后分析下 dataset,也就是 ground truth 是怎么得到的
def __getitem__(self, index):
# 略
if self.is_train:
# 定义 global net 得4个label
target15 = np.zeros((self.num_class, self.out_res[0], self.out_res[1]))
target11 = np.zeros((self.num_class, self.out_res[0], self.out_res[1]))
target9 = np.zeros((self.num_class, self.out_res[0], self.out_res[1]))
target7 = np.zeros((self.num_class, self.out_res[0], self.out_res[1]))
for i in range(self.num_class):
if pts[i, 2] > 0: # COCO visible: 0-no label, 1-label + invisible, 2-label + visible
# 采用不同得卷积核和来生产不同得 heatmap 值
# gk15 = (23, 23)
# gk11 = (17, 17)
# gk9 = (13, 13)
# gk7 = (9, 9)
target15[i] = generate_heatmap(target15[i], pts[i], self.cfg.gk15)
target11[i] = generate_heatmap(target11[i], pts[i], self.cfg.gk11)
target9[i] = generate_heatmap(target9[i], pts[i], self.cfg.gk9)
target7[i] = generate_heatmap(target7[i], pts[i], self.cfg.gk7)
targets = [torch.Tensor(target15), torch.Tensor(target11), torch.Tensor(target9), torch.Tensor(target7)]
# 关键点得可见度
valid = pts[:, 2]