【mmdetection】How does Mask R-CNN output its masks, and in what form?

configs/_base_/models/mask_rcnn_r50_fpn.py

MaskRCNN:
	roi_head: StandardRoIHead
		mask_head: FCNMaskHead
StandardRoIHead.simple_test
=> MaskTestMixin::simple_test_mask
	=> mask_pred = StandardRoIHead::_mask_forward(x, mask_rois)
		=> mask_pred = FCNMaskHead::forward(roi_feats)
	=> FCNMaskHead::get_seg_masks(mask_pred, det_bbox, ..)
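Before diving into the pasting code, it helps to see what this call chain ultimately returns at the Python API level. A minimal sketch, assuming an mmdetection 2.x checkout with a Mask R-CNN checkpoint downloaded locally (the checkpoint path below is hypothetical): for Mask R-CNN, each image's result is a (bbox_results, segm_results) tuple, where segm_results holds one list of full-image binary masks per class.

from mmdet.apis import init_detector, inference_detector

# Full config (not the _base_ fragment) plus a locally downloaded checkpoint (hypothetical path).
config = 'configs/mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py'
checkpoint = 'checkpoints/mask_rcnn_r50_fpn_1x_coco.pth'
model = init_detector(config, checkpoint, device='cuda:0')

result = inference_detector(model, 'demo/demo.jpg')
bbox_results, segm_results = result      # Mask R-CNN returns a (bbox, segm) tuple per image
# bbox_results: num_classes arrays of shape (n, 5) -> [x1, y1, x2, y2, score]
# segm_results: num_classes lists, each holding n full-image boolean masks of shape (H, W)
print(len(segm_results), segm_results[0][0].shape if segm_results[0] else None)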
  1. In FCNMaskHead.get_seg_masks you can see that instances are processed in chunks: _do_paste_mask resizes each predicted mask to its box at image resolution and returns masks_chunk (a binary h_o x w_o matrix per instance) together with the slice positions spatial_inds, and the chunk is then written into the image-sized matrix im_mask at those positions (a toy sketch of this pasting step follows the snippet).
im_mask = torch.zeros(
    N,
    img_h,
    img_w,
    device=device,
    dtype=torch.bool if threshold >= 0 else torch.uint8)

if not self.class_agnostic:
    mask_pred = mask_pred[range(N), labels][:, None]

for inds in chunks:
    masks_chunk, spatial_inds = _do_paste_mask(
        mask_pred[inds],
        bboxes[inds],
        img_h,
        img_w,
        skip_empty=device.type == 'cpu')

    if threshold >= 0:
        masks_chunk = (masks_chunk >= threshold).to(dtype=torch.bool)
    else:
        # for visualization and debugging
        masks_chunk = (masks_chunk * 255).to(dtype=torch.uint8)

    im_mask[(inds, ) + spatial_inds] = masks_chunk

for i in range(N):
    cls_segms[labels[i]].append(im_mask[i].detach().cpu().numpy())
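So the per-image result cls_segms is a list with one entry per class, each entry a list of full-image H x W boolean numpy arrays. A toy sketch of the pasting step itself (not mmdet code; F.interpolate stands in for _do_paste_mask, which uses grid_sample internally): resize one instance's 28x28 prediction to its box, threshold it, and write it into an image-sized boolean canvas.

import torch
import torch.nn.functional as F

img_h, img_w = 480, 640
mask_pred = torch.rand(1, 1, 28, 28)      # one instance's 28x28 mask probabilities (toy values)
x1, y1, x2, y2 = 100, 120, 260, 300       # its detected box in image coordinates

# Resize the 28x28 prediction to the box size (roughly what _do_paste_mask does).
box_mask = F.interpolate(mask_pred, size=(y2 - y1, x2 - x1),
                         mode='bilinear', align_corners=False)[0, 0]

# Threshold and paste into an image-sized canvas, like im_mask[(inds,) + spatial_inds] = masks_chunk.
im_mask = torch.zeros(img_h, img_w, dtype=torch.bool)
im_mask[y1:y2, x1:x2] = box_mask >= 0.5

print(im_mask.shape, im_mask.sum().item())   # full-image binary mask, one entry of cls_segms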
  2. The returned results are RLE-encoded ("encode mask results") here; this is also where the show / out_dir options take effect. A sketch of the encoding follows the snippet.
    mmdet/apis/test.py
def single_gpu_test(model,
                    data_loader,
                    show=False,
                    out_dir=None,
                    show_score_thr=0.3):
    model.eval()
    results = []
    dataset = data_loader.dataset
    prog_bar = mmcv.ProgressBar(len(dataset))
    for i, data in enumerate(data_loader):
        with torch.no_grad():
            result = model(return_loss=False, rescale=True, **data)

        batch_size = len(result)
        if show or out_dir:
            if batch_size == 1 and isinstance(data['img'][0], torch.Tensor):
                img_tensor = data['img'][0]
            else:
                img_tensor = data['img'][0].data[0]
            img_metas = data['img_metas'][0].data[0]
            imgs = tensor2imgs(img_tensor, **img_metas[0]['img_norm_cfg'])
            assert len(imgs) == len(img_metas)

            for i, (img, img_meta) in enumerate(zip(imgs, img_metas)):
                h, w, _ = img_meta['img_shape']
                img_show = img[:h, :w, :]

                ori_h, ori_w = img_meta['ori_shape'][:-1]
                img_show = mmcv.imresize(img_show, (ori_w, ori_h))

                if out_dir:
                    out_file = osp.join(out_dir, img_meta['ori_filename'])
                else:
                    out_file = None

                model.module.show_result(
                    img_show,
                    result[i],
                    show=show,
                    out_file=out_file,
                    score_thr=show_score_thr)

        # encode mask results
        if isinstance(result[0], tuple):
            result = [(bbox_results, encode_mask_results(mask_results))
                      for bbox_results, mask_results in result]
        results.extend(result)

        for _ in range(batch_size):
            prog_bar.update()
    return results
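encode_mask_results (imported from mmdet.core in test.py) compresses each full-image boolean mask into a COCO RLE dict via pycocotools, so results carries compact RLEs instead of raw H x W arrays. A sketch of the equivalent operation on a single mask:

import numpy as np
import pycocotools.mask as mask_util

# A toy full-image boolean mask, like one element of cls_segms above.
im_mask = np.zeros((480, 640), dtype=bool)
im_mask[120:300, 100:260] = True

# RLE-encode it the way encode_mask_results does: Fortran-ordered uint8 -> COCO RLE dict.
rle = mask_util.encode(np.asfortranarray(im_mask.astype(np.uint8)))
print(rle['size'])          # [480, 640]
print(type(rle['counts']))  # bytes -- decoded to str before json dumping, see _segm2json below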

  3. Finally, results2json / _segm2json in mmdet/datasets/coco.py dump the encoded masks into COCO-style json (a decoding sketch follows the snippet):
def results2json(self, results, outfile_prefix):
    result_files = dict()
    if isinstance(results[0], list):
        json_results = self._det2json(results)
        result_files['bbox'] = f'{outfile_prefix}.bbox.json'
        result_files['proposal'] = f'{outfile_prefix}.bbox.json'
        mmcv.dump(json_results, result_files['bbox'])
    elif isinstance(results[0], tuple):
        json_results = self._segm2json(results)
        result_files['bbox'] = f'{outfile_prefix}.bbox.json'
        result_files['proposal'] = f'{outfile_prefix}.bbox.json'
        result_files['segm'] = f'{outfile_prefix}.segm.json'
        mmcv.dump(json_results[0], result_files['bbox'])
        mmcv.dump(json_results[1], result_files['segm'])

def _segm2json(self, results):
    """Convert instance segmentation results to COCO json style."""
    bbox_json_results = []
    segm_json_results = []
    for idx in range(len(self)):
        img_id = self.img_ids[idx]
        det, seg = results[idx]
        for label in range(len(det)):
            # bbox results
            bboxes = det[label]
            for i in range(bboxes.shape[0]):
                data = dict()
                data['image_id'] = img_id
                data['bbox'] = self.xyxy2xywh(bboxes[i])
                data['score'] = float(bboxes[i][4])
                data['category_id'] = self.cat_ids[label]
                bbox_json_results.append(data)

            # segm results
            # some detectors use different scores for bbox and mask
            if isinstance(seg, tuple):
                segms = seg[0][label]
                mask_score = seg[1][label]
            else:
                segms = seg[label]
                mask_score = [bbox[4] for bbox in bboxes]
            for i in range(bboxes.shape[0]):
                data = dict()
                data['image_id'] = img_id
                data['bbox'] = self.xyxy2xywh(bboxes[i])
                data['score'] = float(mask_score[i])
                data['category_id'] = self.cat_ids[label]
                if isinstance(segms[i]['counts'], bytes):
                    segms[i]['counts'] = segms[i]['counts'].decode()
                data['segmentation'] = segms[i]
                segm_json_results.append(data)
    return bbox_json_results, segm_json_results
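On the consumer side, the RLE stored in data['segmentation'] can be decoded back into a binary mask with pycocotools. A round-trip sketch (encode as above, stringify counts the way _segm2json does, then decode):

import numpy as np
import pycocotools.mask as mask_util

toy = np.zeros((480, 640), dtype=np.uint8)
toy[120:300, 100:260] = 1
rle = mask_util.encode(np.asfortranarray(toy))
rle['counts'] = rle['counts'].decode()       # what _segm2json writes into data['segmentation']

# Decode back; older pycocotools versions expect bytes counts, so convert defensively.
if isinstance(rle['counts'], str):
    rle['counts'] = rle['counts'].encode()
mask = mask_util.decode(rle)                 # (480, 640) uint8 array of 0/1
print(mask.shape, int(mask.sum()))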

Call chain
(Figure 1: call-chain diagram)

  4. AttributeError: 'ConfigDict' object has no attribute 'log_level'
    The configs under configs/_base_/models are incomplete fragments; run with any full config file under mmdetection/configs/faster_rcnn/ (or mask_rcnn/) instead, as the sketch below shows.
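A minimal check of why the error happens, assuming an mmdetection 2.x checkout and mmcv installed (the config paths below follow the 2.x layout): the _base_ model fragment defines only the model, while a full config also composes default_runtime.py, which is where log_level lives.

from mmcv import Config

# The _base_ fragment defines only the model, so runtime keys like log_level are missing.
frag = Config.fromfile('configs/_base_/models/mask_rcnn_r50_fpn.py')
print(getattr(frag, 'log_level', None))   # None -- accessing it directly raises the AttributeError

# A full config composes model + dataset + schedule + default_runtime, which defines log_level.
full = Config.fromfile('configs/mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py')
print(full.log_level)                     # 'INFO'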
