mmdetection/mmdet/datasets/coco.py
On how ignore is handled: if an annotation in instances_train2017.json carries `ignore`, its bbox and the corresponding segm are skipped with a continue:
def _parse_ann_info(self, ann_info, with_mask=True):
"""Parse bbox and mask annotation.
Args:
ann_info (list[dict]): Annotation info of an image.
with_mask (bool): Whether to parse mask annotations.
Returns:
dict: A dict containing the following keys: bboxes, bboxes_ignore,
labels, masks, mask_polys, poly_lens.
"""
gt_bboxes = []
gt_labels = []
gt_bboxes_ignore = []
# Two formats are provided.
# 1. mask: a binary map of the same size of the image.
# 2. polys: each mask consists of one or several polys, each poly is a
# list of float.
if with_mask:
gt_masks = []
gt_mask_polys = []
gt_poly_lens = []
for i, ann in enumerate(ann_info):
if ann.get('ignore', False):
continue
x1, y1, w, h = ann['bbox']
if ann['area'] <= 0 or w < 1 or h < 1:
continue
bbox = [x1, y1, x1 + w - 1, y1 + h - 1]
if ann['iscrowd']:
gt_bboxes_ignore.append(bbox)
else:
gt_bboxes.append(bbox)
gt_labels.append(self.cat2label[ann['category_id']])
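A minimal sketch of the filtering above with hypothetical toy annotations: 'ignore' drops the entry outright, 'iscrowd' routes the box to gt_bboxes_ignore, and everything else becomes a regular GT box.
anns = [
    dict(bbox=[0, 0, 10, 10], area=100, iscrowd=0, ignore=True, category_id=1),  # skipped via continue
    dict(bbox=[5, 5, 10, 10], area=100, iscrowd=1, category_id=1),               # -> gt_bboxes_ignore
    dict(bbox=[2, 2, 10, 10], area=100, iscrowd=0, category_id=1),               # -> gt_bboxes + gt_labels
]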
Then:
mmdetection/mmdet/datasets/custom.py
def prepare_train_img(self, idx):
img_info = self.img_infos[idx]
# load image
img = mmcv.imread(osp.join(self.img_prefix, img_info['filename']))
# corruption
if self.corruption is not None:
img = corrupt(
img,
severity=self.corruption_severity,
corruption_name=self.corruption)
# load proposals if necessary
if self.proposals is not None:
proposals = self.proposals[idx][:self.num_max_proposals]
# TODO: Handle empty proposals properly. Currently images with
# no proposals are just ignored, but they can be used for
# training in concept.
if len(proposals) == 0:
return None
if not (proposals.shape[1] == 4 or proposals.shape[1] == 5):
raise AssertionError(
'proposals should have shapes (n, 4) or (n, 5), '
'but found {}'.format(proposals.shape))
if proposals.shape[1] == 5:
scores = proposals[:, 4, None]
proposals = proposals[:, :4]
else:
scores = None
ann = self.get_ann_info(idx)
gt_bboxes = ann['bboxes']
gt_labels = ann['labels']
if self.with_crowd:
gt_bboxes_ignore = ann['bboxes_ignore']
# skip the image if there is no valid gt bbox
if len(gt_bboxes) == 0 and self.skip_img_without_anno:
warnings.warn('Skip the image "%s" that has no valid gt bbox' %
osp.join(self.img_prefix, img_info['filename']))
return None
# extra augmentation
if self.extra_aug is not None:
img, gt_bboxes, gt_labels = self.extra_aug(img, gt_bboxes,
gt_labels)
# apply transforms
flip = True if np.random.rand() < self.flip_ratio else False
# randomly sample a scale
img_scale = random_scale(self.img_scales, self.multiscale_mode)
img, img_shape, pad_shape, scale_factor = self.img_transform(
img, img_scale, flip, keep_ratio=self.resize_keep_ratio)
img = img.copy()
if self.with_seg:
gt_seg = mmcv.imread(
osp.join(self.seg_prefix,
img_info['filename'].replace('jpg', 'png')),
flag='unchanged')
gt_seg = self.seg_transform(gt_seg.squeeze(), img_scale, flip)
gt_seg = mmcv.imrescale(
gt_seg, self.seg_scale_factor, interpolation='nearest')
gt_seg = gt_seg[None, ...]
if self.proposals is not None:
proposals = self.bbox_transform(proposals, img_shape, scale_factor,
flip)
proposals = np.hstack([proposals, scores
]) if scores is not None else proposals
gt_bboxes = self.bbox_transform(gt_bboxes, img_shape, scale_factor,
flip)
if self.with_crowd:
gt_bboxes_ignore = self.bbox_transform(gt_bboxes_ignore, img_shape,
scale_factor, flip)
if self.with_mask:
gt_masks = self.mask_transform(ann['masks'], pad_shape,
scale_factor, flip)
Some augmented samples that feed pure background in (background images get a placeholder annotation with empty bbox and segmentation):
{
  "id": 50,
  "image_id": 47,
  "category_id": 1,
  "segmentation": [],
  "bbox": [],
  "iscrowd": 0,
  "area": 1.0
},
{
  "id": 51,
  "image_id": 48,
  "category_id": 1,
  "segmentation": [[61, 72, 59, 78, 58, 84, 58, 89, 61, 92, 64, 90, 67, 83, 68, 77, 66, 72]],
  "bbox": [58, 72, 10, 20],
  "iscrowd": 0,
  "area": 1.0
},
{
  "id": 52,
  "image_id": 49,
  "category_id": 1,
  "segmentation": [],
  "bbox": [],
  "iscrowd": 0,
  "area": 1.0
},
{
  "id": 53,
  "image_id": 50,
  "category_id": 1,
  "segmentation": [],
  "bbox": [],
  "iscrowd": 0,
  "area": 1.0
},
{
  "id": 54,
  "image_id": 51,
  "category_id": 1,
  "segmentation": [],
  "bbox": [],
  "iscrowd": 0,
  "area": 1.0
}
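A quick sanity check that such placeholder entries load cleanly (a hedged sketch, assuming pycocotools is installed and image_id 47 exists as above):
from pycocotools.coco import COCO

coco = COCO('annotations/instances_train2017.json')
ann_ids = coco.getAnnIds(imgIds=[47])
print(coco.loadAnns(ann_ids))  # [{'id': 50, ..., 'segmentation': [], 'bbox': [], ...}]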
mmdetection/mmdet/datasets/coco.py
Modified version:
def _parse_ann_info(self, ann_info, with_mask=True):
"""Parse bbox and mask annotation.
Args:
ann_info (list[dict]): Annotation info of an image.
with_mask (bool): Whether to parse mask annotations.
Returns:
dict: A dict containing the following keys: bboxes, bboxes_ignore,
labels, masks, mask_polys, poly_lens.
"""
gt_bboxes = []
gt_labels = []
gt_bboxes_ignore = []
# Two formats are provided.
# 1. mask: a binary map of the same size of the image.
# 2. polys: each mask consists of one or several polys, each poly is a
# list of float.
if with_mask:
gt_masks = []
gt_mask_polys = []
gt_poly_lens = []
# a background image is marked by a single placeholder annotation whose bbox is empty
if len(ann_info) == 1 and len(ann_info[0]['bbox']) == 0:
gt_bboxes = []
gt_labels = []
gt_bboxes_ignore = []
gt_masks = []
gt_mask_polys = []
gt_poly_lens = []
if gt_bboxes:
gt_bboxes = np.array(gt_bboxes, dtype=np.float32)
gt_labels = np.array(gt_labels, dtype=np.int64)
else:
gt_bboxes = np.zeros((0, 4), dtype=np.float32)
gt_labels = np.array([], dtype=np.int64)
if gt_bboxes_ignore:
gt_bboxes_ignore = np.array(gt_bboxes_ignore, dtype=np.float32)
else:
gt_bboxes_ignore = np.zeros((0, 4), dtype=np.float32)
ann = dict(
bboxes=gt_bboxes, labels=gt_labels, bboxes_ignore=gt_bboxes_ignore)
if with_mask:
ann['masks'] = gt_masks
# poly format is not used in the current implementation
ann['mask_polys'] = gt_mask_polys
ann['poly_lens'] = gt_poly_lens
return ann
else:
for i, ann in enumerate(ann_info):
if ann.get('ignore', False):
continue
x1, y1, w, h = ann['bbox']
if ann['area'] <= 0 or w < 1 or h < 1:
continue
bbox = [x1, y1, x1 + w - 1, y1 + h - 1]
if ann['iscrowd']:
gt_bboxes_ignore.append(bbox)
else:
gt_bboxes.append(bbox)
gt_labels.append(self.cat2label[ann['category_id']])
if with_mask:
gt_masks.append(self.coco.annToMask(ann))
mask_polys = [
p for p in ann['segmentation'] if len(p) >= 6
] # valid polygons have >= 3 points (6 coordinates)
poly_lens = [len(p) for p in mask_polys]
gt_mask_polys.append(mask_polys)
gt_poly_lens.extend(poly_lens)
if gt_bboxes:
gt_bboxes = np.array(gt_bboxes, dtype=np.float32)
gt_labels = np.array(gt_labels, dtype=np.int64)
else:
gt_bboxes = np.zeros((0, 4), dtype=np.float32)
gt_labels = np.array([], dtype=np.int64)
if gt_bboxes_ignore:
gt_bboxes_ignore = np.array(gt_bboxes_ignore, dtype=np.float32)
else:
gt_bboxes_ignore = np.zeros((0, 4), dtype=np.float32)
ann = dict(
bboxes=gt_bboxes, labels=gt_labels, bboxes_ignore=gt_bboxes_ignore)
if with_mask:
ann['masks'] = gt_masks
# poly format is not used in the current implementation
ann['mask_polys'] = gt_mask_polys
ann['poly_lens'] = gt_poly_lens
return ann
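In isolation, the background branch boils down to this (a minimal sketch, numpy only):
import numpy as np

# a background image carries exactly one placeholder annotation with an empty bbox;
# the parser returns correctly shaped empty arrays so to_tensor() and the bbox
# transforms downstream do not crash
ann_info = [dict(bbox=[], segmentation=[], iscrowd=0, area=1.0, category_id=1)]
if len(ann_info) == 1 and len(ann_info[0]['bbox']) == 0:
    ann = dict(
        bboxes=np.zeros((0, 4), dtype=np.float32),
        labels=np.array([], dtype=np.int64),
        bboxes_ignore=np.zeros((0, 4), dtype=np.float32))
print(ann['bboxes'].shape)  # (0, 4)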
mmdetection/mmdet/datasets/custom.py
def prepare_train_img(self, idx):
img_info = self.img_infos[idx]
# load image
img = mmcv.imread(osp.join(self.img_prefix, img_info['filename']))
###yangninghua
#import cv2
#img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
#img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
# corruption
if self.corruption is not None:
img = corrupt(
img,
severity=self.corruption_severity,
corruption_name=self.corruption)
# load proposals if necessary
if self.proposals is not None:
proposals = self.proposals[idx][:self.num_max_proposals]
# TODO: Handle empty proposals properly. Currently images with
# no proposals are just ignored, but they can be used for
# training in concept.
if len(proposals) == 0:
return None
if not (proposals.shape[1] == 4 or proposals.shape[1] == 5):
raise AssertionError(
'proposals should have shapes (n, 4) or (n, 5), '
'but found {}'.format(proposals.shape))
if proposals.shape[1] == 5:
scores = proposals[:, 4, None]
proposals = proposals[:, :4]
else:
scores = None
ann = self.get_ann_info(idx)
gt_bboxes = ann['bboxes']
gt_labels = ann['labels']
if self.with_crowd:
gt_bboxes_ignore = ann['bboxes_ignore']
# # skip the image if there is no valid gt bbox
# if len(gt_bboxes) == 0 and self.skip_img_without_anno:
# warnings.warn('Skip the image "%s" that has no valid gt bbox' %
# osp.join(self.img_prefix, img_info['filename']))
# return None
# instead of skipping images without a valid gt bbox, treat them as pure background samples
if len(gt_bboxes) == 0 and self.skip_img_without_anno:
# apply transforms
flip = True if np.random.rand() < self.flip_ratio else False
# randomly sample a scale
img_scale = random_scale(self.img_scales, self.multiscale_mode)
img, img_shape, pad_shape, scale_factor = self.img_transform(
img, img_scale, flip, keep_ratio=self.resize_keep_ratio)
img = img.copy()
if self.with_seg:
gt_seg = mmcv.imread(
osp.join(self.seg_prefix,
img_info['filename'].replace('jpg', 'png')),
flag='unchanged')
gt_seg = self.seg_transform(gt_seg.squeeze(), img_scale, flip)
gt_seg = mmcv.imrescale(
gt_seg, self.seg_scale_factor, interpolation='nearest')
gt_seg = gt_seg[None, ...]
if self.proposals is not None:
proposals = self.bbox_transform(proposals, img_shape, scale_factor,
flip)
proposals = np.hstack([proposals, scores
]) if scores is not None else proposals
ori_shape = (img_info['height'], img_info['width'], 3)
img_meta = dict(
ori_shape=ori_shape,
img_shape=img_shape,
pad_shape=pad_shape,
scale_factor=scale_factor,
flip=flip)
data = dict(
img=DC(to_tensor(img), stack=True),
img_meta=DC(img_meta, cpu_only=True),
gt_bboxes=DC(to_tensor(gt_bboxes)))
if self.proposals is not None:
data['proposals'] = DC(to_tensor(proposals))
if self.with_label:
data['gt_labels'] = DC(to_tensor(gt_labels))
if self.with_crowd:
data['gt_bboxes_ignore'] = DC(to_tensor(gt_bboxes_ignore))
if self.with_mask:
data['gt_masks'] = DC(ann['masks'], cpu_only=True)
if self.with_seg:
data['gt_semantic_seg'] = DC(to_tensor(gt_seg), stack=True)
return data
# extra augmentation
if self.extra_aug is not None:
img, gt_bboxes, gt_labels = self.extra_aug(img, gt_bboxes,
gt_labels)
# apply transforms
flip = True if np.random.rand() < self.flip_ratio else False
# randomly sample a scale
img_scale = random_scale(self.img_scales, self.multiscale_mode)
img, img_shape, pad_shape, scale_factor = self.img_transform(
img, img_scale, flip, keep_ratio=self.resize_keep_ratio)
img = img.copy()
if self.with_seg:
gt_seg = mmcv.imread(
osp.join(self.seg_prefix,
img_info['filename'].replace('jpg', 'png')),
flag='unchanged')
gt_seg = self.seg_transform(gt_seg.squeeze(), img_scale, flip)
gt_seg = mmcv.imrescale(
gt_seg, self.seg_scale_factor, interpolation='nearest')
gt_seg = gt_seg[None, ...]
if self.proposals is not None:
proposals = self.bbox_transform(proposals, img_shape, scale_factor,
flip)
proposals = np.hstack([proposals, scores
]) if scores is not None else proposals
gt_bboxes = self.bbox_transform(gt_bboxes, img_shape, scale_factor,
flip)
if self.with_crowd:
gt_bboxes_ignore = self.bbox_transform(gt_bboxes_ignore, img_shape,
scale_factor, flip)
if self.with_mask:
gt_masks = self.mask_transform(ann['masks'], pad_shape,
scale_factor, flip)
ori_shape = (img_info['height'], img_info['width'], 3)
img_meta = dict(
ori_shape=ori_shape,
img_shape=img_shape,
pad_shape=pad_shape,
scale_factor=scale_factor,
flip=flip)
data = dict(
img=DC(to_tensor(img), stack=True),
img_meta=DC(img_meta, cpu_only=True),
gt_bboxes=DC(to_tensor(gt_bboxes)))
if self.proposals is not None:
data['proposals'] = DC(to_tensor(proposals))
if self.with_label:
data['gt_labels'] = DC(to_tensor(gt_labels))
if self.with_crowd:
data['gt_bboxes_ignore'] = DC(to_tensor(gt_bboxes_ignore))
if self.with_mask:
data['gt_masks'] = DC(gt_masks, cpu_only=True)
if self.with_seg:
data['gt_semantic_seg'] = DC(to_tensor(gt_seg), stack=True)
return data
Control then flows through:
def __getitem__(self, idx):
if self.test_mode:
return self.prepare_test_img(idx)
while True:
data = self.prepare_train_img(idx)
if data is None:
idx = self._rand_another(idx)
continue
return data
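When prepare_train_img returns None, __getitem__ resamples through _rand_another. For reference, in this era of the codebase the helper just draws another index from the same aspect-ratio group (quoted from memory, treat as a sketch):
def _rand_another(self, idx):
    # indices of all images whose aspect-ratio flag matches idx
    pool = np.where(self.flag == self.flag[idx])[0]
    return np.random.choice(pool)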
The main flow during training:
mmdetection/tools/train.py
mmdetection/mmdet/apis/train.py
mmcv/runner/runner.py
def run(self, data_loaders, workflow, max_epochs, **kwargs):
"""Start running.
Args:
data_loaders (list[:obj:`DataLoader`]): Dataloaders for training
and validation.
workflow (list[tuple]): A list of (phase, epochs) to specify the
running order and epochs. E.g, [('train', 2), ('val', 1)] means
running 2 epochs for training and 1 epoch for validation,
iteratively.
max_epochs (int): Total training epochs.
"""
assert isinstance(data_loaders, list)
assert mmcv.is_list_of(workflow, tuple)
assert len(data_loaders) == len(workflow)
self._max_epochs = max_epochs
work_dir = self.work_dir if self.work_dir is not None else 'NONE'
self.logger.info('Start running, host: %s, work_dir: %s',
get_host_info(), work_dir)
self.logger.info('workflow: %s, max: %d epochs', workflow, max_epochs)
self.call_hook('before_run')
while self.epoch < max_epochs:
for i, flow in enumerate(workflow):
mode, epochs = flow
if isinstance(mode, str): # self.train()
if not hasattr(self, mode):
raise ValueError(
'runner has no method named "{}" to run an epoch'.
format(mode))
epoch_runner = getattr(self, mode)
elif callable(mode): # custom train()
epoch_runner = mode
else:
raise TypeError('mode in workflow must be a str or '
'callable function, not {}'.format(
type(mode)))
for _ in range(epochs):
if mode == 'train' and self.epoch >= max_epochs:
return
epoch_runner(data_loaders[i], **kwargs)
time.sleep(1) # wait for some hooks like loggers to finish
self.call_hook('after_run')
epoch_runner(data_loaders[i], **kwargs)
mmcv/runner/runner.py
def train(self, data_loader, **kwargs):
self.model.train()
self.mode = 'train'
self.data_loader = data_loader
self._max_iters = self._max_epochs * len(data_loader)
self.call_hook('before_train_epoch')
for i, data_batch in enumerate(data_loader):
self._inner_iter = i
self.call_hook('before_train_iter')
outputs = self.batch_processor(
self.model, data_batch, train_mode=True, **kwargs)
if not isinstance(outputs, dict):
raise TypeError('batch_processor() must return a dict')
if 'log_vars' in outputs:
self.log_buffer.update(outputs['log_vars'],
outputs['num_samples'])
self.outputs = outputs
self.call_hook('after_train_iter')
self._iter += 1
self.call_hook('after_train_epoch')
self._epoch += 1
outputs = self.batch_processor(
self.model, data_batch, train_mode=True, **kwargs)
mmdetection/mmdet/apis/train.py
def batch_processor(model, data, train_mode):
losses = model(**data)
loss, log_vars = parse_losses(losses)
outputs = dict(
loss=loss, log_vars=log_vars, num_samples=len(data['img'].data))
return outputs
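batch_processor delegates aggregation to parse_losses in the same file; roughly (a sketch of that era's mmdet code, not verbatim), it averages each loss tensor, sums every entry whose key contains 'loss' into the total, and converts the rest to scalars for logging:
from collections import OrderedDict
import torch

def parse_losses(losses):
    log_vars = OrderedDict()
    for loss_name, loss_value in losses.items():
        if isinstance(loss_value, torch.Tensor):
            log_vars[loss_name] = loss_value.mean()
        elif isinstance(loss_value, list):
            log_vars[loss_name] = sum(_loss.mean() for _loss in loss_value)
        else:
            raise TypeError(
                '{} is not a tensor or list of tensors'.format(loss_name))
    # the total loss is the sum of every logged item whose name contains 'loss'
    loss = sum(_value for _key, _value in log_vars.items() if 'loss' in _key)
    log_vars['loss'] = loss
    for name in log_vars:
        log_vars[name] = log_vars[name].item()
    return loss, log_vars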
losses = model(**data)
torch/nn/modules/module.py
def __call__(self, *input, **kwargs):
for hook in self._forward_pre_hooks.values():
hook(self, input)
if torch._C._get_tracing_state():
result = self._slow_forward(*input, **kwargs)
else:
result = self.forward(*input, **kwargs)
for hook in self._forward_hooks.values():
hook_result = hook(self, input, result)
if hook_result is not None:
raise RuntimeError(
"forward hooks should never return any values, but '{}'"
"didn't return None".format(hook))
if len(self._backward_hooks) > 0:
var = result
while not isinstance(var, torch.Tensor):
if isinstance(var, dict):
var = next((v for v in var.values() if isinstance(v, torch.Tensor)))
else:
var = var[0]
grad_fn = var.grad_fn
if grad_fn is not None:
for hook in self._backward_hooks.values():
wrapper = functools.partial(hook, self)
functools.update_wrapper(wrapper, hook)
grad_fn.register_hook(wrapper)
return result
result = self.forward(*input, **kwargs)
torch/nn/parallel/data_parallel.py
def forward(self, *inputs, **kwargs):
if not self.device_ids:
return self.module(*inputs, **kwargs)
for t in chain(self.module.parameters(), self.module.buffers()):
if t.device != self.src_device_obj:
raise RuntimeError("module must have its parameters and buffers "
"on device {} (device_ids[0]) but found one of "
"them on device: {}".format(self.src_device_obj, t.device))
inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids)
if len(self.device_ids) == 1:
return self.module(*inputs[0], **kwargs[0])
replicas = self.replicate(self.module, self.device_ids[:len(inputs)])
outputs = self.parallel_apply(replicas, inputs, kwargs)
return self.gather(outputs, self.output_device)
return self.module(*inputs[0], **kwargs[0])
torch/nn/modules/module.py
def __call__(self, *input, **kwargs):
for hook in self._forward_pre_hooks.values():
hook(self, input)
if torch._C._get_tracing_state():
result = self._slow_forward(*input, **kwargs)
else:
result = self.forward(*input, **kwargs)
for hook in self._forward_hooks.values():
hook_result = hook(self, input, result)
if hook_result is not None:
raise RuntimeError(
"forward hooks should never return any values, but '{}'"
"didn't return None".format(hook))
if len(self._backward_hooks) > 0:
var = result
while not isinstance(var, torch.Tensor):
if isinstance(var, dict):
var = next((v for v in var.values() if isinstance(v, torch.Tensor)))
else:
var = var[0]
grad_fn = var.grad_fn
if grad_fn is not None:
for hook in self._backward_hooks.values():
wrapper = functools.partial(hook, self)
functools.update_wrapper(wrapper, hook)
grad_fn.register_hook(wrapper)
return result
result = self.forward(*input, **kwargs)
mmdetection/mmdet/core/fp16/decorators.py
def auto_fp16(apply_to=None, out_fp32=False):
    def auto_fp16_wrapper(old_func):
@functools.wraps(old_func)
def new_func(*args, **kwargs):
# check if the module has set the attribute `fp16_enabled`, if not,
# just fallback to the original method.
if not isinstance(args[0], torch.nn.Module):
raise TypeError('@auto_fp16 can only be used to decorate the '
'method of nn.Module')
if not (hasattr(args[0], 'fp16_enabled') and args[0].fp16_enabled):
return old_func(*args, **kwargs)
# get the arg spec of the decorated method
args_info = getfullargspec(old_func)
# get the argument names to be casted
args_to_cast = args_info.args if apply_to is None else apply_to
# convert the args that need to be processed
new_args = []
# NOTE: default args are not taken into consideration
if args:
arg_names = args_info.args[:len(args)]
for i, arg_name in enumerate(arg_names):
if arg_name in args_to_cast:
new_args.append(
cast_tensor_type(args[i], torch.float, torch.half))
else:
new_args.append(args[i])
# convert the kwargs that need to be processed
new_kwargs = {}
if kwargs:
for arg_name, arg_value in kwargs.items():
if arg_name in args_to_cast:
new_kwargs[arg_name] = cast_tensor_type(
arg_value, torch.float, torch.half)
else:
new_kwargs[arg_name] = arg_value
# apply converted arguments to the decorated method
output = old_func(*new_args, **new_kwargs)
# cast the results back to fp32 if necessary
if out_fp32:
output = cast_tensor_type(output, torch.half, torch.float)
return output
return new_func
return auto_fp16_wrapper
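A hedged usage sketch of the decorator (hypothetical toy module; assumes auto_fp16 is importable from mmdet.core): arguments named in apply_to are cast to half precision, and only when the module has set fp16_enabled.
import torch
from mmdet.core import auto_fp16

class ToyHead(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.fp16_enabled = True  # without this flag the decorator falls back to the original method

    @auto_fp16(apply_to=('feats', ))
    def forward(self, feats):
        return feats.sum()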
losses = model(**data)
mmdetection/mmdet/models/detectors/base.py
@auto_fp16(apply_to=('img', ))
def forward(self, img, img_meta, return_loss=True, **kwargs):
if return_loss:
return self.forward_train(img, img_meta, **kwargs)
else:
return self.forward_test(img, img_meta, **kwargs)
return self.forward_train(img, img_meta, **kwargs)
mmdetection/mmdet/models/detectors/two_stage.py
def forward_train(self,
img,
img_meta,
gt_bboxes,
gt_labels,
gt_bboxes_ignore=None,
gt_masks=None,
proposals=None):
x = self.extract_feat(img)
losses = dict()
# RPN forward and loss
if self.with_rpn:
rpn_outs = self.rpn_head(x)
rpn_loss_inputs = rpn_outs + (gt_bboxes, img_meta,
self.train_cfg.rpn)
rpn_losses = self.rpn_head.loss(
*rpn_loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore)
losses.update(rpn_losses)
proposal_cfg = self.train_cfg.get('rpn_proposal',
self.test_cfg.rpn)
proposal_inputs = rpn_outs + (img_meta, proposal_cfg)
proposal_list = self.rpn_head.get_bboxes(*proposal_inputs)
else:
proposal_list = proposals
# assign gts and sample proposals
if self.with_bbox or self.with_mask:
bbox_assigner = build_assigner(self.train_cfg.rcnn.assigner)
bbox_sampler = build_sampler(
self.train_cfg.rcnn.sampler, context=self)
num_imgs = img.size(0)
if gt_bboxes_ignore is None:
gt_bboxes_ignore = [None for _ in range(num_imgs)]
sampling_results = []
for i in range(num_imgs):
assign_result = bbox_assigner.assign(proposal_list[i],
gt_bboxes[i],
gt_bboxes_ignore[i],
gt_labels[i])
sampling_result = bbox_sampler.sample(
assign_result,
proposal_list[i],
gt_bboxes[i],
gt_labels[i],
feats=[lvl_feat[i][None] for lvl_feat in x])
sampling_results.append(sampling_result)
# bbox head forward and loss
if self.with_bbox:
rois = bbox2roi([res.bboxes for res in sampling_results])
# TODO: a more flexible way to decide which feature maps to use
bbox_feats = self.bbox_roi_extractor(
x[:self.bbox_roi_extractor.num_inputs], rois)
if self.with_shared_head:
bbox_feats = self.shared_head(bbox_feats)
cls_score, bbox_pred = self.bbox_head(bbox_feats)
bbox_targets = self.bbox_head.get_target(sampling_results,
gt_bboxes, gt_labels,
self.train_cfg.rcnn)
loss_bbox = self.bbox_head.loss(cls_score, bbox_pred,
*bbox_targets)
losses.update(loss_bbox)
# mask head forward and loss
if self.with_mask:
if not self.share_roi_extractor:
pos_rois = bbox2roi(
[res.pos_bboxes for res in sampling_results])
mask_feats = self.mask_roi_extractor(
x[:self.mask_roi_extractor.num_inputs], pos_rois)
if self.with_shared_head:
mask_feats = self.shared_head(mask_feats)
else:
pos_inds = []
device = bbox_feats.device
for res in sampling_results:
pos_inds.append(
torch.ones(
res.pos_bboxes.shape[0],
device=device,
dtype=torch.uint8))
pos_inds.append(
torch.zeros(
res.neg_bboxes.shape[0],
device=device,
dtype=torch.uint8))
pos_inds = torch.cat(pos_inds)
mask_feats = bbox_feats[pos_inds]
mask_pred = self.mask_head(mask_feats)
mask_targets = self.mask_head.get_target(sampling_results,
gt_masks,
self.train_cfg.rcnn)
pos_labels = torch.cat(
[res.pos_gt_labels for res in sampling_results])
loss_mask = self.mask_head.loss(mask_pred, mask_targets,
pos_labels)
losses.update(loss_mask)
return losses
# RPN forward and loss
if self.with_rpn:
rpn_outs = self.rpn_head(x)
rpn_loss_inputs = rpn_outs + (gt_bboxes, img_meta,
self.train_cfg.rpn)
rpn_losses = self.rpn_head.loss(
*rpn_loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore)
mmdetection/mmdet/models/anchor_heads/rpn_head.py
def loss(self,
cls_scores,
bbox_preds,
gt_bboxes,
img_metas,
cfg,
gt_bboxes_ignore=None):
losses = super(RPNHead, self).loss(
cls_scores,
bbox_preds,
gt_bboxes,
None,
img_metas,
cfg,
gt_bboxes_ignore=gt_bboxes_ignore)
return dict(
loss_rpn_cls=losses['loss_cls'], loss_rpn_bbox=losses['loss_bbox'])
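Note the None passed as gt_labels above: the RPN is class-agnostic, so the shared AnchorHead.loss only needs binary objectness targets here.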
After that, the call stack continues:
mmdetection/mmdet/core/fp16/decorators.py", line 127, in new_func
return old_func(*args, **kwargs)
mmdetection/mmdet/models/anchor_heads/anchor_head.py", line 179, in loss
sampling=self.sampling)
mmdetection/mmdet/core/anchor/anchor_target.py", line 63, in anchor_target
unmap_outputs=unmap_outputs)
mmdetection/mmdet/core/utils/misc.py", line 24, in multi_apply
return tuple(map(list, zip(*map_results)))
mmdetection/mmdet/core/anchor/anchor_target.py", line 116, in anchor_target_single
anchors, gt_bboxes, gt_bboxes_ignore, None, cfg)
mmdetection/mmdet/core/bbox/assign_sampling.py", line 30, in assign_and_sample
gt_labels)
mmdetection/mmdet/core/bbox/assigners/max_iou_assigner.py", line 74, in assign
raise ValueError('No gt or bboxes')
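The error comes from the guard at the top of MaxIoUAssigner.assign, which in this version rejects empty inputs outright (quoted from memory, treat as a sketch):
if bboxes.shape[0] == 0 or gt_bboxes.shape[0] == 0:
    raise ValueError('No gt or bboxes')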
two_stage.py -> rpn_head.py -> anchor_head.py -> anchor_target.py
After all this digging, it turns out the mmdetection developers have already solved this.
See this mmdetection PR: https://github.com/open-mmlab/mmdetection/pull/1531
https://github.com/open-mmlab/mmdetection/issues/425
Just use it directly, but make sure you are on the latest master.
https://github.com/open-mmlab/mmdetection/issues/2014
If you are using the COCO dataset, you need to set filter_empty_gt = False. See here and here for implementations:
https://github.com/open-mmlab/mmdetection/blob/master/mmdet/datasets/custom.py#L44
https://github.com/open-mmlab/mmdetection/blob/master/mmdet/datasets/coco.py#L52
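A hedged config sketch (paths are placeholders): on a current mmdetection, background images are kept simply by switching off the empty-GT filter in the dataset config.
data = dict(
    train=dict(
        type='CocoDataset',
        ann_file='data/coco/annotations/instances_train2017.json',
        img_prefix='data/coco/train2017/',
        filter_empty_gt=False))  # keep images without any GT boxes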
Converting my own label format to labelme, then to COCO:
images without any labels are kept as background training samples, with an image_id
but no ann entries.
# -*- coding: utf-8 -*-
import sys, getopt
import os
import json
import cv2
import random
import numpy as np
np.random.seed(41)
import glob
import shutil
def mkdir_os(path):
if not os.path.exists(path):
os.makedirs(path)
# labelme-to-COCO format conversion
class Lableme2CoCo:
def __init__(self, classname_to_id, jpgpng):
self.images = []
self.annotations = []
self.categories = []
self.img_id = 0
self.ann_id = 0
self.classname_to_id = classname_to_id
self.jpgpng = jpgpng
def save_coco_json(self, instance, save_path):
import io
#json.dump(instance, io.open(save_path, 'w', encoding='utf-8'), ensure_ascii=False, indent=1)  # indent=2 is prettier
with io.open(save_path, 'w', encoding="utf-8") as outfile:
my_json_str = json.dumps(instance, ensure_ascii=False, indent=1)
# not needed in Python 3:
# if isinstance(my_json_str, str):
# my_json_str = my_json_str.decode("utf-8")
outfile.write(my_json_str)
# Build the COCO instance from a list of labelme json files
def to_coco(self, json_path_list, ori_path_new):
self._init_categories()
for json_path in json_path_list:
obj = self.read_jsonfile(json_path)
self.images.append(self._image(obj, json_path, ori_path_new))
shapes = obj['shapes']
# background image: keep the image entry but emit no annotations
if len(shapes) == 1 and len(shapes[0]['points']) == 0:
self.img_id += 1
continue
for shape in shapes:
annotation = self._annotation(shape)
self.annotations.append(annotation)
self.ann_id += 1
self.img_id += 1
instance = {}
instance['info'] = 'spytensor created'
instance['license'] = ['license']
instance['images'] = self.images
instance['annotations'] = self.annotations
instance['categories'] = self.categories
return instance
# Build the category list
def _init_categories(self):
for k, v in self.classname_to_id.items():
category = {}
category['id'] = v
category['name'] = k
self.categories.append(category)
# Build the COCO "images" entry
def _image(self, obj, path, ori_path):
image = {}
#from labelme import utils
#img_x = utils.img_b64_to_arr(obj['imageData'])
#---------------------------------------
#print(str(obj['imagePath']))
name = str(obj['imagePath']).split('/')[-1]
newname = os.path.join(ori_path,name)
img_x = cv2.imread(newname)
if img_x is None:
print('\nLableme2CoCo error: cannot read image', newname)
exit()
h, w = img_x.shape[:-1]
image['height'] = h
image['width'] = w
image['id'] = self.img_id
image['file_name'] = os.path.basename(path).replace("json", self.jpgpng)
return image
# Build the COCO "annotations" entry
def _annotation(self, shape):
label = shape['label']
points = shape['points']
annotation = {}
annotation['id'] = self.ann_id
annotation['image_id'] = self.img_id
annotation['category_id'] = int(self.classname_to_id[label])
if len(points) > 0:
annotation['segmentation'] = [np.asarray(points).flatten().tolist()]
annotation['bbox'] = self._get_box(points)
else:
annotation['segmentation'] = []
annotation['bbox'] = []
if int(shape['flags']) == 0:
annotation['iscrowd'] = 0
#annotation['ignore'] = 0
elif int(shape['flags']) == 1:
annotation['iscrowd'] = 1
#annotation['ignore'] = 1
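# dummy positive area: mmdetection's _parse_ann_info drops anns with area <= 0, so any value > 0 keeps the annotation alive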
annotation['area'] = 1.0
return annotation
# Read a json file and return the parsed object
def read_jsonfile(self, path):
import io
#with io.open(path, "r", encoding='utf-8') as f:
with open(path, "r") as f:
return json.load(f)
# COCO bbox format: [x1, y1, w, h]
def _get_box(self, points):
min_x = min_y = np.inf
max_x = max_y = 0
for x, y in points:
min_x = min(min_x, x)
min_y = min(min_y, y)
max_x = max(max_x, x)
max_y = max(max_y, y)
return [min_x, min_y, max_x - min_x, max_y - min_y]
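# e.g. _get_box([[61, 72], [58, 89], [68, 77]]) -> [58, 72, 10, 17] (hypothetical points)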
def main(argv):
inputFlag = 1
process2LabelmeFlag = 1
if inputFlag:
inputfile = ''
jsonfile = ''
classname = ''
outputfile = ''
try:
opts, args = getopt.getopt(argv,"hi:j:c:o:",["ifile=","jfile=","cname=","ofile="])
except getopt.GetoptError:
print('test.py -i <inputfile> -j <jsonfile> -c <classname> -o <outputfile>')
sys.exit(2)
for opt, arg in opts:
if opt == '-h':
print('test.py -i <inputfile> -j <jsonfile> -c <classname> -o <outputfile>')
sys.exit()
elif opt in ("-i", "--ifile"):
inputfile = arg
elif opt in ("-j", "--jfile"):
jsonfile = arg
elif opt in ("-c", "--cname"):
classname = arg
elif opt in ("-o", "--ofile"):
outputfile = arg
else:
inputfile = './seg_marker0123_photoneo'
jsonfile = 'seg_marker0123_photoneo_20200211_112204.json'
classname = 'luosi'
outputfile = './coco'
print('\nInput image dir:', inputfile)
print('\nInput json file:', jsonfile)
print('\nclassname:', classname)
print('\nOutput dir:', outputfile)
save_train = "./seg/train"
save_test = "./seg/test"
json_train = "./json/train"
json_test = "./json/test"
mkdir_os(save_train)
mkdir_os(save_test)
mkdir_os(json_train)
mkdir_os(json_test)
mkdir_os(outputfile)
# class id 0 is reserved for background
classname_to_id = {classname: 1}
if process2LabelmeFlag:
# Load the raw annotation data (one json record per line)
data = []
with open(jsonfile) as f:
for line in f:
data.append(json.loads(line))
num = 0
count = len(data)
trainimg = os.listdir(inputfile)
# Iterate over the loaded records
train_txt = open('train.txt',"w")
for lab in range(count):
onedate = data[lab]
name = onedate["url_image"]
name = str(name).split("/")[-1]
jpgpng = name.split(".")[-1]
if name not in trainimg:
if name.replace(".jpg",".png") not in trainimg:
continue
else:
name = name.replace(".jpg",".png")
jpgpng = "png"
img = cv2.imread(os.path.join(inputfile,name))
if img is None:
continue
temp_hh,temp_ww = img.shape[:2]
hh = temp_hh
ww = temp_ww
# random draw for the train/test split
tempNum = random.randint(1,10)
point_size = 3
thickness = 4
if(len(onedate["result"])==0):
#continue
json_jpg = {}
json_jpg["imagePath"] = str(os.path.join(inputfile, name))
json_jpg["imageData"] = None
shapes = []
points = []
one_shape = {}
one_shape["line_color"] = None
one_shape["shape_type"] = "polygon"
one_shape["points"] = points
one_shape["flags"] = 0
one_shape["fill_color"] = None
one_shape["label"] = classname
shapes.append(one_shape)
json_jpg["shapes"] = shapes
json_jpg["version"] = "3.16.7"
json_jpg["flags"] = {}
json_jpg["fillColor"] = [
255,
0,
0,
128
]
json_jpg["lineColor"] = [
0,
255,
0,
128
]
json_jpg["imageWidth"] = ww
json_jpg["imageHeight"] = hh
# jsonData = json.dumps(json_jpg, ensure_ascii=False, indent=1)
jsonData = json.dumps(json_jpg, indent=1)
jsonname = name.split(".")[0]
jsonname = jsonname + ".json"
# save this image into its split
if tempNum == 1 or tempNum == 2:
cv2.imwrite(os.path.join(save_test, name), img)
fileObject = open(os.path.join(json_test, jsonname), 'w')
fileObject.write(jsonData)
fileObject.close()
else:
cv2.imwrite(os.path.join(save_train, name), img)
fileObject = open(os.path.join(json_train, jsonname), 'w')
fileObject.write(jsonData)
fileObject.close()
elif 'data' in onedate["result"] or 'data' in onedate["result"][0]:
json_jpg={}
json_jpg["imagePath"] = str(os.path.join(inputfile,name))
json_jpg["imageData"] = None
shapes=[]
for key in range(len(onedate["result"])):
ndata = onedate["result"][key]["data"]
if len(ndata)< 8:
continue
points=[]
# ignore: drawn in yellow
if onedate["result"][key]["tagtype"] in "purpose2":
for k in range(len(ndata)//2):
cv2.circle(img, (ndata[2*k],ndata[2*k+1]), point_size, (0, 255, 255), thickness)
points.append([ndata[2*k],ndata[2*k+1]])
# add: drawn in red
elif onedate["result"][key]["tagtype"] in "purpose1":
for k in range(len(ndata)//2):
cv2.circle(img, (ndata[2*k],ndata[2*k+1]), point_size, (0, 0, 255), thickness)
points.append([ndata[2*k],ndata[2*k+1]])
# special case: drawn in blue
else:
for k in range(len(ndata)//2):
cv2.circle(img, (ndata[2*k],ndata[2*k+1]), point_size, (255, 0, 0), thickness)
points.append([ndata[2*k],ndata[2*k+1]])
one_shape = {}
one_shape["line_color"] = None
one_shape["shape_type"] = "polygon"
one_shape["points"] = points
# decide whether this shape is ignore or a special case, and set the flag accordingly
# ignore
if onedate["result"][key]["tagtype"] in "purpose2":
one_shape["flags"] = 1
# add
elif onedate["result"][key]["tagtype"] in "purpose1":
one_shape["flags"] = 0
# special case
else:
one_shape["flags"] = 1
one_shape["fill_color"] = None
one_shape["label"] = classname
shapes.append(one_shape)
json_jpg["shapes"] = shapes
json_jpg["version"] = "3.16.7"
json_jpg["flags"] = {}
json_jpg["fillColor"] = [
255,
0,
0,
128
]
json_jpg["lineColor"] = [
0,
255,
0,
128
]
json_jpg["imageWidth"] = ww
json_jpg["imageHeight"] = hh
#jsonData = json.dumps(json_jpg, ensure_ascii=False, indent=1)
jsonData = json.dumps(json_jpg, indent=1)
jsonname = name.split(".")[0]
jsonname = jsonname+".json"
# save this image into its split
if tempNum == 1 or tempNum == 2 or tempNum == 3 or tempNum == 4:
cv2.imwrite(os.path.join(save_test,name),img)
fileObject = open(os.path.join(json_test,jsonname), 'w')
fileObject.write(jsonData)
fileObject.close()
else:
cv2.imwrite(os.path.join(save_train,name),img)
fileObject = open(os.path.join(json_train,jsonname), 'w')
fileObject.write(jsonData)
fileObject.close()
else:
continue
txtname = name.split(".")[0]
train_txt.write(txtname)
train_txt.write("\n")
num += 1
print(num,"/",count)
print("lendata_num:",num)
train_txt.close()
else:
jpgpng = "png"
print('\nImage extension manually set; change it if this causes bugs:', jpgpng)
# Create the output directory structure
if not os.path.exists("%s/annotations/"%outputfile):
os.makedirs("%s/annotations/"%outputfile)
if not os.path.exists("%s/train2017/"%outputfile):
os.makedirs("%s/train2017"%outputfile)
if not os.path.exists("%s/val2017/"%outputfile):
os.makedirs("%s/val2017"%outputfile)
# list all json files in the train json dir
json_list_train = glob.glob(json_train + "/*.json")
# list all json files in the test json dir
json_list_test = glob.glob(json_test + "/*.json")
print("train_n:", len(json_list_train), 'val_n:', len(json_list_test))
# Convert the training set to COCO json format
if len(json_list_train):
l2c_train = Lableme2CoCo(classname_to_id, jpgpng)
train_instance = l2c_train.to_coco(json_list_train, inputfile)
l2c_train.save_coco_json(train_instance, '%s/annotations/instances_train2017.json'%outputfile)
for file in json_list_train:
name = file.split('/')[-1]
name = os.path.join(inputfile,name)
shutil.copy(name.replace("json", jpgpng),"%s/train2017/"%outputfile)
if len(json_list_test):
# Convert the validation set to COCO json format
l2c_val = Lableme2CoCo(classname_to_id, jpgpng)
val_instance = l2c_val.to_coco(json_list_test, inputfile)
l2c_val.save_coco_json(val_instance, '%s/annotations/instances_val2017.json'%outputfile)
for file in json_list_test:
name = file.split('/')[-1]
name = os.path.join(inputfile,name)
shutil.copy(name.replace("json", jpgpng),"%s/val2017/"%outputfile)
if __name__ == "__main__":
main(sys.argv[1:])
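Example invocation (a sketch using the defaults hard-coded above; adjust the paths to your own data):
python test.py -i ./seg_marker0123_photoneo -j seg_marker0123_photoneo_20200211_112204.json -c luosi -o ./coco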