要支持新的数据格式,可以将它们转换为现有格式(COCO格式或PASCAL格式),也可以直接将它们转换为中间格式。您还可以选择离线转换(在通过脚本进行训练之前)或在线转换(实现一个新的数据集并在训练时进行转换)。在MMDetection中,我们建议将数据转换为coco格式并离线进行转换,这样你只需要在数据转换后修改配置的数据注释路径和类。
最简单的方法是将数据集转换为现有的数据集格式(COCO或PASCAL VOC)。
oco格式的注释json文件有以下必要的键:
'images': [
{
'file_name': 'COCO_val2014_000000001268.jpg',
'height': 427,
'width': 640,
'id': 1268
},
...
],
'annotations': [
{
'segmentation': [[192.81,
247.09,
...
219.03,
249.06]], # if you have mask labels
'area': 1035.749,
'iscrowd': 0,
'image_id': 1268,
'bbox': [192.81, 224.8, 74.73, 33.43],
'category_id': 16,
'id': 42986
},
...
],
'categories': [
{'id': 0, 'name': 'car'},
]
数据预处理完成后,用户可以通过两个步骤将定制的新数据集训练成现有格式(如coco格式)。
在这里,我们给出了一个示例来展示上述两个步骤,它使用一个CoCO格式的5个类的定制数据集来训练一个现有的Cascade Mask R-CNN R50-FPN检测器。
配置文件的修改涉及两个方面:
# the new config inherits the base configs to highlight the necessary modification
_base_ = './cascade_mask_rcnn_r50_fpn_1x_coco.py'
# 1. dataset settings
dataset_type = 'CocoDataset'
classes = ('a', 'b', 'c', 'd', 'e') #修改为自定义的数据类别名称
data = dict(
samples_per_gpu=2,
workers_per_gpu=2,
train=dict(
type=dataset_type,
# explicitly add your class names to the field `classes`
classes=classes,
ann_file='path/to/your/train/annotation_data', #修改为自己的数据路径
img_prefix='path/to/your/train/image_data'),
val=dict(
type=dataset_type,
# explicitly add your class names to the field `classes`
classes=classes,
ann_file='path/to/your/val/annotation_data',
img_prefix='path/to/your/val/image_data'),
test=dict(
type=dataset_type,
# explicitly add your class names to the field `classes`
classes=classes,
ann_file='path/to/your/test/annotation_data',
img_prefix='path/to/your/test/image_data'))
# 2. model settings
# explicitly over-write all the `num_classes` field from default 80 to 5.
model = dict(
roi_head=dict(
bbox_head=[
dict(
type='Shared2FCBBoxHead',
# explicitly over-write all the `num_classes` field from default 80 to 5.
num_classes=5),
dict(
type='Shared2FCBBoxHead',
# explicitly over-write all the `num_classes` field from default 80 to 5.
num_classes=5),
dict(
type='Shared2FCBBoxHead',
# explicitly over-write all the `num_classes` field from default 80 to 5.
num_classes=5)],
# explicitly over-write all the `num_classes` field from default 80 to 5.
mask_head=dict(num_classes=5)))
假设你定制的数据集是CoCO格式,确保你在定制的数据集中有正确的注释:
'annotations': [
{
'segmentation': [[192.81,
247.09,
...
219.03,
249.06]], # if you have mask labels
'area': 1035.749,
'iscrowd': 0,
'image_id': 1268,
'bbox': [192.81, 224.8, 74.73, 33.43],
'category_id': 16,
'id': 42986
},
...
],
# MMDetection automatically maps the uncontinuous `id` to the continuous label indices.
'categories': [
{'id': 1, 'name': 'a'}, {'id': 3, 'name': 'b'}, {'id': 4, 'name': 'c'}, {'id': 16, 'name': 'd'}, {'id': 17, 'name': 'e'},
]
对复杂城市街景的视觉理解是广泛应用的有利因素。对象检测极大地受益于大规模数据集,特别是在深度学习的背景下。然而,对于语义城市场景的理解,目前还没有足够的数据集来捕捉现实世界城市场景的复杂性。为了解决这个问题,我们引入了cityscape、一个基准套件和大规模数据集来训练和测试像素级和实例级语义标记的方法。《城市景观》由一组大型的、多样化的立体视频序列组成,这些视频序列记录在50个不同城市的街道上。其中5000幅图像具有高质量的像素级注释;另外有20000张图像具有粗注释,以支持利用大量弱标记数据的方法。至关重要的是,我们的努力在数据集的大小、注释的丰富性、场景的可变性和复杂性方面超过了以前的尝试。我们附带的实证研究提供了对数据集特征的深入分析,以及基于我们的基准的几种最先进方法的性能评估。
cityscapes to coco code
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import glob
import os.path as osp
import cityscapesscripts.helpers.labels as CSLabels
import mmcv
import numpy as np
import pycocotools.mask as maskUtils
def collect_files(img_dir, gt_dir):
suffix = 'leftImg8bit.png'
files = []
for img_file in glob.glob(osp.join(img_dir, '**/*.png')):
assert img_file.endswith(suffix), img_file
inst_file = gt_dir + img_file[
len(img_dir):-len(suffix)] + 'gtFine_instanceIds.png'
# Note that labelIds are not converted to trainId for seg map
segm_file = gt_dir + img_file[
len(img_dir):-len(suffix)] + 'gtFine_labelIds.png'
files.append((img_file, inst_file, segm_file))
assert len(files), f'No images found in {img_dir}'
print(f'Loaded {len(files)} images from {img_dir}')
return files
def collect_annotations(files, nproc=1):
print('Loading annotation images')
if nproc > 1:
images = mmcv.track_parallel_progress(
load_img_info, files, nproc=nproc)
else:
images = mmcv.track_progress(load_img_info, files)
return images
def load_img_info(files):
img_file, inst_file, segm_file = files
inst_img = mmcv.imread(inst_file, 'unchanged')
# ids < 24 are stuff labels (filtering them first is about 5% faster)
unique_inst_ids = np.unique(inst_img[inst_img >= 24])
anno_info = []
for inst_id in unique_inst_ids:
# For non-crowd annotations, inst_id // 1000 is the label_id
# Crowd annotations have <1000 instance ids
label_id = inst_id // 1000 if inst_id >= 1000 else inst_id
label = CSLabels.id2label[label_id]
if not label.hasInstances or label.ignoreInEval:
continue
category_id = label.id
iscrowd = int(inst_id < 1000)
mask = np.asarray(inst_img == inst_id, dtype=np.uint8, order='F')
mask_rle = maskUtils.encode(mask[:, :, None])[0]
area = maskUtils.area(mask_rle)
# convert to COCO style XYWH format
bbox = maskUtils.toBbox(mask_rle)
# for json encoding
mask_rle['counts'] = mask_rle['counts'].decode()
anno = dict(
iscrowd=iscrowd,
category_id=category_id,
bbox=bbox.tolist(),
area=area.tolist(),
segmentation=mask_rle)
anno_info.append(anno)
video_name = osp.basename(osp.dirname(img_file))
img_info = dict(
# remove img_prefix for filename
file_name=osp.join(video_name, osp.basename(img_file)),
height=inst_img.shape[0],
width=inst_img.shape[1],
anno_info=anno_info,
segm_file=osp.join(video_name, osp.basename(segm_file)))
return img_info
def cvt_annotations(image_infos, out_json_name):
out_json = dict()
img_id = 0
ann_id = 0
out_json['images'] = []
out_json['categories'] = []
out_json['annotations'] = []
for image_info in image_infos:
image_info['id'] = img_id
anno_infos = image_info.pop('anno_info')
out_json['images'].append(image_info)
for anno_info in anno_infos:
anno_info['image_id'] = img_id
anno_info['id'] = ann_id
out_json['annotations'].append(anno_info)
ann_id += 1
img_id += 1
for label in CSLabels.labels:
if label.hasInstances and not label.ignoreInEval:
cat = dict(id=label.id, name=label.name)
out_json['categories'].append(cat)
if len(out_json['annotations']) == 0:
out_json.pop('annotations')
mmcv.dump(out_json, out_json_name)
return out_json
def parse_args():
parser = argparse.ArgumentParser(
description='Convert Cityscapes annotations to COCO format')
parser.add_argument('cityscapes_path', help='cityscapes data path')
parser.add_argument('--img-dir', default='leftImg8bit', type=str)
parser.add_argument('--gt-dir', default='gtFine', type=str)
parser.add_argument('-o', '--out-dir', help='output path')
parser.add_argument(
'--nproc', default=1, type=int, help='number of process')
args = parser.parse_args()
return args
def main():
args = parse_args()
cityscapes_path = args.cityscapes_path
out_dir = args.out_dir if args.out_dir else cityscapes_path
mmcv.mkdir_or_exist(out_dir)
img_dir = osp.join(cityscapes_path, args.img_dir)
gt_dir = osp.join(cityscapes_path, args.gt_dir)
set_name = dict(
train='instancesonly_filtered_gtFine_train.json',
val='instancesonly_filtered_gtFine_val.json',
test='instancesonly_filtered_gtFine_test.json')
for split, json_name in set_name.items():
print(f'Converting {split} into {json_name}')
with mmcv.Timer(
print_tmpl='It took {}s to convert Cityscapes annotation'):
files = collect_files(
osp.join(img_dir, split), osp.join(gt_dir, split))
image_infos = collect_annotations(files, nproc=args.nproc)
cvt_annotations(image_infos, osp.join(out_dir, json_name))
if __name__ == '__main__':
main()
cityscapes finetun config
如果您不想将注释格式转换为COCO或PASCAL格式,也可以这样做。实际上,我们定义了一个简单的注释格式,所有现有的数据集都被处理成与它兼容,不管是在线的还是离线的。
数据集的注释是一个字典列表,每个字典对应一个图像。有3个字段文件名(相对路径),宽度,高度用于测试,和一个额外的字段ann用于培训。Ann也是一个至少包含两个字段的字典:box和label,它们都是numpy数组。一些数据集可能会提供像crowd/difficult/ignored box这样的注释,我们使用bboxes_ignore和labels_ignore来覆盖它们。
[
{
'filename': 'a.jpg',
'width': 1280,
'height': 720,
'ann': {
'bboxes': <np.ndarray, float32> (n, 4),
'labels': <np.ndarray, int64> (n, ),
'bboxes_ignore': <np.ndarray, float32> (k, 4),
'labels_ignore': <np.ndarray, int64> (k, ) (optional field)
}
},
...
]
有两种方法可以处理自定义数据集。
您可以将注释格式转换为上述预期格式,并将其保存到pickle或json文件,like pascal_voc.py
一个自定义数据集的示例
假设注释在文本文件中是一种新格式。边界框注释存储在文本文件注释中。TXT文件如下所示。
在这里插入代码片#
000001.jpg
1280 720
2
10 20 40 60 1
20 40 50 60 2
#
000002.jpg
1280 720
3
50 20 40 60 2
20 40 30 45 2
30 40 50 60 3
我们可以在mmdet/datasets/my_data .py中创建一个新的数据集来加载数据。
import mmcv
import numpy as np
from .builder import DATASETS
from .custom import CustomDataset
@DATASETS.register_module()
class MyDataset(CustomDataset):
CLASSES = ('person', 'bicycle', 'car', 'motorcycle')
def load_annotations(self, ann_file):
ann_list = mmcv.list_from_file(ann_file)
data_infos = []
for i, ann_line in enumerate(ann_list):
if ann_line != '#':
continue
img_shape = ann_list[i + 2].split(' ')
width = int(img_shape[0])
height = int(img_shape[1])
bbox_number = int(ann_list[i + 3])
anns = ann_line.split(' ')
bboxes = []
labels = []
for anns in ann_list[i + 4:i + 4 + bbox_number]:
bboxes.append([float(ann) for ann in anns[:4]])
labels.append(int(anns[4]))
data_infos.append(
dict(
filename=ann_list[i + 1],
width=width,
height=height,
ann=dict(
bboxes=np.array(bboxes).astype(np.float32),
labels=np.array(labels).astype(np.int64))
))
return data_infos
def get_ann_info(self, idx):
return self.data_infos[idx]['ann']
然后在配置中,要使用MyDataset,您可以如下所示修改配置
dataset_A_train = dict(
type='MyDataset',
ann_file = 'image_list.txt',
pipeline=train_pipeline
)
MMDetection还支持许多数据集包装器来混合数据集或修改数据集分布。目前它支持以下三种数据集包装器:
repeatdataset:简单重复整个数据集。
ClassBalancedDataset:以类均衡的方式重复数据集。
ConcatDataset: concat数据集。
我们使用RepeatDataset作为包装器来重复数据集。
例如,假设原始数据集是Dataset_A,为了重复它,配置如下所示
dataset_A_train = dict(
type='RepeatDataset',
times=N,
dataset=dict( # This is the original config of Dataset_A
type='Dataset_A',
...
pipeline=train_pipeline
)
)
我们使用ClassBalancedDataset作为包装器,以基于类别频率重复数据集。
要重复的数据集需要实例化函数本身。get_cat_ids (idx)支持ClassBalancedDataset。例如,要使用overample_thr =1e-3重复Dataset_A,配置如下所示
dataset_A_train = dict(
type='ClassBalancedDataset',
oversample_thr=1e-3,
dataset=dict( # This is the original config of Dataset_A
type='Dataset_A',
...
pipeline=train_pipeline
)
)
# Copyright (c) OpenMMLab. All rights reserved.
import bisect
import collections
import copy
import math
from collections import defaultdict
import numpy as np
from mmcv.utils import build_from_cfg, print_log
from torch.utils.data.dataset import ConcatDataset as _ConcatDataset
from .builder import DATASETS, PIPELINES
from .coco import CocoDataset
@DATASETS.register_module()
class ConcatDataset(_ConcatDataset):
"""A wrapper of concatenated dataset.
Same as :obj:`torch.utils.data.dataset.ConcatDataset`, but
concat the group flag for image aspect ratio.
Args:
datasets (list[:obj:`Dataset`]): A list of datasets.
separate_eval (bool): Whether to evaluate the results
separately if it is used as validation dataset.
Defaults to True.
"""
def __init__(self, datasets, separate_eval=True):
super(ConcatDataset, self).__init__(datasets)
self.CLASSES = datasets[0].CLASSES
self.PALETTE = getattr(datasets[0], 'PALETTE', None)
self.separate_eval = separate_eval
if not separate_eval:
if any([isinstance(ds, CocoDataset) for ds in datasets]):
raise NotImplementedError(
'Evaluating concatenated CocoDataset as a whole is not'
' supported! Please set "separate_eval=True"')
elif len(set([type(ds) for ds in datasets])) != 1:
raise NotImplementedError(
'All the datasets should have same types')
if hasattr(datasets[0], 'flag'):
flags = []
for i in range(0, len(datasets)):
flags.append(datasets[i].flag)
self.flag = np.concatenate(flags)
def get_cat_ids(self, idx):
"""Get category ids of concatenated dataset by index.
Args:
idx (int): Index of data.
Returns:
list[int]: All categories in the image of specified index.
"""
if idx < 0:
if -idx > len(self):
raise ValueError(
'absolute value of index should not exceed dataset length')
idx = len(self) + idx
dataset_idx = bisect.bisect_right(self.cumulative_sizes, idx)
if dataset_idx == 0:
sample_idx = idx
else:
sample_idx = idx - self.cumulative_sizes[dataset_idx - 1]
return self.datasets[dataset_idx].get_cat_ids(sample_idx)
def get_ann_info(self, idx):
"""Get annotation of concatenated dataset by index.
Args:
idx (int): Index of data.
Returns:
dict: Annotation info of specified index.
"""
if idx < 0:
if -idx > len(self):
raise ValueError(
'absolute value of index should not exceed dataset length')
idx = len(self) + idx
dataset_idx = bisect.bisect_right(self.cumulative_sizes, idx)
if dataset_idx == 0:
sample_idx = idx
else:
sample_idx = idx - self.cumulative_sizes[dataset_idx - 1]
return self.datasets[dataset_idx].get_ann_info(sample_idx)
def evaluate(self, results, logger=None, **kwargs):
"""Evaluate the results.
Args:
results (list[list | tuple]): Testing results of the dataset.
logger (logging.Logger | str | None): Logger used for printing
related information during evaluation. Default: None.
Returns:
dict[str: float]: AP results of the total dataset or each separate
dataset if `self.separate_eval=True`.
"""
assert len(results) == self.cumulative_sizes[-1], \
('Dataset and results have different sizes: '
f'{self.cumulative_sizes[-1]} v.s. {len(results)}')
# Check whether all the datasets support evaluation
for dataset in self.datasets:
assert hasattr(dataset, 'evaluate'), \
f'{type(dataset)} does not implement evaluate function'
if self.separate_eval:
dataset_idx = -1
total_eval_results = dict()
for size, dataset in zip(self.cumulative_sizes, self.datasets):
start_idx = 0 if dataset_idx == -1 else \
self.cumulative_sizes[dataset_idx]
end_idx = self.cumulative_sizes[dataset_idx + 1]
results_per_dataset = results[start_idx:end_idx]
print_log(
f'\nEvaluateing {dataset.ann_file} with '
f'{len(results_per_dataset)} images now',
logger=logger)
eval_results_per_dataset = dataset.evaluate(
results_per_dataset, logger=logger, **kwargs)
dataset_idx += 1
for k, v in eval_results_per_dataset.items():
total_eval_results.update({f'{dataset_idx}_{k}': v})
return total_eval_results
elif any([isinstance(ds, CocoDataset) for ds in self.datasets]):
raise NotImplementedError(
'Evaluating concatenated CocoDataset as a whole is not'
' supported! Please set "separate_eval=True"')
elif len(set([type(ds) for ds in self.datasets])) != 1:
raise NotImplementedError(
'All the datasets should have same types')
else:
original_data_infos = self.datasets[0].data_infos
self.datasets[0].data_infos = sum(
[dataset.data_infos for dataset in self.datasets], [])
eval_results = self.datasets[0].evaluate(
results, logger=logger, **kwargs)
self.datasets[0].data_infos = original_data_infos
return eval_results
@DATASETS.register_module()
class RepeatDataset:
"""A wrapper of repeated dataset.
The length of repeated dataset will be `times` larger than the original
dataset. This is useful when the data loading time is long but the dataset
is small. Using RepeatDataset can reduce the data loading time between
epochs.
Args:
dataset (:obj:`Dataset`): The dataset to be repeated.
times (int): Repeat times.
"""
def __init__(self, dataset, times):
self.dataset = dataset
self.times = times
self.CLASSES = dataset.CLASSES
self.PALETTE = getattr(dataset, 'PALETTE', None)
if hasattr(self.dataset, 'flag'):
self.flag = np.tile(self.dataset.flag, times)
self._ori_len = len(self.dataset)
def __getitem__(self, idx):
return self.dataset[idx % self._ori_len]
def get_cat_ids(self, idx):
"""Get category ids of repeat dataset by index.
Args:
idx (int): Index of data.
Returns:
list[int]: All categories in the image of specified index.
"""
return self.dataset.get_cat_ids(idx % self._ori_len)
def get_ann_info(self, idx):
"""Get annotation of repeat dataset by index.
Args:
idx (int): Index of data.
Returns:
dict: Annotation info of specified index.
"""
return self.dataset.get_ann_info(idx % self._ori_len)
def __len__(self):
"""Length after repetition."""
return self.times * self._ori_len
# Modified from https://github.com/facebookresearch/detectron2/blob/41d475b75a230221e21d9cac5d69655e3415e3a4/detectron2/data/samplers/distributed_sampler.py#L57 # noqa
@DATASETS.register_module()
class ClassBalancedDataset:
"""A wrapper of repeated dataset with repeat factor.
Suitable for training on class imbalanced datasets like LVIS. Following
the sampling strategy in the `paper <https://arxiv.org/abs/1908.03195>`_,
in each epoch, an image may appear multiple times based on its
"repeat factor".
The repeat factor for an image is a function of the frequency the rarest
category labeled in that image. The "frequency of category c" in [0, 1]
is defined by the fraction of images in the training set (without repeats)
in which category c appears.
The dataset needs to instantiate :func:`self.get_cat_ids` to support
ClassBalancedDataset.
The repeat factor is computed as followed.
1. For each category c, compute the fraction # of images
that contain it: :math:`f(c)`
2. For each category c, compute the category-level repeat factor:
:math:`r(c) = max(1, sqrt(t/f(c)))`
3. For each image I, compute the image-level repeat factor:
:math:`r(I) = max_{c in I} r(c)`
Args:
dataset (:obj:`CustomDataset`): The dataset to be repeated.
oversample_thr (float): frequency threshold below which data is
repeated. For categories with ``f_c >= oversample_thr``, there is
no oversampling. For categories with ``f_c < oversample_thr``, the
degree of oversampling following the square-root inverse frequency
heuristic above.
filter_empty_gt (bool, optional): If set true, images without bounding
boxes will not be oversampled. Otherwise, they will be categorized
as the pure background class and involved into the oversampling.
Default: True.
"""
def __init__(self, dataset, oversample_thr, filter_empty_gt=True):
self.dataset = dataset
self.oversample_thr = oversample_thr
self.filter_empty_gt = filter_empty_gt
self.CLASSES = dataset.CLASSES
self.PALETTE = getattr(dataset, 'PALETTE', None)
repeat_factors = self._get_repeat_factors(dataset, oversample_thr)
repeat_indices = []
for dataset_idx, repeat_factor in enumerate(repeat_factors):
repeat_indices.extend([dataset_idx] * math.ceil(repeat_factor))
self.repeat_indices = repeat_indices
flags = []
if hasattr(self.dataset, 'flag'):
for flag, repeat_factor in zip(self.dataset.flag, repeat_factors):
flags.extend([flag] * int(math.ceil(repeat_factor)))
assert len(flags) == len(repeat_indices)
self.flag = np.asarray(flags, dtype=np.uint8)
def _get_repeat_factors(self, dataset, repeat_thr):
"""Get repeat factor for each images in the dataset.
Args:
dataset (:obj:`CustomDataset`): The dataset
repeat_thr (float): The threshold of frequency. If an image
contains the categories whose frequency below the threshold,
it would be repeated.
Returns:
list[float]: The repeat factors for each images in the dataset.
"""
# 1. For each category c, compute the fraction # of images
# that contain it: f(c)
category_freq = defaultdict(int)
num_images = len(dataset)
for idx in range(num_images):
cat_ids = set(self.dataset.get_cat_ids(idx))
if len(cat_ids) == 0 and not self.filter_empty_gt:
cat_ids = set([len(self.CLASSES)])
for cat_id in cat_ids:
category_freq[cat_id] += 1
for k, v in category_freq.items():
category_freq[k] = v / num_images
# 2. For each category c, compute the category-level repeat factor:
# r(c) = max(1, sqrt(t/f(c)))
category_repeat = {
cat_id: max(1.0, math.sqrt(repeat_thr / cat_freq))
for cat_id, cat_freq in category_freq.items()
}
# 3. For each image I, compute the image-level repeat factor:
# r(I) = max_{c in I} r(c)
repeat_factors = []
for idx in range(num_images):
cat_ids = set(self.dataset.get_cat_ids(idx))
if len(cat_ids) == 0 and not self.filter_empty_gt:
cat_ids = set([len(self.CLASSES)])
repeat_factor = 1
if len(cat_ids) > 0:
repeat_factor = max(
{category_repeat[cat_id]
for cat_id in cat_ids})
repeat_factors.append(repeat_factor)
return repeat_factors
def __getitem__(self, idx):
ori_index = self.repeat_indices[idx]
return self.dataset[ori_index]
def get_ann_info(self, idx):
"""Get annotation of dataset by index.
Args:
idx (int): Index of data.
Returns:
dict: Annotation info of specified index.
"""
ori_index = self.repeat_indices[idx]
return self.dataset.get_ann_info(ori_index)
def __len__(self):
"""Length after repetition."""
return len(self.repeat_indices)
@DATASETS.register_module()
class MultiImageMixDataset:
"""A wrapper of multiple images mixed dataset.
Suitable for training on multiple images mixed data augmentation like
mosaic and mixup. For the augmentation pipeline of mixed image data,
the `get_indexes` method needs to be provided to obtain the image
indexes, and you can set `skip_flags` to change the pipeline running
process. At the same time, we provide the `dynamic_scale` parameter
to dynamically change the output image size.
Args:
dataset (:obj:`CustomDataset`): The dataset to be mixed.
pipeline (Sequence[dict]): Sequence of transform object or
config dict to be composed.
dynamic_scale (tuple[int], optional): The image scale can be changed
dynamically. Default to None. It is deprecated.
skip_type_keys (list[str], optional): Sequence of type string to
be skip pipeline. Default to None.
max_refetch (int): The maximum number of retry iterations for getting
valid results from the pipeline. If the number of iterations is
greater than `max_refetch`, but results is still None, then the
iteration is terminated and raise the error. Default: 15.
"""
def __init__(self,
dataset,
pipeline,
dynamic_scale=None,
skip_type_keys=None,
max_refetch=15):
if dynamic_scale is not None:
raise RuntimeError(
'dynamic_scale is deprecated. Please use Resize pipeline '
'to achieve similar functions')
assert isinstance(pipeline, collections.abc.Sequence)
if skip_type_keys is not None:
assert all([
isinstance(skip_type_key, str)
for skip_type_key in skip_type_keys
])
self._skip_type_keys = skip_type_keys
self.pipeline = []
self.pipeline_types = []
for transform in pipeline:
if isinstance(transform, dict):
self.pipeline_types.append(transform['type'])
transform = build_from_cfg(transform, PIPELINES)
self.pipeline.append(transform)
else:
raise TypeError('pipeline must be a dict')
self.dataset = dataset
self.CLASSES = dataset.CLASSES
self.PALETTE = getattr(dataset, 'PALETTE', None)
if hasattr(self.dataset, 'flag'):
self.flag = dataset.flag
self.num_samples = len(dataset)
self.max_refetch = max_refetch
def __len__(self):
return self.num_samples
def __getitem__(self, idx):
results = copy.deepcopy(self.dataset[idx])
for (transform, transform_type) in zip(self.pipeline,
self.pipeline_types):
if self._skip_type_keys is not None and \
transform_type in self._skip_type_keys:
continue
if hasattr(transform, 'get_indexes'):
for i in range(self.max_refetch):
# Make sure the results passed the loading pipeline
# of the original dataset is not None.
indexes = transform.get_indexes(self.dataset)
if not isinstance(indexes, collections.abc.Sequence):
indexes = [indexes]
mix_results = [
copy.deepcopy(self.dataset[index]) for index in indexes
]
if None not in mix_results:
results['mix_results'] = mix_results
break
else:
raise RuntimeError(
'The loading pipeline of the original dataset'
' always return None. Please check the correctness '
'of the dataset and its pipeline.')
for i in range(self.max_refetch):
# To confirm the results passed the training pipeline
# of the wrapper is not None.
updated_results = transform(copy.deepcopy(results))
if updated_results is not None:
results = updated_results
break
else:
raise RuntimeError(
'The training pipeline of the dataset wrapper'
' always return None.Please check the correctness '
'of the dataset and its pipeline.')
if 'mix_results' in results:
results.pop('mix_results')
return results
def update_skip_type_keys(self, skip_type_keys):
"""Update skip_type_keys. It is called by an external hook.
Args:
skip_type_keys (list[str], optional): Sequence of type
string to be skip pipeline.
"""
assert all([
isinstance(skip_type_key, str) for skip_type_key in skip_type_keys
])
self._skip_type_keys = skip_type_keys
有三种连接数据集的方法
注意:
1、选项separate_eval=False假设数据集使用self.data_infos在评估。因此,coco数据集不支持这种行为,因为coco数据集不完全依赖于self.data_infos进行评估。结合不同类型的数据集并将其作为一个整体进行评估没有经过测试,因此不建议。不支持评估ClassBalancedDataset和RepeatDataset,因此也不支持评估这些类型的连接数据集。
dataset_A_train = dict(
type='Dataset_A',
ann_file = ['anno_file_1', 'anno_file_2'],
pipeline=train_pipeline
)
要将连接的数据集作为一个整体进行测试,可以设置separate_eval=False,如下所示。
dataset_A_train = dict(
type='Dataset_A',
ann_file = ['anno_file_1', 'anno_file_2'],
separate_eval=False,
pipeline=train_pipeline
)
dataset_A_train = dict()
dataset_B_train = dict()
data = dict(
imgs_per_gpu=2,
workers_per_gpu=2,
train = [
dataset_A_train,
dataset_B_train
],
val = dataset_A_val,
test = dataset_A_test
)
dataset_A_val = dict()
dataset_B_val = dict()
data = dict(
imgs_per_gpu=2,
workers_per_gpu=2,
train=dataset_A_train,
val=dict(
type='ConcatDataset',
datasets=[dataset_A_val, dataset_B_val],
separate_eval=False))
这种方式允许用户通过设置separate_eval=False来评估所有数据集。
dataset_A_train = dict(
type='RepeatDataset',
times=N,
dataset=dict(
type='Dataset_A',
...
pipeline=train_pipeline
)
)
dataset_A_val = dict(
...
pipeline=test_pipeline
)
dataset_A_test = dict(
...
pipeline=test_pipeline
)
dataset_B_train = dict(
type='RepeatDataset',
times=M,
dataset=dict(
type='Dataset_B',
...
pipeline=train_pipeline
)
)
data = dict(
imgs_per_gpu=2,
workers_per_gpu=2,
train = [
dataset_A_train,
dataset_B_train
],
val = dataset_A_val,
test = dataset_A_test
)
对于现有的数据集类型,我们可以修改它们的类名来训练注释的子集。
例如,如果您只想训练当前数据集的三个类,则可以修改数据集的类。数据集将自动过滤掉其他类的地面真值框。
classes = ('person', 'bicycle', 'car')
data = dict(
train=dict(classes=classes),
val=dict(classes=classes),
test=dict(classes=classes))
MMDetection V2.0还支持从文件中读取类,这在实际应用中很常见。
例如,假设classes.txt包含如下所示的类名。
person
bicycle
car
用户可以将类设置为文件路径,数据集将加载它并自动将其转换为列表。
classes = 'path/to/classes.txt'
data = dict(
train=dict(classes=classes),
val=dict(classes=classes),
test=dict(classes=classes))
注意:
在MMDetection v2.5.0之前,如果类被设置并且没有办法通过配置禁用,数据集将自动过滤掉空的GT图像。这是一种不受欢迎的行为,并会带来混乱,因为如果没有设置类,则在filter_empty_gt=True和test_mode=False时,数据集只过滤空的GT图像。在MMDetection v2.5.0之后,我们将图像过滤过程和类修改解耦,即当filter_empty_gt=True和test_mode=False时,无论是否设置类,数据集只过滤空的GT图像。因此,类的设置只影响用于训练的类的标注,用户可以自行决定是否过滤空的GT图像。
当在dataset中指定类时,请记住修改头部中的num_classes。
我们实现了NumClassCheckHook来检查数字是否自v2.9.0以来(在PR#4508之后)是一致的。•设置数据集类和数据集过滤的功能将在未来被重构为更友好的用户(取决于进度)。
# Copyright (c) OpenMMLab. All rights reserved.
import copy
import warnings
from mmcv.cnn import VGG
from mmcv.runner.hooks import HOOKS, Hook
from mmdet.datasets.builder import PIPELINES
from mmdet.datasets.pipelines import (LoadAnnotations, LoadImageFromFile,
LoadPanopticAnnotations)
from mmdet.models.dense_heads import GARPNHead, RPNHead
from mmdet.models.roi_heads.mask_heads import FusedSemanticHead
def replace_ImageToTensor(pipelines):
"""Replace the ImageToTensor transform in a data pipeline to
DefaultFormatBundle, which is normally useful in batch inference.
Args:
pipelines (list[dict]): Data pipeline configs.
Returns:
list: The new pipeline list with all ImageToTensor replaced by
DefaultFormatBundle.
Examples:
>>> pipelines = [
... dict(type='LoadImageFromFile'),
... dict(
... type='MultiScaleFlipAug',
... img_scale=(1333, 800),
... flip=False,
... transforms=[
... dict(type='Resize', keep_ratio=True),
... dict(type='RandomFlip'),
... dict(type='Normalize', mean=[0, 0, 0], std=[1, 1, 1]),
... dict(type='Pad', size_divisor=32),
... dict(type='ImageToTensor', keys=['img']),
... dict(type='Collect', keys=['img']),
... ])
... ]
>>> expected_pipelines = [
... dict(type='LoadImageFromFile'),
... dict(
... type='MultiScaleFlipAug',
... img_scale=(1333, 800),
... flip=False,
... transforms=[
... dict(type='Resize', keep_ratio=True),
... dict(type='RandomFlip'),
... dict(type='Normalize', mean=[0, 0, 0], std=[1, 1, 1]),
... dict(type='Pad', size_divisor=32),
... dict(type='DefaultFormatBundle'),
... dict(type='Collect', keys=['img']),
... ])
... ]
>>> assert expected_pipelines == replace_ImageToTensor(pipelines)
"""
pipelines = copy.deepcopy(pipelines)
for i, pipeline in enumerate(pipelines):
if pipeline['type'] == 'MultiScaleFlipAug':
assert 'transforms' in pipeline
pipeline['transforms'] = replace_ImageToTensor(
pipeline['transforms'])
elif pipeline['type'] == 'ImageToTensor':
warnings.warn(
'"ImageToTensor" pipeline is replaced by '
'"DefaultFormatBundle" for batch inference. It is '
'recommended to manually replace it in the test '
'data pipeline in your config file.', UserWarning)
pipelines[i] = {'type': 'DefaultFormatBundle'}
return pipelines
def get_loading_pipeline(pipeline):
"""Only keep loading image and annotations related configuration.
Args:
pipeline (list[dict]): Data pipeline configs.
Returns:
list[dict]: The new pipeline list with only keep
loading image and annotations related configuration.
Examples:
>>> pipelines = [
... dict(type='LoadImageFromFile'),
... dict(type='LoadAnnotations', with_bbox=True),
... dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
... dict(type='RandomFlip', flip_ratio=0.5),
... dict(type='Normalize', **img_norm_cfg),
... dict(type='Pad', size_divisor=32),
... dict(type='DefaultFormatBundle'),
... dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
... ]
>>> expected_pipelines = [
... dict(type='LoadImageFromFile'),
... dict(type='LoadAnnotations', with_bbox=True)
... ]
>>> assert expected_pipelines ==\
... get_loading_pipeline(pipelines)
"""
loading_pipeline_cfg = []
for cfg in pipeline:
obj_cls = PIPELINES.get(cfg['type'])
# TODO:use more elegant way to distinguish loading modules
if obj_cls is not None and obj_cls in (LoadImageFromFile,
LoadAnnotations,
LoadPanopticAnnotations):
loading_pipeline_cfg.append(cfg)
assert len(loading_pipeline_cfg) == 2, \
'The data pipeline in your config file must include ' \
'loading image and annotations related pipeline.'
return loading_pipeline_cfg
@HOOKS.register_module()
class NumClassCheckHook(Hook):
def _check_head(self, runner):
"""Check whether the `num_classes` in head matches the length of
`CLASSES` in `dataset`.
Args:
runner (obj:`EpochBasedRunner`): Epoch based Runner.
"""
model = runner.model
dataset = runner.data_loader.dataset
if dataset.CLASSES is None:
runner.logger.warning(
f'Please set `CLASSES` '
f'in the {dataset.__class__.__name__} and'
f'check if it is consistent with the `num_classes` '
f'of head')
else:
assert type(dataset.CLASSES) is not str, \
(f'`CLASSES` in {dataset.__class__.__name__}'
f'should be a tuple of str.'
f'Add comma if number of classes is 1 as '
f'CLASSES = ({dataset.CLASSES},)')
for name, module in model.named_modules():
if hasattr(module, 'num_classes') and not isinstance(
module, (RPNHead, VGG, FusedSemanticHead, GARPNHead)):
assert module.num_classes == len(dataset.CLASSES), \
(f'The `num_classes` ({module.num_classes}) in '
f'{module.__class__.__name__} of '
f'{model.__class__.__name__} does not matches '
f'the length of `CLASSES` '
f'{len(dataset.CLASSES)}) in '
f'{dataset.__class__.__name__}')
def before_train_epoch(self, runner):
"""Check whether the training dataset is compatible with head.
Args:
runner (obj:`EpochBasedRunner`): Epoch based Runner.
"""
self._check_head(runner)
def before_val_epoch(self, runner):
"""Check whether the dataset in val epoch is compatible with head.
Args:
runner (obj:`EpochBasedRunner`): Epoch based Runner.
"""
self._check_head(runner)
现在我们支持COCO的Panoptic数据集,Panoptic注释的格式不同于COCO格式。前景和背景都将存在于注释文件中。CoCO Panoptic格式的注释json文件有以下必要的键:
'images': [
{
'file_name': '000000001268.jpg',
'height': 427,
'width': 640,
'id': 1268
},
...
]
'annotations': [
{
'filename': '000000001268.jpg',
'image_id': 1268,
'segments_info': [
{
'id':8345037, # One-to-one correspondence with the id in the annotation map.
'category_id': 51,
'iscrowd': 0,
'bbox': (x1, y1, w, h), # The bbox of the background is the outer rectangle of its mask.
'area': 24315
},
...
]
},
...
]
'categories': [ # including both foreground categories and background categories
{'id': 0, 'name': 'person'},
...
]
此外,必须将seg_prefix设置为全景注释图像的路径。
data = dict(
type='CocoPanopticDataset',
train=dict(
seg_prefix = 'path/to/your/train/panoptic/image_annotation_data'
),
val=dict(
seg_prefix = 'path/to/your/train/panoptic/image_annotation_data'
)
)