In the mmdetection framework, loading the dataset is a very important step. In an earlier post I focused on the pipeline; this post digs one level deeper and walks through how the COCO dataset is loaded before any pipeline processing happens.
Whether we are training or testing, every run of mmdetection begins by printing output like the following to the terminal:
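(Reconstructed from the print statements in the pycocotools code quoted later in this post; the elapsed time naturally varies with your machine and annotation file.)

```
loading annotations into memory...
Done (t=XX.XXs)
creating index...
index created!
```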
How is this produced, and which statements do the printing? This post gives the answer.
In mmdet/datasets/coco.py, the class CocoDataset is defined. It inherits from CustomDataset (a class I introduced and analyzed in an earlier post). Part of its code is shown below:
```python
@DATASETS.register_module
class CocoDataset(CustomDataset):

    CLASSES = ('person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
               'train', 'truck', 'boat', 'traffic_light', 'fire_hydrant',
               'stop_sign', 'parking_meter', 'bench', 'bird', 'cat', 'dog',
               'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe',
               'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
               'skis', 'snowboard', 'sports_ball', 'kite', 'baseball_bat',
               'baseball_glove', 'skateboard', 'surfboard', 'tennis_racket',
               'bottle', 'wine_glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
               'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
               'hot_dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
               'potted_plant', 'bed', 'dining_table', 'toilet', 'tv', 'laptop',
               'mouse', 'remote', 'keyboard', 'cell_phone', 'microwave',
               'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock',
               'vase', 'scissors', 'teddy_bear', 'hair_drier', 'toothbrush')

    def load_annotations(self, ann_file):
        self.coco = COCO(ann_file)
        self.cat_ids = self.coco.getCatIds()
        # map COCO's non-contiguous category ids to contiguous labels 1..80
        self.cat2label = {
            cat_id: i + 1
            for i, cat_id in enumerate(self.cat_ids)
        }
        self.img_ids = self.coco.getImgIds()
        img_infos = []
        for i in self.img_ids:
            info = self.coco.loadImgs([i])[0]
            info['filename'] = info['file_name']
            img_infos.append(info)
        return img_infos
```
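The cat2label mapping exists because COCO's 80 classes carry category ids drawn from 1 to 90 with gaps (some ids were retired from the original label set), so they are remapped to contiguous labels starting at 1. A minimal sketch, using a shortened and partly hypothetical id list:

```python
# Shortened stand-in for what coco.getCatIds() returns; the real list has
# 80 entries drawn from 1..90 with gaps (id 12, for example, is unused).
cat_ids = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13]

# The same comprehension as in load_annotations above.
cat2label = {cat_id: i + 1 for i, cat_id in enumerate(cat_ids)}

print(cat2label[11])  # -> 11
print(cat2label[13])  # -> 12: raw id 13 becomes contiguous label 12
```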
When I first read this I was briefly puzzled: where is the __init__ method? Looking more carefully, CocoDataset inherits from CustomDataset, and __init__ is defined in the CustomDataset class. Its code is as follows:
```python
def __init__(self,
             ann_file,
             pipeline,
             data_root=None,
             img_prefix=None,
             seg_prefix=None,
             proposal_file=None,
             test_mode=False,
             dynamic_input=None):
    self.ann_file = ann_file
    self.data_root = data_root
    self.img_prefix = img_prefix
    self.seg_prefix = seg_prefix
    self.proposal_file = proposal_file
    self.test_mode = test_mode
    self.dynamic_input = dynamic_input
    # join paths if data_root is specified
    if self.data_root is not None:
        if not osp.isabs(self.ann_file):
            self.ann_file = osp.join(self.data_root, self.ann_file)
        if not (self.img_prefix is None or osp.isabs(self.img_prefix)):
            self.img_prefix = osp.join(self.data_root, self.img_prefix)
        if not (self.seg_prefix is None or osp.isabs(self.seg_prefix)):
            self.seg_prefix = osp.join(self.data_root, self.seg_prefix)
        if not (self.proposal_file is None
                or osp.isabs(self.proposal_file)):
            self.proposal_file = osp.join(self.data_root,
                                          self.proposal_file)
    # load annotations (and proposals)
    self.img_infos = self.load_annotations(self.ann_file)
    if self.proposal_file is not None:
        self.proposals = self.load_proposals(self.proposal_file)
    else:
        self.proposals = None
    # filter images with no annotation during training
    if not test_mode:
        valid_inds = self._filter_imgs()
        self.img_infos = [self.img_infos[i] for i in valid_inds]
        if self.proposals is not None:
            self.proposals = [self.proposals[i] for i in valid_inds]
    # set group flag for the sampler
    if not self.test_mode:
        self._set_group_flag()
    # processing pipeline
    self.pipeline = Compose(pipeline)
```
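Two helpers called here deserve a quick note: _filter_imgs drops training images with no usable annotations, and _set_group_flag tags each image by aspect ratio so the group-aware sampler can batch landscape and portrait images separately, reducing padding. Roughly, _set_group_flag in CustomDataset amounts to the following (paraphrased from the v1.x code, not copied verbatim):

```python
import numpy as np

def _set_group_flag(self):
    # Group 1 for landscape images (width > height), group 0 otherwise;
    # the sampler then draws each batch from a single group.
    self.flag = np.zeros(len(self), dtype=np.uint8)
    for i in range(len(self)):
        img_info = self.img_infos[i]
        if img_info['width'] / img_info['height'] > 1:
            self.flag[i] = 1
```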
In the config file configs/faster_rcnn_r50_fpn_1x.py, the following is defined:
```python
dataset_type = 'CocoDataset'

test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(1333, 800),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
```
The value of dataset_type is exactly the name of the CocoDataset class, which is precisely how the mmdetection registration mechanism discussed earlier ties the config to the code.
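To make that connection concrete, here is a stripped-down sketch of the lookup that happens when the dataset is built from the config. The real logic lives in mmcv's build_from_cfg (which also handles default arguments, validation, and error reporting); this illustration is not the actual implementation:

```python
# Simplified illustration of the registry lookup, assuming a registry object
# like DATASETS that stores classes under their names.
def build_from_cfg_sketch(cfg, registry):
    args = dict(cfg)
    obj_type = args.pop('type')       # e.g. 'CocoDataset' from the config
    obj_cls = registry.get(obj_type)  # class stored by @DATASETS.register_module
    return obj_cls(**args)            # runs the CustomDataset.__init__ shown above
```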
So a CocoDataset instance is created first, and its __init__ method calls self.load_annotations(self.ann_file). Inside load_annotations, self.coco = COCO(ann_file) is executed. COCO here is the class defined in pycocotools; its code is as follows:
```python
# imports required by this excerpt of pycocotools
import json
import time
from collections import defaultdict


class COCO:
    def __init__(self, annotation_file=None):
        """
        Constructor of Microsoft COCO helper class for reading and visualizing annotations.
        :param annotation_file (str): location of annotation file
        :param image_folder (str): location to the folder that hosts images.
        :return:
        """
        # load dataset
        self.dataset, self.anns, self.cats, self.imgs = dict(), dict(), dict(), dict()
        self.imgToAnns, self.catToImgs = defaultdict(list), defaultdict(list)
        if annotation_file is not None:
            print('loading annotations into memory...')
            tic = time.time()
            dataset = json.load(open(annotation_file, 'r'))
            assert type(dataset) == dict, 'annotation file format {} not supported'.format(type(dataset))
            print('Done (t={:0.2f}s)'.format(time.time() - tic))
            self.dataset = dataset
            self.createIndex()

    def createIndex(self):
        # create index
        print('creating index...')
        anns, cats, imgs = {}, {}, {}
        imgToAnns, catToImgs = defaultdict(list), defaultdict(list)
        if 'annotations' in self.dataset:
            for ann in self.dataset['annotations']:
                imgToAnns[ann['image_id']].append(ann)
                anns[ann['id']] = ann
        if 'images' in self.dataset:
            for img in self.dataset['images']:
                imgs[img['id']] = img
        if 'categories' in self.dataset:
            for cat in self.dataset['categories']:
                cats[cat['id']] = cat
        if 'annotations' in self.dataset and 'categories' in self.dataset:
            for ann in self.dataset['annotations']:
                catToImgs[ann['category_id']].append(ann['image_id'])
        print('index created!')

        # create class members
        self.anns = anns
        self.imgToAnns = imgToAnns
        self.catToImgs = catToImgs
        self.imgs = imgs
        self.cats = cats
```
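For reference, the same COCO object exposes the query API that load_annotations relies on. A quick usage sketch, where the annotation path is a placeholder for wherever your instances_*.json actually lives:

```python
from pycocotools.coco import COCO

# Hypothetical path: point this at your own annotation file.
coco = COCO('data/coco/annotations/instances_val2017.json')

img_ids = coco.getImgIds()                     # all image ids, in JSON order
info = coco.loadImgs([img_ids[0]])[0]          # dict with 'file_name', 'width', 'height', ...
ann_ids = coco.getAnnIds(imgIds=[img_ids[0]])  # annotation ids for that image
anns = coco.loadAnns(ann_ids)                  # each ann has 'bbox', 'category_id', ...
print(info['file_name'], len(anns))
```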
It is now easy to see where the output at the start of every run comes from. The code above also shows that the order in which COCO images are loaded is determined by the annotation JSON file: createIndex fills the imgs dict in file order, and getImgIds simply returns its keys, which preserve insertion order on Python 3.7+. Keep in mind that this describes the order only when shuffling is disabled.
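A self-contained way to verify this claim (the file name and ids below are made up for the demo):

```python
import json
from pycocotools.coco import COCO

# A tiny, made-up annotation file: three images listed out of numeric order.
tiny = {'images': [{'id': 7, 'file_name': 'c.jpg'},
                   {'id': 2, 'file_name': 'a.jpg'},
                   {'id': 5, 'file_name': 'b.jpg'}]}
with open('tiny_ann.json', 'w') as f:
    json.dump(tiny, f)

coco = COCO('tiny_ann.json')
print(coco.getImgIds())  # [7, 2, 5]: JSON order, not sorted by id
```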
Shuffling is turned off in mmdet/apis/train.py as follows:
```python
data_loaders = [
    build_dataloader(
        ds,
        cfg.data.imgs_per_gpu,
        cfg.data.workers_per_gpu,
        cfg.gpus,
        dist=False,
        shuffle=False) for ds in dataset
]
```
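With shuffle=False in this non-distributed code path, build_dataloader falls back to plain sequential sampling (at least in the v1.x code this post is based on), so the images are served in exactly the order of img_infos, i.e. the order of the annotation JSON.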