Registry mechanism: a registry is essentially a mapping from strings to classes.
from mmcv.cnn import MODELS as MMCV_MODELS
from mmcv.utils import Registry
# 1. Create the registry (the table that holds the string-to-class mapping)
MODELS = Registry('models', parent=MMCV_MODELS)
# 2. Register a class into the registry (this maps the string 'Converter1' to the class)
@MODELS.register_module()
class Converter1(object):
    def __init__(self, a, b):  # __init__ must accept the kwargs given in the config
        self.a = a
        self.b = b
# 3. Once registered, the module can be instantiated through a config.
#    In the config dict, 'type' names the class to instantiate; the remaining
#    keys are its init arguments.
converter_cfg = dict(type='Converter1', a=a_value, b=b_value)
converter = MODELS.build(converter_cfg)
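A minimal, self-contained sketch of the same mechanism, assuming only plain mmcv (standalone registry; ToyConverter and its values are illustrative, not from any real codebase):
from mmcv.utils import Registry, build_from_cfg

CONVERTERS = Registry('converter')

@CONVERTERS.register_module()
class ToyConverter:  # illustrative class
    def __init__(self, a, b):
        self.a = a
        self.b = b

# The registry now maps the string 'ToyConverter' to the class object,
# so a config dict is enough to instantiate it.
cfg = dict(type='ToyConverter', a=1, b=2)
converter = build_from_cfg(cfg, CONVERTERS)  # same as CONVERTERS.build(cfg)
assert isinstance(converter, ToyConverter) and converter.a == 1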
from mmdet.datasets import build_dataset
from mmdet.models import build_detector
from mmdet.apis import train_detector
from mmcv import Config
# 1. Load the config file
cfg = Config.fromfile('configs/123/yolox.py')
# 2. Build the model: an instantiated YOLOX detector
model = build_detector(cfg.model)
# 3. Build the dataset (its samples have not gone through the pipeline yet)
datasets = [build_dataset(cfg.data.train)]
train_detector(model, datasets, cfg, distributed=False, validate=True)  # non-distributed
from mmdet.datasets import (build_dataloader, build_dataset)
from mmcv.parallel import MMDataParallel
from mmcv.runner import (DistSamplerSeedHook, EpochBasedRunner,
Fp16OptimizerHook, OptimizerHook, build_optimizer,
build_runner, get_dist_info)
def train_detector(model,
dataset,
cfg,
distributed=False,
validate=False,
timestamp=None,
meta=None):
    # 4. Initialize the logger
    logger = get_root_logger(log_level=cfg.log_level)
    # 5. Build the data loaders
    dataset = dataset if isinstance(dataset, (list, tuple)) else [dataset]
    runner_type = 'EpochBasedRunner' if 'runner' not in cfg else cfg.runner['type']
    data_loaders = [
        build_dataloader(
            ds,
            cfg.data.samples_per_gpu,
            cfg.data.workers_per_gpu,
            # `num_gpus` will be ignored if distributed
            num_gpus=len(cfg.gpu_ids),  # 1
            dist=distributed,           # False
            seed=cfg.seed,              # 0
            runner_type=runner_type,
            persistent_workers=cfg.data.get('persistent_workers', False))
        # dataset is a list whose single element is a MultiImageMixDataset.
        # Every item fetched from it calls that object's __getitem__ and
        # comes back already processed by all 12 pipeline steps.
        for ds in dataset
    ]
    # 6. Put the model on the GPU (distributed=False here)
    model = MMDataParallel(
        model.cuda(cfg.gpu_ids[0]),  # 0
        device_ids=cfg.gpu_ids)      # [0]
    # 7. Build the optimizer
    optimizer = build_optimizer(model, cfg.optimizer)
    # 8. Build the runner (runner_type, set above, is 'EpochBasedRunner' here)
    runner = build_runner(
        cfg.runner,
        default_args=dict(
            model=model,
            optimizer=optimizer,
            work_dir=cfg.work_dir,
            logger=logger,
            meta=meta))  # meta is None here
    # Logging: an ugly workaround to make .log and .log.json filenames the same
    runner.timestamp = timestamp  # None here
    # fp16 setting: mixed-precision training, usable for both training and inference
    fp16_cfg = cfg.get('fp16', None)  # take it if present, otherwise None
    if fp16_cfg is not None:  # None in this cfg
        optimizer_config = Fp16OptimizerHook(
            **cfg.optimizer_config, **fp16_cfg, distributed=distributed)
    elif distributed and 'type' not in cfg.optimizer_config:
        optimizer_config = OptimizerHook(**cfg.optimizer_config)
    else:  # only this branch executes here
        optimizer_config = cfg.optimizer_config
    # 9. Register training hooks
runner.register_training_hooks(
cfg.lr_config,
optimizer_config,
cfg.checkpoint_config,
cfg.log_config,
cfg.get('momentum_config', None),
custom_hooks_config=cfg.get('custom_hooks', None))
    # 10. Register eval hooks
if validate:
# Support batch_size > 1 in validation
val_samples_per_gpu = cfg.data.val.pop('samples_per_gpu', 1)
if val_samples_per_gpu > 1:
            # Replace 'ImageToTensor' with 'DefaultFormatBundle'
cfg.data.val.pipeline = replace_ImageToTensor(
cfg.data.val.pipeline)
val_dataset = build_dataset(cfg.data.val, dict(test_mode=True))
val_dataloader = build_dataloader(
val_dataset,
samples_per_gpu=val_samples_per_gpu,
workers_per_gpu=cfg.data.workers_per_gpu,
dist=distributed,
shuffle=False)
eval_cfg = cfg.get('evaluation', {})
eval_cfg['by_epoch'] = cfg.runner['type'] != 'IterBasedRunner'
eval_hook = DistEvalHook if distributed else EvalHook
# In this PR (https://github.com/open-mmlab/mmcv/pull/1193), the
# priority of IterTimerHook has been modified from 'NORMAL' to 'LOW'.
runner.register_hook(
eval_hook(val_dataloader, **eval_cfg), priority='LOW')
    # 11. Load weights
    if cfg.resume_from:
        runner.resume(cfg.resume_from)
    elif cfg.load_from:
        runner.load_checkpoint(cfg.load_from)
    # 12. Start training
    runner.run(data_loaders, cfg.workflow)
mmdet/models/builder.py: build_detector(cfg.model)
cfg = Config.fromfile('configs/123/yolox.py')
model = build_detector(cfg.model)  # instantiate the YOLOX model from the config
from mmcv.cnn import MODELS as MMCV_MODELS
from mmcv.utils import Registry
MODELS = Registry('models', parent=MMCV_MODELS)
DETECTORS = MODELS
# The @DETECTORS.register_module() statement registers the module into the
# registry at the time the class is defined:
@DETECTORS.register_module()
class YOLOX(SingleStageDetector):
    ...
# The build step then instantiates the YOLOX class according to the config,
# where cfg contains model = dict(type='YOLOX', ...):
def build_detector(
        cfg,  # cfg here is only the `model` dict of the full config, i.e.
              # Config.fromfile('configs/123/yolox.py').model
        train_cfg=None,
        test_cfg=None):
    return DETECTORS.build(
        cfg, default_args=dict(train_cfg=train_cfg, test_cfg=test_cfg))
mmdet/datasets/builder.py: build_dataset(cfg.data.train)
datasets = [build_dataset(cfg.data.train)]
The config dict passed in contains the dataset type, the dataset paths, and
the data-processing pipeline (10 training transforms + 2 loading steps):
train_dataset = dict(
    type='MultiImageMixDataset',
    dataset=dict(
        type=dataset_type,
        classes=('human body', 'ball', 'circle cage',
                 'square cage', 'tyre', 'metal bucket', 'cube', 'cylinder'),
        ann_file='data/sna/annotations/train.json',
        img_prefix='data/sna/train/',
        # the 2 loading steps
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(type='LoadAnnotations', with_bbox=True)
        ],
        filter_empty_gt=False,
    ),
    pipeline=train_pipeline)  # the 10 training transforms
args: the config dict above (dataset type, dataset paths, the 10+2 pipeline steps)
return: the resulting dataset, a MultiImageMixDataset(**cp_cfg) object
from mmcv.utils import Registry
DATASETS = Registry('dataset')
PIPELINES = Registry('pipeline')
# The cfg passed in is the train_dataset dict above
def build_dataset(cfg, default_args=None):
    from .dataset_wrappers import MultiImageMixDataset
    # isinstance() checks a variable's type. For the cfg we receive,
    # isinstance(cfg, (list, tuple)) is False: cfg is a dict, not a list/tuple.
    if isinstance(cfg, (list, tuple)):
        dataset = ConcatDataset([build_dataset(c, default_args) for c in cfg])
    # In our cfg the key 'type' maps to 'MultiImageMixDataset'
    elif cfg['type'] == 'MultiImageMixDataset':
        cp_cfg = copy.deepcopy(cfg)  # cp_cfg is identical to train_dataset
        # The recursive build_dataset(cp_cfg['dataset']) call ends up in
        # build_from_cfg and replaces the inner config with a CocoDataset object.
        cp_cfg['dataset'] = build_dataset(cp_cfg['dataset'])
        cp_cfg.pop('type')  # remove the 'type' key
        # cp_cfg now holds the built dataset plus the 10-step training pipeline
        dataset = MultiImageMixDataset(**cp_cfg)
    elif isinstance(cfg.get('ann_file'), (list, tuple)):
        dataset = _concat_dataset(cfg, default_args)
    else:  # returns a CocoDataset object
        dataset = build_from_cfg(cfg, DATASETS, default_args)
    return dataset
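A minimal sketch of this two-level, recursive build, assuming toy stand-in classes and a plain dict as the registry (none of these names are mmdet's):
import copy

REGISTRY = {}  # toy "registry": string -> class

class CocoDatasetStub:
    def __init__(self, ann_file, pipeline):
        self.ann_file, self.pipeline = ann_file, pipeline

class MultiImageMixDatasetStub:
    def __init__(self, dataset, pipeline):
        self.dataset, self.pipeline = dataset, pipeline

REGISTRY.update(CocoDatasetStub=CocoDatasetStub,
                MultiImageMixDatasetStub=MultiImageMixDatasetStub)

def build(cfg):
    cfg = copy.deepcopy(cfg)
    if cfg['type'] == 'MultiImageMixDatasetStub':
        cfg['dataset'] = build(cfg['dataset'])  # recursive inner build
    cls = REGISTRY[cfg.pop('type')]
    return cls(**cfg)

ds = build(dict(type='MultiImageMixDatasetStub',
                dataset=dict(type='CocoDatasetStub',
                             ann_file='train.json', pipeline=['load']),
                pipeline=['mosaic', 'mixup']))
assert isinstance(ds.dataset, CocoDatasetStub)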
The CocoDataset class lives in mmdet/datasets/coco.py and inherits from the
CustomDataset class, which lives in mmdet/datasets/custom.py.
The cp_cfg dict contains two key-value pairs, dataset and pipeline:
cp_cfg = dict(
    dataset=<the CocoDataset object>,
    pipeline=train_pipeline)
train_pipeline is the list of 10 training transforms:
train_pipeline = [
    dict(type='Mosaic', img_scale=img_scale, pad_val=114.0),  # 1
    dict(
        type='RandomAffine',
        scaling_ratio_range=(0.1, 2),
        border=(-img_scale[0] // 2, -img_scale[1] // 2)),     # 2
    dict(
        type='MixUp',
        img_scale=img_scale,
        ratio_range=(0.8, 1.6),
        pad_val=114.0),                                       # 3
    dict(type='YOLOXHSVRandomAug'),                           # 4
    dict(type='RandomFlip', flip_ratio=0.5),                  # 5
    # According to the official implementation, multi-scale
    # training is not considered here but in
    # 'mmdet/models/detectors/yolox.py'.
    dict(type='Resize', img_scale=img_scale, keep_ratio=True),  # 6
    dict(
        type='Pad',
        pad_to_square=True,
        # If the image is three-channel, the pad value needs
        # to be set separately for each channel.
        pad_val=dict(img=(114.0, 114.0, 114.0))),             # 7
    dict(type='FilterAnnotations', min_gt_bbox_wh=(1, 1), keep_empty=False),  # 8
    dict(type='DefaultFormatBundle'),                         # 9
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])  # 10
]
dataset = MultiImageMixDataset(**cp_cfg)
This class lives in mmdet/datasets/dataset_wrappers.py:
@DATASETS.register_module()
class MultiImageMixDataset:
    """A dataset wrapper for multi-image mixed training,
    e.g. mosaic and mixup augmentation.
    Args:
        dataset (:obj:`CustomDataset`): The dataset to be mixed
            (CustomDataset is the parent class of all dataset classes).
        pipeline (Sequence[dict]): The data-processing operations
            (each operation is one dict).
        dynamic_scale (tuple[int], optional): The scale for dynamic image
            resizing. Default to None.
        skip_type_keys (list[str], optional): Pipeline types to skip.
            Default to None.
    """
    def __init__(self,
                 dataset,   # key 1: dataset, the CocoDataset object
                 pipeline,  # key 2: pipeline=train_pipeline (a list of 10 dicts)
                 dynamic_scale=None,
                 skip_type_keys=None):
        self._skip_type_keys = skip_type_keys  # None here
        self.pipeline = []        # the 10 instantiated pipeline modules
        self.pipeline_types = []  # the 10 pipeline type strings
        for transform in pipeline:  # each iteration takes one of the 10 dicts
            if isinstance(transform, dict):
                self.pipeline_types.append(transform['type'])
                # transform is a config dict; PIPELINES is the registry.
                # build_from_cfg instantiates the transform module.
                transform = build_from_cfg(transform, PIPELINES)
                self.pipeline.append(transform)
        '''
        pipeline_types holds 10 strings:
        self.pipeline_types = ['Mosaic', 'RandomAffine', 'MixUp', 'YOLOXHSVRandomAug',
                               'RandomFlip', 'Resize', 'Pad', 'FilterAnnotations',
                               'DefaultFormatBundle', 'Collect']
        self.pipeline holds the 10 corresponding instantiated objects:
        self.pipeline = [Mosaic, RandomAffine, MixUp, YOLOXHSVRandomAug,
                         RandomFlip, Resize, Pad, FilterAnnotations,
                         DefaultFormatBundle, Collect]
        '''
        self.dataset = dataset  # the CocoDataset object
        self.CLASSES = dataset.CLASSES
        # len(dataset) calls CocoDataset's __len__: the number of images
        # indexed by annotations/train.json (the image count, not the
        # bbox count).
        self.num_samples = len(dataset)
    # __getitem__ is a Python magic method. For an object
    # dataset = MultiImageMixDataset(**cp_cfg),
    # dataset_item = dataset[idx] is equivalent to
    # dataset_item = dataset.__getitem__(idx).
    def __getitem__(self, idx):
        # Fetch one image's information.
        # self.dataset[idx] calls CocoDataset's __getitem__, which chains:
        # 1. CustomDataset.__getitem__
        # 2. CustomDataset.prepare_train_img(self, idx)
        # 3. CocoDataset.load_annotations(self, ann_file)
        # 4. CocoDataset.get_ann_info(self, idx)
        # 5. CustomDataset.pre_pipeline(results)
        # self.dataset[idx] therefore yields image idx after the two loading
        # pipeline steps, returned as a results dict.
        results = copy.deepcopy(self.dataset[idx])
        # 6. That results dict is then run through the 10 training transforms
        #    below to produce the final results dict.
        # Iterate over the transforms: mosaic, mixup, flip, etc.
        for (transform, transform_type) in zip(self.pipeline, self.pipeline_types):
            # Some training stages need to dynamically disable certain
            # augmentations, hence _skip_type_keys.
            if self._skip_type_keys is not None and \
                    transform_type in self._skip_type_keys:
                continue
            # If the transform has a get_indexes method, run it first to
            # sample the extra images to mix in.
            if hasattr(transform, 'get_indexes'):
                indexes = transform.get_indexes(self.dataset)
                if not isinstance(indexes, collections.abc.Sequence):
                    indexes = [indexes]
                # collect the results dicts of the images to be mixed in
                mix_results = [
                    copy.deepcopy(self.dataset[index]) for index in indexes
                ]
                results['mix_results'] = mix_results
            results = transform(results)  # invokes the transform's __call__
            if 'mix_results' in results:
                results.pop('mix_results')
        return results
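A toy sketch of the get_indexes / mix_results handshake (all classes here are illustrative stand-ins, not mmdet code):
import copy
import random

class ToyDataset:
    def __init__(self, items):
        self.items = items
    def __len__(self):
        return len(self.items)
    def __getitem__(self, idx):
        return dict(img=self.items[idx])

class ToyMixUp:
    """A transform that needs one extra, randomly chosen image."""
    def get_indexes(self, dataset):
        return random.randint(0, len(dataset) - 1)
    def __call__(self, results):
        extra = results.pop('mix_results')[0]
        results['img'] = (results['img'], extra['img'])  # "mix" the two images
        return results

dataset = ToyDataset(['a', 'b', 'c'])
transform = ToyMixUp()
results = copy.deepcopy(dataset[0])
idx = transform.get_indexes(dataset)
results['mix_results'] = [copy.deepcopy(dataset[idx])]
results = transform(results)
print(results['img'])  # e.g. ('a', 'c')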
Every pipeline step operates on a single image; the resulting dataset object's
__getitem__ returns one image's information as a results dict.
The two loading steps first produce the raw results dict; then
Mosaic, RandomAffine, MixUp, YOLOXHSVRandomAug, RandomFlip, Resize, Pad,
FilterAnnotations, DefaultFormatBundle, Collect
each add, modify, or delete keys of that results dict.
The final results dict has only 4 keys: img, img_metas, gt_bboxes, gt_labels.
Calling the dataset's __getitem__ therefore returns the output of Collect:
collect_results = dict(
    'img': DC(tensor, stack=True, cpu_only=False),  # DC is short for DataContainer
    'img_metas': DC(dict('flip': bool,
                         'ori_shape': tuple,
                         'img_shape': tuple,
                         'pad_shape': tuple,
                         'scale_factor': float or ndarray(4,),
                         'img_norm_cfg': dict('mean': ndarray, 'std': ndarray)),
                    stack=False, cpu_only=True),
    'gt_bboxes': DC(tensor, stack=False, cpu_only=False),
    'gt_labels': DC(tensor, stack=False, cpu_only=False)
)
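mmcv's DataContainer simply wraps a payload together with flags telling the collate step how to batch it; a minimal sketch of creating and unwrapping one (assuming mmcv and torch are installed):
import torch
from mmcv.parallel import DataContainer as DC

img = DC(torch.zeros(3, 4, 4), stack=True, cpu_only=False)
meta = DC(dict(flip=False, ori_shape=(4, 4, 3)), cpu_only=True)

# The wrapped payload is exposed through the .data property; the flags tell
# the collate function whether to stack payloads into a batch tensor and
# whether the payload must stay on CPU.
print(img.data.shape)  # torch.Size([3, 4, 4])
print(img.stack)       # True
print(meta.cpu_only)   # True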
mmcv.utils.build_from_cfg
This function is defined in mmcv/mmcv/utils/registry.py:
def build_from_cfg(cfg, registry, default_args=None):
    """Build a module from a config dict.
    Args:
        cfg (dict): Config dict. It must at least contain the key "type"
            (the module's init parameters may be omitted, but the module
            name must be given).
        registry (:obj:`Registry`): The registry to search the type in.
        default_args (dict, optional): Default initialization arguments.
    Returns:
        object: The instantiated object.
    """
    # 1. Validate the arguments.
    if not isinstance(cfg, dict):
        raise TypeError(f'cfg must be a dict, but got {type(cfg)}')
    if 'type' not in cfg:
        raise KeyError(
            f'the cfg dict must contain the key "type", but got {cfg}')
    if not isinstance(registry, Registry):
        raise TypeError('registry must be an mmcv.Registry object, '
                        f'but got {type(registry)}')
    if not (isinstance(default_args, dict) or default_args is None):
        raise TypeError('default_args must be a dict or None, '
                        f'but got {type(default_args)}')
    # 2. Resolve obj_cls, the class that cfg's "type" refers to.
    args = cfg.copy()  # dict.copy() returns a shallow copy of the cfg dict
    obj_type = args.pop('type')  # remove 'type' from args; obj_type is its value
    # 3. obj_cls ends up holding the class itself
    if is_str(obj_type):
        # registry.get(obj_type) looks up the class named obj_type
        obj_cls = registry.get(obj_type)
        if obj_cls is None:
            raise KeyError(
                f'{obj_type} is not in the {registry.name} registry')
    elif inspect.isclass(obj_type):  # 'type' may also directly be a class
        obj_cls = obj_type
    else:
        raise TypeError(
            f'type must be a str or valid type, but got {type(obj_type)}')
    if default_args is not None:
        for name, value in default_args.items():
            # dict.setdefault(): if the key exists, return its value;
            # otherwise insert the key with the given default value.
            args.setdefault(name, value)
    return obj_cls(**args)  # instantiate with the args given by the config
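A short usage sketch of build_from_cfg with a toy registry and class (illustrative names only), showing the setdefault semantics of default_args:
from mmcv.utils import Registry, build_from_cfg

OPTIM_STUBS = Registry('optim_stub')

@OPTIM_STUBS.register_module()
class SGDStub:  # illustrative class
    def __init__(self, lr, momentum=0.0):
        self.lr, self.momentum = lr, momentum

# default_args fills in keys that the cfg does not set:
obj = build_from_cfg(dict(type='SGDStub', lr=0.01),
                     OPTIM_STUBS,
                     default_args=dict(momentum=0.9))
assert obj.lr == 0.01 and obj.momentum == 0.9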
mmdet/datasets/builder.py: build_dataloader(...)
# The resulting data_loaders is a list of dataloaders. The list comprehension
# runs exactly once here, so the list holds a single DataLoader object.
data_loaders = [
    build_dataloader(
        ds,
        cfg.data.samples_per_gpu,   # 8, the batch size
        cfg.data.workers_per_gpu,   # 4, the number of worker processes
        # `num_gpus` will be ignored if distributed
        num_gpus=len(cfg.gpu_ids),  # 1
        dist=distributed,           # False: non-distributed training
        seed=cfg.seed,              # 0
        runner_type=runner_type,    # 'EpochBasedRunner'
        # relates to the PyTorch version; False here
        persistent_workers=cfg.data.get('persistent_workers', False))
    for ds in dataset  # dataset is a list whose single element is the
                       # MultiImageMixDataset object, so this loops once
]
# Each item fetched from that MultiImageMixDataset calls its __getitem__ and
# returns one image's data after all 12 pipeline steps.
mmdet.datasets.samplers.group_sampler:
sampler = GroupSampler(dataset, samples_per_gpu) if shuffle else None
mmcv.parallel.collate replaces torch.utils.data.dataloader.default_collate:
collate_fn = partial(collate, samples_per_gpu=samples_per_gpu)  # this is a function
from torch.utils.data import DataLoader
The build_dataloader function returns data_loader, built from the arguments
passed in:
data_loader = DataLoader(
    dataset,  # the dataset passed in at the start; each item is one image's results dict
    batch_size=batch_size,  # 8
    sampler=sampler,  # the sampler obtained above; returns one integer index at a time
    num_workers=num_workers,      # 4
    batch_sampler=batch_sampler,  # None
    # partial(collate, samples_per_gpu=8): calling collate_fn(x) performs
    # collate(x, samples_per_gpu=8)
    collate_fn=partial(collate, samples_per_gpu=samples_per_gpu),
    pin_memory=False,
    worker_init_fn=init_fn,  # None
    **kwargs)
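functools.partial just pre-binds keyword arguments; a tiny illustration with a toy stand-in for mmcv.parallel.collate:
from functools import partial

def toy_collate(batch, samples_per_gpu=1):  # stand-in, not mmcv's collate
    return (len(batch), samples_per_gpu)

collate_fn = partial(toy_collate, samples_per_gpu=8)
print(collate_fn([0, 1, 2]))  # (3, 8): batch passed at call time, 8 pre-bound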
The DataLoader class:
class DataLoader(object):
    __initialized = False
    def __init__(self,
                 dataset,       # the dataset object; its __getitem__ yields one
                                # image's information, and it indexes all images
                 batch_size=1,  # 8 here
                 # (default) whether each batch is composed randomly. A sampler
                 # is given below, and it controls the order in which the
                 # Dataset is read: batches are composed neither purely
                 # randomly nor strictly sequentially, but by the sampler's rule.
                 shuffle=False,
                 sampler=None,  # the data sampler; draws from the dataset,
                                # returning one integer index at a time
                 batch_sampler=None,  # None here
                 num_workers=0,       # 0 (default)
                 collate_fn=default_collate,  # the collate function: merges a
                                              # list of samples into a mini-batch
                 pin_memory=False,    # (default)
                 drop_last=False,     # (default) if the dataset size is not
                                      # divisible by 8, whether to drop the last batch
                 timeout=0,           # (default)
                 worker_init_fn=None):  # None here
        """
        Data loader. Combines a dataset and a sampler, and provides
        single- or multi-process iterators over the dataset.
        Arguments:
            dataset (Dataset): dataset from which to load the data.
            batch_size (int, optional): how many samples per batch to load
                (default: 1).
            shuffle (bool, optional): set to ``True`` to have the data reshuffled
                at every epoch (default: False).
            sampler (Sampler, optional): defines the strategy to draw samples from
                the dataset. If specified, ``shuffle`` must be False.
            batch_sampler (Sampler, optional): like sampler, but returns a batch of
                indices at a time. Mutually exclusive with batch_size, shuffle,
                sampler, and drop_last.
            num_workers (int, optional): how many subprocesses to use for data
                loading. 0 means that the data will be loaded in the main process.
                (default: 0)
            collate_fn (callable, optional): merges a list of samples to form a mini-batch."""
self.dataset = dataset
self.batch_size = batch_size
self.num_workers = num_workers
self.collate_fn = collate_fn
self.pin_memory = pin_memory
self.drop_last = drop_last
self.timeout = timeout
self.worker_init_fn = worker_init_fn
self.sampler = sampler
self.__initialized = True
if batch_sampler is None:
batch_sampler = BatchSampler(sampler, batch_size, drop_last)
self.batch_sampler = batch_sampler
def __setattr__(self, attr, val):
if self.__initialized and attr in ('batch_size', 'sampler', 'drop_last'):
raise ValueError('{} attribute should not be set after {} is '
'initialized'.format(attr, self.__class__.__name__))
super(DataLoader, self).__setattr__(attr, val)
def __iter__(self):
return _DataLoaderIter(self)
def __len__(self):
return len(self.batch_sampler)
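How a sampler and a BatchSampler cooperate can be seen with plain torch utilities:
from torch.utils.data import SequentialSampler, BatchSampler

sampler = SequentialSampler(range(10))  # yields 0, 1, ..., 9 one at a time
batch_sampler = BatchSampler(sampler, batch_size=4, drop_last=False)
print(list(batch_sampler))  # [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9]]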
MMDataParallel (a single-GPU wrapper around the model):
model = MMDataParallel(
    model.cuda(cfg.gpu_ids[0]),  # 0
    device_ids=cfg.gpu_ids)      # [0]
mmcv.parallel.MMDataParallel
model is now an MMDataParallel object:
from itertools import chain
from torch.nn.parallel import DataParallel
from .scatter_gather import scatter_kwargs
class MMDataParallel(DataParallel):  # inherits from PyTorch's DataParallel
    def __init__(self, *args, dim=0, **kwargs):
        # the constructor matches PyTorch's DataParallel
        super(MMDataParallel, self).__init__(*args, dim=dim, **kwargs)
        self.dim = dim
    def forward(self, *inputs, **kwargs):
        # apis/test.py and apis/inference.py call
        # model(return_loss=False, rescale=True, **data_batch), so in practice
        # only kwargs is used and inputs is an empty tuple()
        return super().forward(*inputs, **kwargs)
        # PyTorch's forward splits the data evenly across the GPUs;
        # each GPU actually receives batch/gpu_num samples
    def scatter(self, inputs, kwargs, device_ids):  # very important ######
        return scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim)
    def train_step(self, *inputs, **kwargs):
        # only inputs is used, as (data_loader[i]); kwargs is an empty dict {}
        # MMDataParallel only supports a single GPU
        assert len(self.device_ids) == 1
        for t in chain(self.module.parameters(), self.module.buffers()):
            # iterate over parameters (updated during training) and buffers
            # (not updated), checking that the model sits on the primary GPU
            if t.device != self.src_device_obj:
                raise RuntimeError(
                    'module must have its parameters and buffers '
                    f'on device {self.src_device_obj} (device_ids[0]) but '
                    f'found one of them on device: {t.device}')
        inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids)  # very important ######
        return self.module.train_step(*inputs[0], **kwargs[0])  # call the real module
    def val_step(self, *inputs, **kwargs):
        # only inputs is used, as (data_loader[i]); kwargs is an empty dict {}
        # MMDataParallel only supports a single GPU;
        # MMDistributedDataParallel is the multi-GPU version
        assert len(self.device_ids) == 1
        for t in chain(self.module.parameters(), self.module.buffers()):
            if t.device != self.src_device_obj:
                raise RuntimeError(
                    'module must have its parameters and buffers '
                    f'on device {self.src_device_obj} (device_ids[0]) but '
                    f'found one of them on device: {t.device}')
        inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids)  # very important ######
        return self.module.val_step(*inputs[0], **kwargs[0])  # call the real module
The most important method here is scatter: it unwraps the DataContainer-wrapped
data coming out of the dataloader into a format PyTorch can handle and puts it
on the device; the result is what self.module.train_step receives as the
model's input. The scattered structure looks like:
# num_gpus is the GPU count in distributed training; it defaults to 1,
# so the tuple holds a single dict element:
tuple(num_gpus *
      dict('img': tensor(Batch, C, H, W),
           'img_metas': list[Batch * dict('flip', 'ori_shape', ...)],
           'gt_bboxes': list[Batch * tensor],
           'gt_labels': list[Batch * tensor]
      )
)
build_optimizer
# model is the MMDataParallel object that wraps the original model
optimizer = build_optimizer(model, cfg.optimizer)
The module is constructed from the config:
cfg.optimizer = dict(
    type='SGD',
    lr=0.01 / 8,
    momentum=0.9,
    weight_decay=5e-4,
    nesterov=True,
    paramwise_cfg=dict(norm_decay_mult=0., bias_decay_mult=0.))
# paramwise_cfg disables weight decay for norm layers and biases
optimizer_config = dict(grad_clip=None)
Walking through build_optimizer(model, cfg.optimizer), which ultimately yields
an SGD optimizer over the model's parameters:
def build_optimizer(model, cfg):
    optimizer_cfg = copy.deepcopy(cfg)  # copy the config first
    # pop returns the value, then removes the key
    constructor_type = optimizer_cfg.pop('constructor',
                                         'DefaultOptimizerConstructor')
    paramwise_cfg = optimizer_cfg.pop('paramwise_cfg', None)
    # optim_constructor is a DefaultOptimizerConstructor built from the
    # config; calling it on the model produces the actual SGD optimizer
    optim_constructor = build_optimizer_constructor(
        dict(
            type=constructor_type,
            optimizer_cfg=optimizer_cfg,
            paramwise_cfg=paramwise_cfg))
    # (not digging further into the optimizer internals here)
    optimizer = optim_constructor(model)
    return optimizer
'''
cfg=dict(
type=constructor_type,
optimizer_cfg=optimizer_cfg,
paramwise_cfg=paramwise_cfg)
'''
def build_optimizer_constructor(cfg):
return build_from_cfg(cfg, OPTIMIZER_BUILDERS)
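For this particular cfg, the end result is equivalent, ignoring paramwise_cfg (which additionally zeroes weight decay on norm layers and biases), to constructing the torch optimizer directly; a hedged sketch with a stand-in model:
import torch

model = torch.nn.Linear(4, 2)  # stand-in for the detector
optimizer = torch.optim.SGD(
    model.parameters(),
    lr=0.01 / 8,
    momentum=0.9,
    weight_decay=5e-4,
    nesterov=True)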
build_runner
runner_type = 'EpochBasedRunner'  # what the cfg specifies
runner = build_runner(
    cfg.runner,
    default_args=dict(
        model=model,
        optimizer=optimizer,
        work_dir=cfg.work_dir,
        logger=logger,
        meta=meta))  # None
The module is constructed from the config:
cfg.runner = dict(type='EpochBasedRunner', max_epochs=300)
Walking through build_runner(cfg.runner, default_args=...), which ultimately
yields an EpochBasedRunner (mmcv/runner/epoch_based_runner.py), a subclass of
BaseRunner (mmcv/runner/base_runner.py):
def build_runner(cfg, default_args=None):
runner_cfg = copy.deepcopy(cfg)
constructor_type = runner_cfg.pop('constructor', 'DefaultRunnerConstructor')
    # runner_constructor is a DefaultRunnerConstructor built from the config's
    # type; calling it produces the actual runner
runner_constructor = build_runner_constructor(
dict(
type=constructor_type,
runner_cfg=runner_cfg,
default_args=default_args))
runner = runner_constructor()
return runner
def build_runner_constructor(cfg):
return RUNNER_BUILDERS.build(cfg)
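For this cfg, the whole constructor dance reduces to roughly the following direct construction (a sketch with toy stand-ins for the model and optimizer; EpochBasedRunner's keyword names as in mmcv):
import logging
import torch
from mmcv.runner import EpochBasedRunner

class ToyModel(torch.nn.Module):  # stand-in detector with the required train_step
    def __init__(self):
        super().__init__()
        self.fc = torch.nn.Linear(4, 2)
    def train_step(self, data_batch, optimizer, **kwargs):
        loss = self.fc(data_batch).sum()
        return dict(loss=loss, log_vars=dict(loss=loss.item()), num_samples=1)

model = ToyModel()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01 / 8)
runner = EpochBasedRunner(
    model=model,
    optimizer=optimizer,
    work_dir='./work_dir_demo',
    logger=logging.getLogger(__name__),
    meta=None,
    max_epochs=300)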
runner.register_training_hooks(
cfg.lr_config,
optimizer_config,
cfg.checkpoint_config,
cfg.log_config,
    cfg.get('momentum_config', None),  # not in the config file, so this is None
custom_hooks_config=cfg.get('custom_hooks', None))
register_training_hooks receives all the hook configs (a set of dicts) and
dispatches them to the individual register functions:
def register_training_hooks(self,
                            lr_config,
                            optimizer_config=None,
                            checkpoint_config=None,
                            log_config=None,
                            momentum_config=None,  # None here
                            timer_config=dict(type='IterTimerHook'),  # the default
                            custom_hooks_config=None):
"""Register default and custom hooks for training.
Default and custom hooks include:
+----------------------+-------------------------+
| Hooks | Priority 优先级 字符和数字的对应关系 |
+======================+=========================+
| LrUpdaterHook | VERY_HIGH (10) |
+----------------------+-------------------------+
| MomentumUpdaterHook | HIGH (30) |
+----------------------+-------------------------+
| OptimizerStepperHook | ABOVE_NORMAL (40) |
+----------------------+-------------------------+
| CheckpointSaverHook | NORMAL (50) |
+----------------------+-------------------------+
| IterTimerHook | LOW (70) |
+----------------------+-------------------------+
| LoggerHook(s) | VERY_LOW (90) |
+----------------------+-------------------------+
| CustomHook(s) | defaults to NORMAL (50) |
+----------------------+-------------------------+
If custom hooks have same priority with default hooks, custom hooks
will be triggered after default hooks.先触发的fault再触发custom
"""
    self.register_lr_hook(lr_config)
    self.register_momentum_hook(momentum_config)      # None here
    self.register_optimizer_hook(optimizer_config)
    self.register_checkpoint_hook(checkpoint_config)
    self.register_timer_hook(timer_config)            # the default IterTimerHook
    self.register_logger_hooks(log_config)
    self.register_custom_hooks(custom_hooks_config)
YOLOXLrUpdaterHook
Defined in mmdet/core/hook/yolox_lrupdater_hook.py; it implements the
learning-rate schedule used for training:
'''
lr_config = dict(
    _delete_=True,
    policy='YOLOX',  # eventually resolved to the YOLOXLrUpdaterHook type
    warmup='exp',
    by_epoch=False,
    warmup_by_epoch=True,
    warmup_ratio=1,
    warmup_iters=5,  # 5 epochs
    num_last_epochs=num_last_epochs,
    min_lr_ratio=0.05,
    step=[8, 11])
'''
def register_lr_hook(self, lr_config):
    elif isinstance(lr_config, dict):
        policy_type = lr_config.pop('policy')  # 'YOLOX'
        # If the type of policy is all in lower case, e.g., 'cyclic',
        # then its first letter will be capitalized, e.g., to be 'Cyclic'.
        # This is for the convenient usage of Lr updater.
        # Since this is not applicable for `CosineAnnealingLrUpdater`,
        # the string will not be changed if it contains capital letters.
        # (This block only normalizes the policy name.)
        if policy_type == policy_type.lower():
            policy_type = policy_type.title()
        # hook_type = 'YOLOXLrUpdaterHook'
        hook_type = policy_type + 'LrUpdaterHook'
        lr_config['type'] = hook_type
        # build an LrUpdaterHook object from the lr_config dict
        hook = mmcv.build_from_cfg(lr_config, HOOKS)
        # insert the hook into the hook queue with the given priority
        self.register_hook(hook, priority='VERY_HIGH')  # VERY_HIGH == 10
'''
momentum_config is None here
'''
def register_momentum_hook(self, momentum_config):
    if momentum_config is None:
        return
'''
optimizer_config = dict(grad_clip=None)
'''
def register_optimizer_hook(self, optimizer_config):
    # this hook is an OptimizerHook
    if isinstance(optimizer_config, dict):
        optimizer_config.setdefault('type', 'OptimizerHook')
        hook = mmcv.build_from_cfg(optimizer_config, HOOKS)
        # insert this hook into the hook queue
        self.register_hook(hook, priority='ABOVE_NORMAL')
'''
checkpoint_config = dict(interval=10)
'''
def register_checkpoint_hook(self, checkpoint_config):
if isinstance(checkpoint_config, dict):
checkpoint_config.setdefault('type', 'CheckpointHook')
hook = mmcv.build_from_cfg(checkpoint_config, HOOKS)
self.register_hook(hook, priority='NORMAL')
'''
timer_config=dict(type='IterTimerHook')
'''
def register_timer_hook(self, timer_config):
if isinstance(timer_config, dict):
timer_config_ = copy.deepcopy(timer_config)
hook = mmcv.build_from_cfg(timer_config_, HOOKS)
else:
hook = timer_config
self.register_hook(hook, priority='LOW')
'''
log_config = dict(
interval=50,
hooks=[
dict(type='TextLoggerHook'),
])
'''
def register_logger_hooks(self, log_config):
log_interval = log_config['interval'] #50
for info in log_config['hooks']:
logger_hook = mmcv.build_from_cfg(
info, HOOKS, default_args=dict(interval=log_interval))
self.register_hook(logger_hook, priority='VERY_LOW')
'''
custom_hooks = [
dict(
type='YOLOXModeSwitchHook',
num_last_epochs=num_last_epochs,
priority=48),
dict(
type='SyncNormHook',
num_last_epochs=num_last_epochs,
interval=interval,
priority=48),
dict(
type='ExpMomentumEMAHook',
resume_from=resume_from,
momentum=0.0001,
priority=49)
] '''
def register_custom_hooks(self, custom_config):
for item in custom_config:
if isinstance(item, dict):
self.register_hook_from_cfg(item)
else:
self.register_hook(item, priority='NORMAL')
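Writing one's own hook follows the same registry pattern; a minimal hedged sketch (PrintEpochHook is a hypothetical name, not a real mmdet hook):
from mmcv.runner import HOOKS, Hook

@HOOKS.register_module()
class PrintEpochHook(Hook):  # hypothetical example hook
    """Logs a message at the end of every training epoch."""
    def after_train_epoch(self, runner):
        runner.logger.info(f'finished epoch {runner.epoch + 1}')

# Once registered, it could be listed in custom_hooks like the YOLOX hooks above:
# custom_hooks = [dict(type='PrintEpochHook', priority=50)]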
'''
From the config file:
resume_from = None
load_from = 'checkpoints/yolox_l_8x8_300e_coco_20211126_140236-d3bd2b23.pth'
'''
if cfg.resume_from:  # None here
    runner.resume(cfg.resume_from)
elif cfg.load_from:  # load the pretrained weights
    runner.load_checkpoint(cfg.load_from)
'''
data_loaders = the dataloaders obtained above
cfg.workflow = [('train', 1)]: one epoch of training per cycle
'''
runner.run(data_loaders, cfg.workflow)
runner.run(data_loaders, workflow)
def run(self, data_loaders, workflow, max_epochs=None, **kwargs):
    """Start running.
    Args:
        data_loaders (list[:obj:`DataLoader`]): Dataloaders for training and validation.
        workflow (list[tuple]): A list of (phase, epochs) to specify the
            running order and epochs. E.g, [('train', 2), ('val', 1)] means
            running 2 epochs for training and 1 epoch for validation,
            iteratively.
    """
    # len(data_loaders) == 1 here: the list holds a single DataLoader object.
    assert len(data_loaders) == len(workflow)
    # 0. _max_epochs
    self._max_epochs = max_epochs  # 300
    # 1. Total number of iterations = 300 * len(data_loaders[i]).
    #    data_loaders[i] is a DataLoader, and len(DataLoader) is
    #    len(batch_sampler), i.e. the number of batches per epoch.
    self._max_iters = self._max_epochs * len(data_loaders[i])  # i = 0
    # 2. self.work_dir = cfg.work_dir = 'work_dir_yoloxl'
    work_dir = self.work_dir
    # (a series of logging operations)
    # 3. Run the hooks registered for 'before_run'. call_hook uses getattr
    #    internally, i.e. it executes every hook method named 'before_run'.
    self.call_hook('before_run')
    while self.epoch < self._max_epochs:  # self.epoch < 300
        mode, epochs = 'train', 1  # from workflow = [('train', 1)]
        # make sure mode is the string 'train'
        if isinstance(mode, str):
            # epoch_runner is bound to the method named by mode,
            # i.e. the runner.train method
            epoch_runner = getattr(self, mode)
        for _ in range(epochs):  # epochs == 1
            if mode == 'train' and self.epoch >= self._max_epochs:
                break
            # run the runner.train() method
            epoch_runner(data_loaders[i], **kwargs)
    time.sleep(1)  # wait for some hooks like loggers to finish
    self.call_hook('after_run')
runner.train(data_loader)
def train(self, data_loader):
    self.model.train()
    self.mode = 'train'
    self.data_loader = data_loader  # the DataLoader built above
    self._max_iters = self._max_epochs * len(self.data_loader)
    self.call_hook('before_train_epoch')  # call_hook: run all hooks of this name
    time.sleep(2)  # Prevent possible deadlock during epoch transition
    for i, data_batch in enumerate(self.data_loader):
        # each data_batch is one mini-batch, and each iteration performs
        # one gradient-descent step
        self._inner_iter = i
        self.call_hook('before_train_iter')
        self.run_iter(data_batch, train_mode=True)
        self.call_hook('after_train_iter')
        self._iter += 1
    self.call_hook('after_train_epoch')
    self._epoch += 1
runner.run_iter(data_batch, train_mode=True)
def run_iter(self, data_batch, train_mode):
    elif train_mode:
        outputs = self.model.train_step(data_batch, self.optimizer)
    self.outputs = outputs
YOLOX, SingleStageDetector, BaseDetector
1. model = YOLOX, in mmdet/models/detectors/yolox.py
2. SingleStageDetector, in mmdet/models/detectors/single_stage.py
3. BaseDetector, in mmdet/models/detectors/base.py
YOLOX inherits from SingleStageDetector, which inherits from BaseDetector.
The call chain for one training iteration:
1. BaseDetector.train_step is called;
2. train_step then calls BaseDetector's forward;
3. forward then calls forward_train, which YOLOX overrides;
4. YOLOX.forward_train in turn calls forward_train in single_stage.py;
5. the result is the outputs obtained in run_iter.
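A condensed sketch of what BaseDetector.train_step does (paraphrased from mmdet's base.py rather than copied verbatim; _parse_losses is simplified):
import torch

class BaseDetectorSketch(torch.nn.Module):
    """Paraphrased sketch of mmdet's BaseDetector.train_step."""

    def _parse_losses(self, losses):
        # Sum the individual loss tensors into one scalar and collect
        # plain floats for logging (simplified).
        loss = sum(v.mean() for v in losses.values())
        log_vars = {k: v.mean().item() for k, v in losses.items()}
        log_vars['loss'] = loss.item()
        return loss, log_vars

    def train_step(self, data, optimizer):
        # self(**data) triggers __call__ -> forward -> forward_train and
        # returns a dict of raw loss tensors.
        losses = self(**data)
        loss, log_vars = self._parse_losses(losses)
        return dict(loss=loss, log_vars=log_vars,
                    num_samples=len(data['img_metas']))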
if validate:  # True here
    # Support batch_size > 1 in validation
    # val_samples_per_gpu = 1 here
    val_samples_per_gpu = cfg.data.val.pop('samples_per_gpu', 1)
    if val_samples_per_gpu > 1:
        # Replace 'ImageToTensor' with 'DefaultFormatBundle'
        cfg.data.val.pipeline = replace_ImageToTensor(
            cfg.data.val.pipeline)
    # Build the validation dataset
    '''
    val=dict(
        type=dataset_type,
        classes=('human body', 'ball', 'circle cage', 'square cage', 'tyre', 'metal bucket', 'cube', 'cylinder'),
        ann_file='data/sna/annotations/val.json',
        img_prefix='data/sna/val/',
        pipeline=test_pipeline),
    test_pipeline = [
        dict(type='LoadImageFromFile'),
        dict(
            type='MultiScaleFlipAug',
            img_scale=img_scale,
            flip=False,
            transforms=[
                dict(type='Resize', keep_ratio=True),
                dict(type='RandomFlip'),
                dict(
                    type='Pad',
                    pad_to_square=True,
                    pad_val=dict(img=(114.0, 114.0, 114.0))),
                dict(type='DefaultFormatBundle'),
                dict(type='Collect', keys=['img'])
            ])]
    '''
    val_dataset = build_dataset(cfg.data.val, dict(test_mode=True))
    # Build the validation data loader
    val_dataloader = build_dataloader(
        val_dataset,
        samples_per_gpu=val_samples_per_gpu,       # 1
        workers_per_gpu=cfg.data.workers_per_gpu,  # 4
        dist=distributed,
        shuffle=False)
    # Register the eval hook
    '''
    eval_cfg comes from:
    evaluation = dict(
        save_best='auto',
        interval=10,
        dynamic_intervals=[(300 - 15, 1)],
        metric='bbox')
    '''
    eval_cfg = cfg.get('evaluation', {})
    # eval_cfg['by_epoch'] = True here
    eval_cfg['by_epoch'] = cfg.runner['type'] != 'IterBasedRunner'
    # eval_hook = EvalHook here (non-distributed)
    eval_hook = DistEvalHook if distributed else EvalHook
    # In this PR (https://github.com/open-mmlab/mmcv/pull/1193), the
    # priority of IterTimerHook has been modified from 'NORMAL' to 'LOW'.
    # Only this single eval hook is registered here; it is inserted into
    # the hook queue:
    runner.register_hook(
        eval_hook(val_dataloader, **eval_cfg), priority='LOW')
This is the EvalHook used above, defined in mmdet/core/evaluation/eval_hooks.py:
# no extra arguments are passed in; everything uses the defaults
class EvalHook(BaseEvalHook):
def __init__(self, *args, dynamic_intervals=None, **kwargs):
super(EvalHook, self).__init__(*args, **kwargs)
self.use_dynamic_intervals = dynamic_intervals is not None
if self.use_dynamic_intervals:
self.dynamic_milestones, self.dynamic_intervals = \
_calc_dynamic_intervals(self.interval, dynamic_intervals)
def _decide_interval(self, runner):
if self.use_dynamic_intervals:
progress = runner.epoch if self.by_epoch else runner.iter
step = bisect.bisect(self.dynamic_milestones, (progress + 1))
# Dynamically modify the evaluation interval
self.interval = self.dynamic_intervals[step - 1]
def before_train_epoch(self, runner):
"""Evaluate the model only at the start of training by epoch."""
self._decide_interval(runner)
super().before_train_epoch(runner)
def before_train_iter(self, runner):
self._decide_interval(runner)
super().before_train_iter(runner)
def _do_evaluate(self, runner):
"""perform evaluation and save ckpt."""
if not self._should_evaluate(runner):
return
from mmdet.apis import single_gpu_test
results = single_gpu_test(runner.model, self.dataloader, show=False)
runner.log_buffer.output['eval_iter_num'] = len(self.dataloader)
key_score = self.evaluate(runner, results)
if self.save_best:
self._save_ckpt(runner, key_score)
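The dynamic-interval logic is plain bisect arithmetic; a worked sketch with this config's numbers (interval=10, dynamic_intervals=[(285, 1)]), assuming _calc_dynamic_intervals yields the milestone/interval lists shown:
import bisect

dynamic_milestones = [0, 285]  # assumed output of _calc_dynamic_intervals
dynamic_intervals = [10, 1]

for epoch in (0, 100, 283, 284, 299):
    step = bisect.bisect(dynamic_milestones, epoch + 1)
    print(epoch, '->', dynamic_intervals[step - 1])
# prints: 0 -> 10, 100 -> 10, 283 -> 10, 284 -> 1, 299 -> 1
# i.e. evaluate every 10 epochs, then every epoch near the end of training.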
The inherited BaseEvalHook lives in mmcv/runner/hooks/evaluation.py.
During run, when the after_train_epoch hook point fires, the eval hook's
after_train_epoch is invoked: 1. _should_evaluate decides whether to evaluate;
2. if it returns False, the hook simply returns; if True, _do_evaluate calls
single_gpu_test from mmdet/apis/test.py to perform the evaluation.
class YOLOX is in mmdet/models/detectors/yolox.py
class CSPDarknet is in mmdet/models/backbones/csp_darknet.py
class YOLOXPAFPN is in mmdet/models/necks/yolox_pafpn.py
class YOLOXHead is in mmdet/models/dense_heads/yolox_head.py