mmdetection使用

文章目录

  • 前言
    • 记录mmdetection2.19.0使用的经验
  • 一、配置文件Config.py
      • 1.model
      • 2.dataset
      • 3.optimizer
      • 4.default_runtime
  • 总结


前言

记录mmdetection2.19.0使用的经验

一、配置文件Config.py

这里以faster-rcnn和retinanet为例

1.model

# model settings
# Faster R-CNN: two-stage detector (RPN proposals + per-RoI classification head).
model = dict(
    type='FasterRCNN',  # detector class registered in mmdetection
    backbone=dict(  # feature-extraction network
        type='ResNet',  # backbone architecture name
        depth=50,  # ResNet-50; the keys below override the constructor's
        num_stages=4,  # default arguments (number of ResNet stages to build)
        out_indices=(0, 1, 2, 3),  # indices of the stage outputs fed to the neck
        frozen_stages=1,  # freeze stages up to this index (=1 freezes ResNet layer1)
        norm_cfg=dict(type='BN', requires_grad=True),  # BatchNorm with learnable affine params
        norm_eval=True,  # keep BN in eval mode during training:
        # running_mean/running_var are not updated, but BN weight/bias still learn
        style='pytorch',
        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),  # pretrained weights to load
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],  # channels of each backbone stage fed to FPN
        out_channels=256,  # output channels of every FPN level
        num_outs=5),  # total number of output feature levels
    rpn_head=dict(
        type='RPNHead',  # shared single-branch prediction head of the RPN
        in_channels=256,  # input channels (FPN output)
        feat_channels=256,  # channels of the intermediate conv feature
        anchor_generator=dict(  # anchor generation settings
            type='AnchorGenerator',
            scales=[8],  # anchor base scale
            ratios=[0.5, 1.0, 2.0],  # anchor height/width ratios
            strides=[4, 8, 16, 32, 64]),  # stride of each level w.r.t. the input image
        bbox_coder=dict(
            type='DeltaXYWHBBoxCoder',  # bbox encode/decode as (dx, dy, dw, dh) deltas
            target_means=[.0, .0, .0, .0],  # delta normalization means
            target_stds=[1.0, 1.0, 1.0, 1.0]),  # delta normalization stds
        loss_cls=dict(  # classification loss: cross-entropy with sigmoid
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
        loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
    roi_head=dict(  # second stage: classify/refine each proposal
        type='StandardRoIHead',
        bbox_roi_extractor=dict(
            type='SingleRoIExtractor',
            roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
            out_channels=256,
            featmap_strides=[4, 8, 16, 32]),  # RoI pooling over the four FPN levels
        bbox_head=dict(
            type='Shared2FCBBoxHead',
            in_channels=256,  # channels of the pooled RoI feature
            fc_out_channels=1024,  # width of the two shared FC layers
            roi_feat_size=7,
            num_classes=80,  # number of object classes
            bbox_coder=dict(
                type='DeltaXYWHBBoxCoder',
                target_means=[0., 0., 0., 0.],
                target_stds=[0.1, 0.1, 0.2, 0.2]),  # tighter stds than in the RPN stage
            reg_class_agnostic=False,  # class-agnostic would regress one box per RoI
            loss_cls=dict(              # regardless of its predicted class
                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),  # use_sigmoid=False -> softmax
            loss_bbox=dict(type='L1Loss', loss_weight=1.0))),
    # model training and testing settings
    train_cfg=dict(
        rpn=dict(
            assigner=dict(
                type='MaxIoUAssigner',  # assign gt boxes by maximum IoU
                pos_iou_thr=0.7,  # IoU threshold for positives
                neg_iou_thr=0.3,  # IoU threshold for negatives
                min_pos_iou=0.3,  # minimum IoU when adding extra positives
                match_low_quality=True,  # allow low-quality matches for higher recall
                ignore_iof_thr=-1),  # IoF threshold for ignoring boxes (-1: disabled)
            sampler=dict(
                type='RandomSampler',
                num=256,  # total positives + negatives to sample
                pos_fraction=0.5,  # fraction of positives
                neg_pos_ub=-1,  # upper bound of the neg/pos ratio (-1: unbounded)
                add_gt_as_proposals=False),  # do not add ground truth into proposals
            allowed_border=-1,  # how far anchors may extend past the image border (-1: no limit)
            pos_weight=-1,  # positive-sample loss weight (-1: keep original weight)
            debug=False),
        rpn_proposal=dict(
            nms_pre=2000,  # number of predicted boxes kept before NMS
            max_per_img=1000,  # maximum proposals kept per image
            nms=dict(type='nms', iou_threshold=0.7),  # NMS IoU (not score) threshold
            min_bbox_size=0),  # minimum allowed bbox size
        rcnn=dict(
            assigner=dict(
                type='MaxIoUAssigner',  # same assigner type as the RPN stage
                pos_iou_thr=0.5,  # relaxed positive threshold
                neg_iou_thr=0.5,
                min_pos_iou=0.5,
                match_low_quality=False,  # drop low-quality matches for precision
                ignore_iof_thr=-1),
            sampler=dict(
                type='RandomSampler',
                num=512,  # samples kept for the second stage
                pos_fraction=0.25,  # fraction of positives
                neg_pos_ub=-1,  # neg/pos upper bound (-1: unbounded)
                add_gt_as_proposals=True),  # ground truth joins the proposal set
            pos_weight=-1,
            debug=False)),
    test_cfg=dict(
        rpn=dict(
            nms_pre=1000,
            max_per_img=1000,
            nms=dict(type='nms', iou_threshold=0.7),
            min_bbox_size=0),
        rcnn=dict(
            score_thr=0.05,
            nms=dict(type='nms', iou_threshold=0.5),
            max_per_img=100)  # keep at most 100 boxes per image
        # soft-nms is also supported for rcnn testing
        # e.g., nms=dict(type='soft_nms', iou_threshold=0.5, min_score=0.05)
    ))

# model settings
# RetinaNet: one-stage detector -- a single dense head on FPN features
# replaces the RPN + RoI head of the two-stage design.
model = dict(
    type='RetinaNet',
    backbone=dict(
        type='ResNet',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        norm_eval=True,
        norm_cfg=dict(requires_grad=True, type='BN'),
        style='pytorch',
        init_cfg=dict(checkpoint='torchvision://resnet50', type='Pretrained')),
    neck=dict(
        type='FPN',
        num_outs=5,
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        add_extra_convs='on_input',
        start_level=1),
    bbox_head=dict(  # this part differs from the two-stage config
        type='RetinaHead',
        num_classes=80,
        in_channels=256,    # FPN output channels
        feat_channels=256,  # channels of the stacked convs
        stacked_convs=4,    # four conv layers before prediction
        # norm_cfg=dict(type='GN', num_groups=32, requires_grad=True),
        anchor_generator=dict(
            type='AnchorGenerator',
            strides=[8, 16, 32, 64, 128],  # stride per pyramid level
            ratios=[0.5, 1.0, 2.0],        # anchor aspect ratios
            octave_base_scale=4,           # base scale, expanded per octave
            scales_per_octave=3),          # anchor scales per location/ratio
        bbox_coder=dict(
            type='DeltaXYWHBBoxCoder',
            target_stds=[1.0, 1.0, 1.0, 1.0],
            target_means=[.0, .0, .0, .0]),
        loss_cls=dict(
            type='FocalLoss',  # focal loss for the dense fg/bg imbalance
            use_sigmoid=True,
            alpha=0.25,        # focal-loss hyperparameters
            gamma=2.0,
            loss_weight=1.0),
        loss_bbox=dict(loss_weight=1.0, type='L1Loss')),
    # model training and testing settings
    train_cfg=dict(
        assigner=dict(
            type='MaxIoUAssigner',
            pos_iou_thr=0.5,
            neg_iou_thr=0.4,
            min_pos_iou=0,
            ignore_iof_thr=-1),
        allowed_border=-1,
        pos_weight=-1,
        debug=False),
    test_cfg=dict(
        nms_pre=1000,
        min_bbox_size=0,
        score_thr=0.05,
        nms=dict(type='nms', iou_threshold=0.5),
        max_per_img=100))

下图是RetinaNet的anchor尺寸生成
mmdetection使用_第1张图片
下图是相应的anchor生成
mmdetection使用_第2张图片

2.dataset

# dataset settings (Pascal VOC)
dataset_type = 'VOCDataset'
data_root = '/data/yzycode/datasets/VOCdevkit/'
#data_root = '/data/datasets/VOCdevkit/'

# Input normalization: subtract mean, divide by std, convert BGR -> RGB.
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(type='Resize', img_scale=(1000, 600), keep_ratio=True),  # VOC scale; COCO uses (1333, 800)
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(type='Pad', size_divisor=32),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        # Fixed: was ``(((1333, 800)))`` -- redundant parentheses, same tuple.
        # NOTE(review): the stock VOC config tests at (1000, 600); confirm the
        # larger test scale here is intentional.
        img_scale=(1333, 800),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=2,  # batch size per GPU
    workers_per_gpu=2,  # dataloader workers per GPU
    train=dict(
        type='RepeatDataset',  # wraps the dataset to avoid reloading it frequently
        times=3,  # repeat factor: effective iterations = max_epochs * times
        dataset=dict(
            type=dataset_type,
            # trainval.txt trains on train+val together; use separate splits
            # if you want an untouched validation set.
            ann_file=data_root + 'VOC2007/ImageSets/Main/trainval.txt',
            # If your folders are not named VOC2007/VOC2012, adjust voc.py; see
            # https://www.pythonheidong.com/blog/article/1134278/5d81d8615a9fd6e9ca40/
            img_prefix=data_root + 'VOC2007/',
            pipeline=train_pipeline)),
    val=dict(
        type=dataset_type,
        ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt',  # evaluate on the test split
        img_prefix=data_root + 'VOC2007/',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt',
        img_prefix=data_root + 'VOC2007/',
        pipeline=test_pipeline))
evaluation = dict(interval=1, metric='mAP')  # VOC mAP every epoch

# dataset settings (COCO)
dataset_type = 'CocoDataset'  # dataset class name
data_root = 'data/coco/'      # dataset root directory

# Normalize the input image (subtract mean, divide by std) and convert BGR -> RGB.
img_norm_cfg = dict(
    to_rgb=True,
    mean=[123.675, 116.28, 103.53],
    std=[58.395, 57.12, 57.375])
# Training-time preprocessing and augmentation steps, applied in order.
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(with_bbox=True, type='LoadAnnotations'),
    dict(keep_ratio=True, type='Resize', img_scale=(1333, 800)),
    dict(flip_ratio=0.5, type='RandomFlip'),
    dict(type='Normalize', **img_norm_cfg),
    dict(size_divisor=32, type='Pad'),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
# Test-time pipeline: single scale, no flipping.
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        flip=False,
        img_scale=(1333, 800),
        transforms=[
            dict(keep_ratio=True, type='Resize'),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(size_divisor=32, type='Pad'),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    workers_per_gpu=2,  # dataloader worker threads per GPU
    samples_per_gpu=2,  # images per GPU (per-GPU batch size)
    train=dict(
        type=dataset_type,  # paths below may also be absolute
        pipeline=train_pipeline,
        ann_file=data_root + 'annotations/instances_train2017.json',
        img_prefix=data_root + 'train2017/'),
    val=dict(
        type=dataset_type,
        pipeline=test_pipeline,
        ann_file=data_root + 'annotations/instances_val2017.json',
        img_prefix=data_root + 'val2017/'),
    test=dict(
        type=dataset_type,
        pipeline=test_pipeline,
        # ann_file=data_root + 'annotations/image_info_test-dev2017.json',
        # img_prefix=data_root + 'test2017/',
        ann_file=data_root + 'annotations/instances_val2017.json',
        img_prefix=data_root + 'val2017/'))
evaluation = dict(metric='bbox', interval=1)  # COCO bbox mAP every epoch

3.optimizer

# optimizer
# Base LR follows the linear scaling rule: lr=0.02 assumes the default
# 8 GPUs x 2 imgs/GPU. Scale it down for smaller total batch sizes, e.g.:
# optimizer = dict(type='SGD', lr=0.0025, momentum=0.9, weight_decay=0.0001)
# (The original file assigned that dict and immediately overwrote it below --
# the dead store is kept here only as a commented example.)
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)  # no gradient clipping
# learning policy
lr_config = dict(
    policy='step',       # step-decay schedule
    warmup='linear',     # ramp the LR up linearly at the start of training
    warmup_iters=500,    # warm up over the first 500 iterations
    warmup_ratio=0.001,  # start at lr * 0.001, reaching lr after warmup
    step=[8, 11])        # decay the LR at epochs 8 and 11
runner = dict(type='EpochBasedRunner', max_epochs=12)  # 12-epoch (1x) schedule

4.default_runtime

checkpoint_config = dict(interval=1)  # save a checkpoint every epoch; raise to save disk space
# yapf:disable
log_config = dict(
    interval=50,  # log every 50 iterations
    hooks=[
        dict(type='TextLoggerHook'),  # write plain-text log files
        #dict(type='TensorboardLoggerHook') # uncomment for TensorBoard logging
    ])
# yapf:enable
custom_hooks = [dict(type='NumClassCheckHook')]

dist_params = dict(backend='nccl')  # distributed-training parameters
log_level = 'INFO'
work_dir = './work_dirs/faster_rcnn_r50_fpn_1x'  # where logs and checkpoints are stored
load_from = None  # checkpoint to load model weights from (None: no checkpoint is loaded)
resume_from = None  # checkpoint path to resume an interrupted training run from
workflow = [('train', 1)]
#workflow = [('train', 2),('val', 1)]  # train for n epochs, then run 1 validation epoch

总结

还有些细节的地方没讲到,后面讲解其他相应的内容时会有所补充

你可能感兴趣的:(深度学习,目标检测,pytorch)