本文使用的mmdetection版本为v1.1.0,根据官方INSTALL文档先创建虚拟环境并安装相关依赖,数据集为COCO2017。
# Create and activate the virtual environment
conda create -n open-mmlab python=3.7 -y
conda activate open-mmlab
# Check your CUDA version with `nvcc -V` first, then install the matching
# PyTorch build from the official PyTorch site; the line below is only an example
conda install pytorch torchvision -c pytorch
# Clone mmdetection pinned to the v1.1.0 tag used throughout this article, so the
# config paths and requirement files referenced below are the ones being cloned
git clone -b v1.1.0 https://github.com/open-mmlab/mmdetection.git
cd mmdetection
# Install the dependencies
pip install -r requirements/build.txt
pip install "git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI"
pip install -v -e . # or "python setup.py develop"
至于FCOS的论文解读,可以看我的另一篇博客《【ICCV2019】FCOS》。
接下来对 mmdetection/configs/fcos/fcos_r50_caffe_fpn_gn_1x_4gpu.py 进行修改。
# model settings
model = dict(
    type='FCOS',
    # Local path to the pretrained backbone weights: point it at your own
    # copy (if you have no local copy, keep the config's original value).
    pretrained='/home/wh/weights/resnet50_caffe-788b5fa3.pth',
    backbone=dict(
        type='ResNet',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),  # C2, C3, C4, C5
        frozen_stages=1,
        norm_cfg=dict(type='BN', requires_grad=False),
        style='caffe'),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        start_level=1,  # start from C3
        add_extra_convs=True,
        # True: the extra convs take C5 as input;
        # False: the extra convs take P5 as input.
        extra_convs_on_inputs=False,
        num_outs=5,
        relu_before_extra_convs=True),
    bbox_head=dict(
        type='FCOSHead',
        num_classes=81,
        in_channels=256,
        stacked_convs=4,
        feat_channels=256,
        strides=[8, 16, 32, 64, 128],
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0),
        loss_bbox=dict(type='IoULoss', loss_weight=1.0),
        loss_centerness=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)))
# training and testing settings
train_cfg = dict(
    assigner=dict(
        type='MaxIoUAssigner',
        pos_iou_thr=0.5,
        neg_iou_thr=0.4,
        min_pos_iou=0,
        ignore_iof_thr=-1),
    allowed_border=-1,
    pos_weight=-1,
    debug=False)
test_cfg = dict(
    nms_pre=1000,
    min_bbox_size=0,
    score_thr=0.05,
    nms=dict(type='nms', iou_thr=0.5),
    max_per_img=100)
# dataset settings
dataset_type = 'CocoDataset'
# Change this to the location of your own dataset.
data_root = '/hdd2/wh/coco2017/'
img_norm_cfg = dict(
    mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(1333, 800),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    # Images per GPU (4 by default); adjust to what your cards can hold.
    imgs_per_gpu=4,
    workers_per_gpu=4,
    train=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_train2017.json',
        img_prefix=data_root + 'train2017/',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_val2017.json',
        img_prefix=data_root + 'val2017/',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_val2017.json',
        img_prefix=data_root + 'val2017/',
        pipeline=test_pipeline))
evaluation = dict(interval=1, metric='bbox')
# optimizer
optimizer = dict(
    type='SGD',
    # The default lr is 0.01; scale it proportionally to the actual total
    # batch size. The original note takes 0.01 as the value for batch size 8
    # (so batch size 2 -> 0.0025).
    # NOTE(review): the "4gpu" config name together with imgs_per_gpu=4
    # suggests a reference batch size of 16 - confirm before scaling.
    lr=0.01,
    momentum=0.9,
    weight_decay=0.0001,
    paramwise_options=dict(bias_lr_mult=2., bias_decay_mult=0.))
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(
    policy='step',
    warmup='constant',
    warmup_iters=500,
    warmup_ratio=1.0 / 3,
    step=[8, 11])
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
    interval=50,
    hooks=[
        dict(type='TextLoggerHook'),
        # dict(type='TensorboardLoggerHook')
    ])
# yapf:enable
# runtime settings
total_epochs = 12
dist_params = dict(backend='nccl')
log_level = 'INFO'
# Default directory where checkpoints are stored.
work_dir = './work_dirs/fcos_r50_caffe_fpn_gn_1x_4gpu'
load_from = None
resume_from = None
workflow = [('train', 1)]
# Launch training directly with Python (no distributed launcher)
python tools/train.py configs/fcos/fcos_r50_caffe_fpn_gn_1x_4gpu.py
# Launch training through the distributed launcher script
./tools/dist_train.sh configs/fcos/fcos_r50_caffe_fpn_gn_1x_4gpu.py 2 # the trailing 2 is the number of GPUs
# Same as above, but resume from a previously saved checkpoint file
./tools/dist_train.sh configs/fcos/fcos_r50_caffe_fpn_gn_1x_4gpu.py 2 --resume_from ${CHECKPOINT_FILE}
关于模型如何在代码中建立的可以参考mmdetection源码阅读笔记(0)–创建模型。
主要来看 /mmdetection/mmdet/models/anchor_heads/fcos_head.py。这个文件不仅构造了head层,网络的损失函数也在这里定义。
C1、C2为下采样4倍。
_init_layers用来构造head层,也就是上图右边所示,由4个3x3卷积堆叠起来,然后进行分支。
def _init_layers(self):
    """Build the layers of the FCOS head.

    Creates two parallel towers, ``cls_convs`` and ``reg_convs``, each made
    of ``self.stacked_convs`` 3x3 ``ConvModule`` layers, followed by three
    3x3 prediction convs: ``fcos_cls`` (``self.cls_out_channels`` output
    channels), ``fcos_reg`` (4 output channels) and ``fcos_centerness``
    (1 output channel).
    """
    self.cls_convs = nn.ModuleList()
    self.reg_convs = nn.ModuleList()
    # Settings shared by every conv of both towers. The conv bias is only
    # enabled when no norm layer is configured.
    tower_conv_kwargs = dict(
        stride=1,
        padding=1,
        conv_cfg=self.conv_cfg,
        norm_cfg=self.norm_cfg,
        bias=self.norm_cfg is None)
    # stacked_convs is the number of 3x3 convs in each tower.
    for i in range(self.stacked_convs):
        # Only the first conv consumes the head input; the following ones
        # chain on feat_channels (both are 256 in the FCOS config above).
        chn = self.in_channels if i == 0 else self.feat_channels
        # Append to cls first, then reg, at every depth so the modules are
        # constructed in the same order as the original unrolled code.
        for tower in (self.cls_convs, self.reg_convs):
            tower.append(
                ConvModule(chn, self.feat_channels, 3, **tower_conv_kwargs))
    # Prediction layers that branch off the two towers.
    self.fcos_cls = nn.Conv2d(
        self.feat_channels, self.cls_out_channels, 3, padding=1)
    self.fcos_reg = nn.Conv2d(self.feat_channels, 4, 3, padding=1)
    self.fcos_centerness = nn.Conv2d(self.feat_channels, 1, 3, padding=1)
init_weights是初始化参数的函数,这里就不细说了,其中所用到的参数初始化的方法来自于mmcv.cnn.weight_init。
然后就是前向传播的函数forward。
参数scale与FPN输出的五个层级一一对应(这五个层级分别下采样了8, 16, 32, 64, 128倍),但是这里scale的值都是1(requires_grad=True,可学习)。forward其实是通过multi_apply对每个层级分别调用了forward_single。
def forward(self, feats):
    """Run the head on every feature level.

    Args:
        feats: Tuple of length 5 holding the outputs of P3-P7, i.e.
            [5][batchsize, 256, H_i, W_i].

    Returns:
        The result of ``multi_apply`` over ``forward_single``: a tuple
        ``(cls_score, bbox_pred, centerness)`` in which every item is a
        list with one entry per feature level.

    Note:
        ``self.scales`` holds one scale per level (P3-P7); the actual
        values are all 1:

        ModuleList(
          (0): Scale() tensor(1., device='cuda:0', requires_grad=True)
          (1): Scale() tensor(1., device='cuda:0', requires_grad=True)
          (2): Scale() tensor(1., device='cuda:0', requires_grad=True)
          (3): Scale() tensor(1., device='cuda:0', requires_grad=True)
          (4): Scale() tensor(1., device='cuda:0', requires_grad=True)
        )
    """
    # Pair each level's feature map with its own scale and process the
    # levels one by one.
    return multi_apply(self.forward_single, feats, self.scales)
def forward_single(self, x, scale):
    """Run the head on the feature map of a single level.

    Args:
        x: Feature map of one level.
        scale: Callable applied to the raw regression output of this
            level (``forward`` passes one entry of ``self.scales``).

    Returns:
        tuple: ``(cls_score, bbox_pred, centerness)`` for this level.
    """
    # Classification tower.
    cls_feat = x
    for cls_layer in self.cls_convs:
        cls_feat = cls_layer(cls_feat)
    cls_score = self.fcos_cls(cls_feat)
    # Centerness is predicted from the classification tower features.
    centerness = self.fcos_centerness(cls_feat)

    # Regression tower.
    reg_feat = x
    for reg_layer in self.reg_convs:
        reg_feat = reg_layer(reg_feat)
    # scale the bbox_pred of different level
    # float to avoid overflow when enabling FP16
    bbox_pred = scale(self.fcos_reg(reg_feat)).float().exp()
    return cls_score, bbox_pred, centerness
简单介绍下multi_apply,这个函数在 /mmdetection/mmdet/core/utils/misc.py 中。
map()是 Python 内置的高阶函数,它接收一个函数 function 和一系列可迭代对象(list, set, dict, tuple, str等),并通过把函数 function 依次作用在 iterable 的每个元素上,得到一个新的 map对象(map object at …)并返回。对于新的map对象,可用list(map_obj)或tuple(map_obj)转化为想要的数据类型。
from functools import partial
from six.moves import map, zip
def multi_apply(func, *args, **kwargs):
    """Apply ``func`` across parallel sequences and regroup the results.

    ``func`` is called once per position with one item taken from each
    sequence in ``args``; ``kwargs`` (if any) are bound to every call.
    Each call is expected to return a tuple. The per-call tuples are then
    transposed, so the result groups the values by return position.

    Args:
        func: Callable returning a tuple.
        *args: Sequences iterated in lockstep; item ``i`` of each sequence
            forms the positional arguments of call ``i``.
        **kwargs: Keyword arguments passed unchanged to every call.

    Returns:
        tuple: One list per value returned by ``func``; each list holds one
        entry per call. For ``forward_single`` over P3-P7 this is a tuple of
        length 3 (cls_score, bbox_pred, centerness) whose items are lists of
        length 5, one entry per level.
    """
    # Only wrap in partial() when there is something to bind.
    pfunc = partial(func, **kwargs) if kwargs else func
    # Lazily evaluated per-call results (a map object).
    map_results = map(pfunc, *args)
    # zip(*...) transposes "per call" tuples into "per output" groups.
    return tuple(map(list, zip(*map_results)))
functools.partial的用法如下:
import functools


def add(a, b):
    """Return the sum of ``a`` and ``b``."""
    return a + b


# partial() pre-binds the first positional argument, so plus3(b) == add(3, b).
plus3 = functools.partial(add, 3)
plus3(4)
# 7