在用FasterRCNN跑自己的数据集的时候,发现几乎没有性能(0.8%),后来找到原因发现是:
FasterRCNN中,第二个stage(RoI head)的SingleRoIExtractor的featmap_strides被误写成了[8, 16, 32, 64],而正确的配置应如下所示(featmap_strides=[4, 8, 16, 32]):
# RoI head (second stage) section of the detector config; the rest of the
# config is elided below.
roi_head=dict(
    type='StandardRoIHead',
    bbox_roi_extractor=dict(
        type='SingleRoIExtractor',
        roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
        out_channels=256,
        # One stride per feature level handed to the extractor; one RoI layer
        # is built per entry with spatial_scale = 1 / stride.
        featmap_strides=[4, 8, 16, 32]
    ),
......
为什么这个变量影响这么大,下面查看代码进行分析。
(mmdetection/mmdet/models/roi_heads/roi_extractors/single_level_roi_extractor.py)
class SingleRoIExtractor(BaseRoIExtractor):
    """RoI feature extractor (excerpt showing the constructor only).

    The constructor forwards the RoI layer config, channel count and
    strides to ``BaseRoIExtractor`` and stores ``finest_scale``.
    """

    def __init__(self,
                 roi_layer: ConfigType,
                 out_channels: int,
                 featmap_strides: List[int],
                 finest_scale: int = 56,
                 init_cfg: OptMultiConfig = None) -> None:
        """Initialize the extractor.

        Args:
            roi_layer: Config of the RoI layer; forwarded to the base class.
            out_channels: Forwarded to the base class.
            featmap_strides: One stride per input feature level; forwarded
                to the base class.
            finest_scale: Scale threshold stored on the instance.
                Defaults to 56.
            init_cfg: Forwarded to the base class.
        """
        super().__init__(
            roi_layer=roi_layer,
            out_channels=out_channels,
            featmap_strides=featmap_strides,
            init_cfg=init_cfg)
        self.finest_scale = finest_scale
(mmdetection/mmdet/models/roi_heads/roi_extractors/base_roi_extractor.py)
class BaseRoIExtractor(BaseModule, metaclass=ABCMeta):
    """Base RoI extractor (excerpt): builds one RoI layer per stride."""

    def __init__(self,
                 roi_layer: ConfigType,  # e.g. dict(type='RoIAlign', output_size=7, sampling_ratio=0)
                 out_channels: int,
                 featmap_strides: List[int],
                 init_cfg: OptMultiConfig = None) -> None:
        """Initialize the base module and build the per-stride RoI layers.

        Args:
            roi_layer: Config of the RoI layer; see ``build_roi_layers``.
            out_channels: Not used in the lines shown here.
            featmap_strides: One stride per feature level; one RoI layer is
                built for each entry.
            init_cfg: Passed to the parent initializer.
        """
        super().__init__(init_cfg=init_cfg)
        # NOTE(review): this excerpt does not show ``out_channels`` or
        # ``featmap_strides`` being stored on ``self`` -- presumably elided
        # from the quote; confirm against the upstream file.
        self.roi_layers = self.build_roi_layers(roi_layer, featmap_strides)

    def build_roi_layers(self, layer_cfg: ConfigType,
                         featmap_strides: List[int]) -> nn.ModuleList:
        """Build one RoI layer per feature-map stride.

        Args:
            layer_cfg: Layer config. Its ``type`` entry is either the name
                of an attribute of ``ops`` or the layer class itself; the
                remaining entries are forwarded as keyword arguments.
            featmap_strides: Strides of the feature levels. Each stride
                ``s`` yields a layer with ``spatial_scale=1 / s``.

        Returns:
            nn.ModuleList: The layers, in the order of ``featmap_strides``.
        """
        # Work on a copy so popping 'type' does not mutate the caller's config.
        cfg = layer_cfg.copy()
        layer_type = cfg.pop('type')
        if isinstance(layer_type, str):
            # A string type is resolved by name on ``ops``.
            assert hasattr(ops, layer_type)
            layer_cls = getattr(ops, layer_type)
        else:
            layer_cls = layer_type  # already a class, e.g. RoIAlign
        # The stride only enters through spatial_scale (1 / stride).
        roi_layers = nn.ModuleList(
            [layer_cls(spatial_scale=1 / s, **cfg) for s in featmap_strides])
        return roi_layers
(mmcv/ops/roi_align.py)
class RoIAlign(nn.Module):
    """RoIAlign layer (excerpt; the rest of the class is elided)."""

    def forward(self, input: torch.Tensor, rois: torch.Tensor) -> torch.Tensor:
        """Run ``roi_align`` on ``input`` for the given ``rois``.

        The layer's own ``output_size``, ``spatial_scale``,
        ``sampling_ratio``, ``pool_mode`` and ``aligned`` settings are
        passed through to ``roi_align``.
        """
        ......
        return roi_align(input, rois, self.output_size, self.spatial_scale,
                         self.sampling_ratio, self.pool_mode, self.aligned)
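也就是说,在RCNN阶段,用来提取RoI特征的特征层就是neck输出的特征层中的前len(self.featmap_strides)层,即x[:len(self.featmap_strides)];起始层不受self.featmap_strides的控制(也没有其它参数控制)。但self.featmap_strides决定了每个RoIAlign层的spatial_scale(=1/stride),也就是RoI坐标映射到特征图坐标时的缩放比例。以neck输出stride依次为4、8、16、32、64的FPN为例:如果把featmap_strides写成[8, 16, 32, 64],实际取到的仍然是stride为4、8、16、32的前四层特征,却分别按1/8、1/16、1/32、1/64去缩放RoI,RoI与特征图的位置就对不上了。从这个角度上说,featmap_strides的修改只能是删除尾部的任意个连续的stride,不能修改起始的stride。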
(mmdetection/mmdet/models/roi_heads/standard_roi_head.py)
class StandardRoIHead(BaseRoIHead):
    """Simplest base roi head including one bbox head and one mask head."""

    def _bbox_forward(self, x: Tuple[Tensor], rois: Tensor) -> dict:
        """Box head forward function used in both training and testing.

        Args:
            x (tuple[Tensor]): List of multi-level img features.
            rois (Tensor): RoIs with the shape (n, 5) where the first
                column indicates batch id of each RoI

        NOTE(review): this excerpt stops after the RoI feature extraction;
        the code that produces the returned dict is not shown here.
        """
        # TODO: a more flexible way to decide which feature maps to use
        # Only the first ``num_inputs`` levels of ``x`` are handed to the
        # extractor; nothing here selects a different starting level.
        bbox_feats = self.bbox_roi_extractor(
            x[:self.bbox_roi_extractor.num_inputs], rois)
class BaseRoIExtractor(BaseModule, metaclass=ABCMeta):
    """Base RoI extractor (excerpt showing the ``num_inputs`` property only)."""

    @property
    def num_inputs(self) -> int:
        """int: Number of input feature maps, i.e. ``len(self.featmap_strides)``."""
        return len(self.featmap_strides)
通过对下面代码的分析可以知道,在RCNN阶段,每个RoI会根据自身的尺度(宽×高的平方根)通过map_roi_levels被分配到某一个特征层上:
class SingleRoIExtractor(BaseRoIExtractor):
    """RoI feature extractor (excerpt: constructor and ``map_roi_levels``).

    ``map_roi_levels`` assigns each RoI a single feature level index based
    on the RoI's scale relative to ``finest_scale``.
    """

    def __init__(self,
                 roi_layer: ConfigType,
                 out_channels: int,
                 featmap_strides: List[int],
                 finest_scale: int = 56,
                 init_cfg: OptMultiConfig = None) -> None:
        """Initialize the extractor.

        Args:
            roi_layer: Config of the RoI layer; forwarded to the base class.
            out_channels: Forwarded to the base class.
            featmap_strides: One stride per input feature level; forwarded
                to the base class.
            finest_scale: Scale threshold used by ``map_roi_levels``.
                Defaults to 56.
            init_cfg: Forwarded to the base class.
        """
        super().__init__(
            roi_layer=roi_layer,
            out_channels=out_channels,
            featmap_strides=featmap_strides,
            init_cfg=init_cfg)
        self.finest_scale = finest_scale

    def map_roi_levels(self, rois: Tensor, num_levels: int) -> Tensor:
        """Map rois to corresponding feature levels by scales.

        - scale < finest_scale * 2: level 0
        - finest_scale * 2 <= scale < finest_scale * 4: level 1
        - finest_scale * 4 <= scale < finest_scale * 8: level 2
        - scale >= finest_scale * 8: level 3

        Args:
            rois (Tensor): Input RoIs, shape (k, 5).
            num_levels (int): Total level number.

        Returns:
            Tensor: Level index (0-based) of each RoI, shape (k, )
        """
        # Presumably columns 1-4 hold (x1, y1, x2, y2), which makes this
        # sqrt(width * height) -- confirm against the RoI layout.
        scale = torch.sqrt(
            (rois[:, 3] - rois[:, 1]) * (rois[:, 4] - rois[:, 2]))
        # floor(log2(scale / finest_scale)) gives the level; the 1e-6 term is
        # presumably there to keep log2 finite for zero-area RoIs.
        target_lvls = torch.floor(torch.log2(scale / self.finest_scale + 1e-6))
        # Clamp to the valid level range [0, num_levels - 1].
        target_lvls = target_lvls.clamp(min=0, max=num_levels - 1).long()
        return target_lvls