1. Entry point
Mask2Former/mask2former/maskformer_model.py
features = self.backbone(images.tensor)
outputs = self.sem_seg_head(features)
2.1 outputs = self.sem_seg_head(features)
Mask2Former/mask2former/modeling/meta_arch/mask_former_head.py
mask_features, transformer_encoder_features, multi_scale_features = self.pixel_decoder.forward_features(features)
predictions = self.predictor(multi_scale_features, mask_features, mask)
2.2 self.pixel_decoder.forward_features(features)
Mask2Former/mask2former/modeling/pixel_decoder/msdeformattn.py
def forward_features(self, features): # called from 2.1; self.transformer is expanded in 2.2.1
y, spatial_shapes, level_start_index = self.transformer(srcs, pos)
2.2.1 self.transformer(srcs, pos)
Mask2Former/mask2former/modeling/pixel_decoder/msdeformattn.py
self.transformer = MSDeformAttnTransformerEncoderOnly(
2.3 predictions = self.predictor(multi_scale_features, mask_features, mask)
Mask2Former/mask2former/modeling/transformer_decoder/mask2former_transformer_decoder.py
@TRANSFORMER_DECODER_REGISTRY.register()
class MultiScaleMaskedTransformerDecoder(nn.Module):
def forward(self, x, mask_features, mask = None):
out = {
'pred_logits': predictions_class[-1],
'pred_masks': predictions_mask[-1],
'aux_outputs': self._set_aux_loss(
predictions_class if self.mask_classification else None, predictions_mask
)
}
return out