Faster RCNN中Backbone(ResNet50+FPN)

Backbone

这部分展示了使用ResNet50+FPN作为Faster RCNN的特征提取部分

import os
from collections import OrderedDict
import torch
import torch.nn as nn
import torch.nn.functional as F


class IntermediateLayerGetter(nn.ModuleDict):
    def __init__(self, model, return_layers):
        # 首先判断 return_layer中的key 是否在model中
        if not set(return_layers).issubset([name for name, _ in model.named_children()]):
            raise ValueError("return_layers are not present in model")

        orig_return_layers = return_layers
        return_layers = {str(k): str(v) for k, v in return_layers.items()}
        layers = OrderedDict()

        # 遍历模型子模块按顺序存入有序字典
        # 只保存layer4及其之前的结构,舍去之后不用的结构
        for name, module in model.named_children():
            layers[name] = module
            if name in return_layers:
                del return_layers[name]
            if not return_layers:
                break

        super(IntermediateLayerGetter, self).__init__(layers)
        self.return_layers = orig_return_layers

    def forward(self, x):
        out = OrderedDict()
        # 依次遍历模型的所有子模块,并进行正向传播,
        # 收集layer1, layer2, layer3, layer4的输出
        for name, module in self.items():
            x = module(x)
            if name in self.return_layers:
                out_name = self.return_layers[name]
                out[out_name] = x
        return out


class Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, in_channel, out_channel, stride=1, downsample=None, norm_layer=nn.BatchNorm2d):
        super(Bottleneck, self).__init__()

        self.conv1 = nn.Conv2d(in_channels=in_channel, out_channels=out_channel, kernel_size=1, stride=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channel)

        self.conv2 = nn.Conv2d(in_channels=out_channel, out_channels=out_channel, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channel)

        self.conv3 = nn.Conv2d(in_channels=out_channel, out_channels=out_channel * self.expansion, kernel_size=1, stride=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_channel * self.expansion)

        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample

    def forward(self, x):
        identify = x
        if self.downsample is not None:
            identify = self.downsample(x)

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        out += identify
        out = self.relu(out)

        return out


class ResNet(nn.Module):
    # 仅包含分类器前面的模型结构
    def __init__(self, block, blocks_num, num_classes=1000):
        super(ResNet, self).__init__()

        self._norm_layer = nn.BatchNorm2d

        self.in_channel = 64

        self.conv1 = nn.Conv2d(3, self.in_channel, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(self.in_channel)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, blocks_num[0])
        self.layer2 = self._make_layer(block, 128, blocks_num[1], stride=2)
        self.layer3 = self._make_layer(block, 256, blocks_num[2], stride=2)
        self.layer4 = self._make_layer(block, 512, blocks_num[3], stride=2)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')

    def _make_layer(self, block, channel, block_num, stride=1):
        norm_layer = self._norm_layer
        downsample = None
        if stride != 1 or self.in_channel != channel * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.in_channel, channel * block.expansion, kernel_size=1, stride=stride, bias=False),
                norm_layer(channel * block.expansion))

        layers = []
        layers.append(block(self.in_channel, channel, downsample=downsample,
                            stride=stride, norm_layer=norm_layer))
        self.in_channel = channel * block.expansion

        for _ in range(1, block_num):
            layers.append(block(self.in_channel, channel, norm_layer=norm_layer))

        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        return x


class MaxPool_Block(nn.Module):

    def forward(self, x, names):
        names.append('pool')
        # input, kernel_size, stride, padding
        x.append(F.max_pool2d(x[-1], 1, 2, 0))
        return x, names


class FPN(nn.Module):
    def __init__(self, backbone_output_channels_list, out_channels, maxpool_blocks=True):
        super(FPN, self).__init__()

        # 用来调整resnet输出特征矩阵(layer1,2,3,4)的channel(kernel_size=1)
        self.inner_blocks = nn.ModuleList()
        # 对调整后的特征矩阵使用3x3的卷积核来得到对应的预测特征矩阵
        self.layer_blocks = nn.ModuleList()

        for backbone_channels in backbone_output_channels_list:
            inner_block_module = nn.Conv2d(backbone_channels, out_channels, 1)
            layer_block_module = nn.Conv2d(out_channels, out_channels, 3, padding=1)

            self.inner_blocks.append(inner_block_module)
            self.layer_blocks.append(layer_block_module)
        # 初始化权重参数
        for m in self.children():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_uniform_(m.weight, a=1)
                nn.init.constant_(m.bias, 0)

        self.extra_blocks = maxpool_blocks

    def forward(self, x):
        # 对应backbone中layer1、2、3、4特征图的输出的key
        names = list(x.keys())
        # 对应backbone中layer1、2、3、4特征图的输出的value
        x = list(x.values())
        # backbone中layer4的输出调整到指定维度上 channels: 2048->256
        last_inner = self.inner_blocks[-1](x[-1])
        # results中保存着经过3*3conv后的每个预测特征层
        results = []
        # layer4
        results.append(self.layer_blocks[-1](last_inner))
        # layer4+layer3
        layer3_inner = self.inner_blocks[2](x[2])
        layer3_shape = layer3_inner.shape[-2:]
        last_layer3 = F.interpolate(last_inner, size=layer3_shape, mode="nearest")
        last_layer3_sum = layer3_inner + last_layer3
        results.insert(0, self.layer_blocks[2](last_layer3_sum))
        # layer3+layer2
        layer2_inner = self.inner_blocks[1](x[1])
        layer2_shape = layer2_inner.shape[-2:]
        layer3_layer2 = F.interpolate(last_layer3_sum, size=layer2_shape, mode="nearest")
        layer3_layer2_sum = layer2_inner + layer3_layer2
        results.insert(0, self.layer_blocks[1](layer3_layer2_sum))
        # layer2+layer1
        layer1_inner = self.inner_blocks[0](x[0])
        layer1_shape = layer1_inner.shape[-2:]
        layer2_layer1 = F.interpolate(layer3_layer2_sum, size=layer1_shape, mode="nearest")
        layer2_layer1_sum = layer1_inner + layer2_layer1
        results.insert(0, self.layer_blocks[0](layer2_layer1_sum))
        # results 存储着FPN后特征图从大到小的 key:0,1,2,3,4. value(H W shape):[1/4, 1/8, 1/16, 1/32, 1/64]
        if self.extra_blocks:
            results, names = self.extra_blocks(results, names)
        # out: key: 0,1,2,3,pool
        out = OrderedDict([(k, v) for k, v in zip(names, results)])

        return out


class Backbone_FPN(nn.Module):
    def __init__(self, backbone, return_layers, in_channels_list, out_channels):
        super(Backbone_FPN, self).__init__()

        maxpool_block = MaxPool_Block()
        self.body = IntermediateLayerGetter(backbone, return_layers=return_layers)
        self.fpn = FPN(
            backbone_output_channels_list=in_channels_list,
            out_channels=out_channels,
            maxpool_blocks=maxpool_block,
        )

        self.out_channels = out_channels

    def forward(self, x):
        x = self.body(x)
        x = self.fpn(x)

        return x


def ResNet50_FPN_backbone():
    resnet_backbone = ResNet(Bottleneck, [3, 4, 6, 3])
    return_layers = {'layer1': '0', 'layer2': '1', 'layer3': '2', 'layer4': '3'}
    in_channels_list = [256, 512, 1024, 2048]
    out_channels = 256

    return Backbone_FPN(resnet_backbone, return_layers, in_channels_list, out_channels)


if __name__ == '__main__':
    model = ResNet50_FPN_backbone()
    print(model)

你可能感兴趣的:(目标检测,pytorch,深度学习,目标检测)