Paper link: https://arxiv.org/abs/2210.02093
Code link: GitHub - QY1994-0919/CFPNet: Centralized Feature Pyramid for Object Detection
Straight to the code — if you want to use this module (the EVCBlock and everything it depends on), just copy the code below.
import torch
import torch.nn as nn
from torch.nn import functional as F
from functools import partial
from timm.models.layers import DropPath, trunc_normal_
# LVC
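# Encoding: soft-assigns every spatial feature vector to a learnable codebook of
# `num_codes` codewords and aggregates the scaled residuals (dictionary-style
# encoding used inside the LVC branch).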
class Encoding(nn.Module):
def __init__(self, in_channels, num_codes):
super(Encoding, self).__init__()
# init codewords and smoothing factor
        self.in_channels, self.num_codes = in_channels, num_codes
        std = 1. / ((num_codes * in_channels) ** 0.5)
# [num_codes, channels]
self.codewords = nn.Parameter(
torch.empty(num_codes, in_channels, dtype=torch.float).uniform_(-std, std), requires_grad=True)
# [num_codes]
self.scale = nn.Parameter(torch.empty(num_codes, dtype=torch.float).uniform_(-1, 0), requires_grad=True)
@staticmethod
def scaled_l2(x, codewords, scale):
num_codes, in_channels = codewords.size()
b = x.size(0)
expanded_x = x.unsqueeze(2).expand((b, x.size(1), num_codes, in_channels))
        # --- reshape the codebook to (1, 1, num_codes, in_channels)
        reshaped_codewords = codewords.view((1, 1, num_codes, in_channels))
        # reshape scale so it broadcasts over (b, N, num_codes)
        reshaped_scale = scale.view((1, 1, num_codes))
        # --- scaled squared distances between features and codewords: (b, N, num_codes)
        scaled_l2_norm = reshaped_scale * (expanded_x - reshaped_codewords).pow(2).sum(dim=3)
return scaled_l2_norm
@staticmethod
def aggregate(assignment_weights, x, codewords):
num_codes, in_channels = codewords.size()
        # --- reshape the codebook to (1, 1, num_codes, in_channels)
        reshaped_codewords = codewords.view((1, 1, num_codes, in_channels))
        b = x.size(0)
        # --- expand the feature vectors x to (b, N, num_codes, in_channels)
        expanded_x = x.unsqueeze(2).expand((b, x.size(1), num_codes, in_channels))
        # expand the assignment weights to (b, N, num_codes, 1)
        assignment_weights = assignment_weights.unsqueeze(3)
        # --- aggregate the weighted residuals over the spatial positions: (b, num_codes, in_channels)
        encoded_feat = (assignment_weights * (expanded_x - reshaped_codewords)).sum(1)
return encoded_feat
def forward(self, x):
assert x.dim() == 4 and x.size(1) == self.in_channels
        b, in_channels, h, w = x.size()
        # flatten the spatial dims: [batch_size, height x width, channels]
        x = x.view(b, self.in_channels, -1).transpose(1, 2).contiguous()
        # assignment_weights: [batch_size, height x width, num_codes]
        assignment_weights = F.softmax(self.scaled_l2(x, self.codewords, self.scale), dim=2)
# aggregate
encoded_feat = self.aggregate(assignment_weights, x, self.codewords)
return encoded_feat
# ConvBlock: 1x1 -> 3x3 -> 1x1 bottleneck, with an optional 1x1 projection on the residual path
class ConvBlock(nn.Module):
def __init__(self, in_channels, out_channels, stride=1, res_conv=False, act_layer=nn.ReLU, groups=1,
norm_layer=partial(nn.BatchNorm2d, eps=1e-6), drop_block=None, drop_path=None):
super(ConvBlock, self).__init__()
self.in_channels = in_channels
expansion = 4
c = out_channels // expansion
self.conv1 = nn.Conv2d(in_channels, c, kernel_size=1, stride=1, padding=0, bias=False) # [64, 256, 1, 1]
self.bn1 = norm_layer(c)
self.act1 = act_layer(inplace=True)
self.conv2 = nn.Conv2d(c, c, kernel_size=3, stride=stride, groups=groups, padding=1, bias=False)
self.bn2 = norm_layer(c)
self.act2 = act_layer(inplace=True)
self.conv3 = nn.Conv2d(c, out_channels, kernel_size=1, stride=1, padding=0, bias=False)
self.bn3 = norm_layer(out_channels)
self.act3 = act_layer(inplace=True)
if res_conv:
self.residual_conv = nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=0, bias=False)
self.residual_bn = norm_layer(out_channels)
self.res_conv = res_conv
self.drop_block = drop_block
self.drop_path = drop_path
def zero_init_last_bn(self):
nn.init.zeros_(self.bn3.weight)
def forward(self, x, return_x_2=True):
residual = x
x = self.conv1(x)
x = self.bn1(x)
if self.drop_block is not None:
x = self.drop_block(x)
x = self.act1(x)
x = self.conv2(x) #if x_t_r is None else self.conv2(x + x_t_r)
x = self.bn2(x)
if self.drop_block is not None:
x = self.drop_block(x)
x2 = self.act2(x)
x = self.conv3(x2)
x = self.bn3(x)
if self.drop_block is not None:
x = self.drop_block(x)
if self.drop_path is not None:
x = self.drop_path(x)
if self.res_conv:
residual = self.residual_conv(residual)
residual = self.residual_bn(residual)
x += residual
x = self.act3(x)
if return_x_2:
return x, x2
else:
return x
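# Mean: thin nn.Module wrapper around tensor.mean so the reduction can sit inside nn.Sequential
# (used at the end of the LVC encoder to pool over the codewords).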
class Mean(nn.Module):
def __init__(self, dim, keep_dim=False):
super(Mean, self).__init__()
self.dim = dim
self.keep_dim = keep_dim
def forward(self, input):
return input.mean(self.dim, self.keep_dim)
class Mlp(nn.Module):
"""
Implementation of MLP with 1*1 convolutions. Input: tensor with shape [B, C, H, W]
"""
def __init__(self, in_features, hidden_features=None,
out_features=None, act_layer=nn.GELU, drop=0.):
super().__init__()
out_features = out_features or in_features
hidden_features = hidden_features or in_features
self.fc1 = nn.Conv2d(in_features, hidden_features, 1)
self.act = act_layer()
self.fc2 = nn.Conv2d(hidden_features, out_features, 1)
self.drop = nn.Dropout(drop)
self.apply(self._init_weights)
def _init_weights(self, m):
if isinstance(m, nn.Conv2d):
trunc_normal_(m.weight, std=.02)
if m.bias is not None:
nn.init.constant_(m.bias, 0)
def forward(self, x):
x = self.fc1(x)
x = self.act(x)
x = self.drop(x)
x = self.fc2(x)
x = self.drop(x)
return x
class LayerNormChannel(nn.Module):
"""
LayerNorm only for Channel Dimension.
Input: tensor in shape [B, C, H, W]
"""
def __init__(self, num_channels, eps=1e-05):
super().__init__()
self.weight = nn.Parameter(torch.ones(num_channels))
self.bias = nn.Parameter(torch.zeros(num_channels))
self.eps = eps
def forward(self, x):
u = x.mean(1, keepdim=True)
s = (x - u).pow(2).mean(1, keepdim=True)
x = (x - u) / torch.sqrt(s + self.eps)
x = self.weight.unsqueeze(-1).unsqueeze(-1) * x \
+ self.bias.unsqueeze(-1).unsqueeze(-1)
return x
class GroupNorm(nn.GroupNorm):
"""
Group Normalization with 1 group.
Input: tensor in shape [B, C, H, W]
"""
def __init__(self, num_channels, **kwargs):
super().__init__(1, num_channels, **kwargs)
# The following helpers (SiLU, get_activation, BaseConv, DWConv) are adapted from YOLOX.
# Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
class SiLU(nn.Module):
"""export-friendly version of nn.SiLU()"""
@staticmethod
def forward(x):
return x * torch.sigmoid(x)
def get_activation(name="silu", inplace=True):
if name == "silu":
module = nn.SiLU(inplace=inplace)
elif name == "relu":
module = nn.ReLU(inplace=inplace)
elif name == "lrelu":
module = nn.LeakyReLU(0.1, inplace=inplace)
else:
raise AttributeError("Unsupported act type: {}".format(name))
return module
class BaseConv(nn.Module):
"""A Conv2d -> Batchnorm -> silu/leaky relu block""" # CBL
def __init__(
self, in_channels, out_channels, ksize, stride, groups=1, bias=False, act="silu"
):
super().__init__()
# same padding
pad = (ksize - 1) // 2
self.conv = nn.Conv2d(
in_channels,
out_channels,
kernel_size=ksize,
stride=stride,
padding=pad,
groups=groups,
bias=bias,
)
self.bn = nn.BatchNorm2d(out_channels)
self.act = get_activation(act, inplace=True)
def forward(self, x):
return self.act(self.bn(self.conv(x)))
def fuseforward(self, x):
return self.act(self.conv(x))
class DWConv(nn.Module):
"""Depthwise Conv + Conv"""
def __init__(self, in_channels, out_channels, ksize, stride=1, act="silu"):
super().__init__()
self.dconv = BaseConv(
in_channels,
in_channels,
ksize=ksize,
stride=stride,
groups=in_channels,
act=act,
)
self.pconv = BaseConv(
in_channels, out_channels, ksize=1, stride=1, groups=1, act=act
)
def forward(self, x):
x = self.dconv(x)
return self.pconv(x)
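# LVC (Learnable Visual Center) block: a residual ConvBlock, followed by codebook
# encoding that is pooled into a channel descriptor and turned into a sigmoid gate,
# which re-weights (and residually augments) the input feature map.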
class LVCBlock(nn.Module):
def __init__(self, in_channels, out_channels, num_codes, channel_ratio=0.25, base_channel=64):
super(LVCBlock, self).__init__()
self.out_channels = out_channels
        self.num_codes = num_codes
        self.conv_1 = ConvBlock(in_channels=in_channels, out_channels=in_channels, res_conv=True, stride=1)
self.LVC = nn.Sequential(
nn.Conv2d(in_channels, in_channels, 1, bias=False),
nn.BatchNorm2d(in_channels),
nn.ReLU(inplace=True),
Encoding(in_channels=in_channels, num_codes=num_codes),
nn.BatchNorm1d(num_codes),
nn.ReLU(inplace=True),
Mean(dim=1))
self.fc = nn.Sequential(nn.Linear(in_channels, in_channels), nn.Sigmoid())
def forward(self, x):
x = self.conv_1(x, return_x_2=False)
en = self.LVC(x)
gam = self.fc(en)
b, in_channels, _, _ = x.size()
y = gam.view(b, in_channels, 1, 1)
x = F.relu_(x + x * y)
return x
# LightMLPBlock
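# LightMLPBlock: a depthwise-conv token mixer followed by a 1x1-conv MLP, each wrapped
# in a pre-norm residual with optional layer scale and DropPath (PoolFormer-style).
# Note: the residual adds assume in_channels == out_channels.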
class LightMLPBlock(nn.Module):
def __init__(self, in_channels, out_channels, ksize=1, stride=1, act="silu",
mlp_ratio=4., drop=0., act_layer=nn.GELU,
use_layer_scale=True, layer_scale_init_value=1e-5, drop_path=0., norm_layer=GroupNorm): # act_layer=nn.GELU,
super().__init__()
        self.dw = DWConv(in_channels, out_channels, ksize=ksize, stride=stride, act=act)
        self.linear = nn.Linear(out_channels, out_channels)  # learnable position embedding (defined but not used in forward)
self.out_channels = out_channels
self.norm1 = norm_layer(in_channels)
self.norm2 = norm_layer(in_channels)
mlp_hidden_dim = int(in_channels * mlp_ratio)
self.mlp = Mlp(in_features=in_channels, hidden_features=mlp_hidden_dim, act_layer=nn.GELU,
drop=drop)
self.drop_path = DropPath(drop_path) if drop_path > 0. \
else nn.Identity()
self.use_layer_scale = use_layer_scale
if use_layer_scale:
self.layer_scale_1 = nn.Parameter(
layer_scale_init_value * torch.ones((out_channels)), requires_grad=True)
self.layer_scale_2 = nn.Parameter(
layer_scale_init_value * torch.ones((out_channels)), requires_grad=True)
def forward(self, x):
if self.use_layer_scale:
x = x + self.drop_path(self.layer_scale_1.unsqueeze(-1).unsqueeze(-1) * self.dw(self.norm1(x)))
x = x + self.drop_path(self.layer_scale_2.unsqueeze(-1).unsqueeze(-1) * self.mlp(self.norm2(x)))
else:
x = x + self.drop_path(self.dw(self.norm1(x)))
x = x + self.drop_path(self.mlp(self.norm2(x)))
return x
# EVCBlock
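# EVCBlock (Explicit Visual Center): a 7x7 stem, then two parallel branches — LVCBlock
# (local codebook statistics) and LightMLPBlock (the lightweight MLP branch) — whose
# outputs are concatenated and fused back to out_channels by a 1x1 conv.
# As written, it expects in_channels == out_channels (see the residual adds above).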
class EVCBlock(nn.Module):
def __init__(self, in_channels, out_channels, channel_ratio=4, base_channel=16):
super().__init__()
expansion = 2
ch = out_channels * expansion
        # Stem stage: get the feature maps by a conv block (copied from resnet.py); pre-processing before the two Conformer-style branches
        self.conv1 = nn.Conv2d(in_channels, in_channels, kernel_size=7, stride=1, padding=3, bias=False)  # stride 1, spatial size unchanged
        self.bn1 = nn.BatchNorm2d(in_channels)
        self.act1 = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=1, padding=1)  # stride 1, spatial size unchanged
# LVC
        self.lvc = LVCBlock(in_channels=in_channels, out_channels=out_channels, num_codes=64)  # codebook size fixed to 64
# LightMLPBlock
self.l_MLP = LightMLPBlock(in_channels, out_channels, ksize=1, stride=1, act="silu", act_layer=nn.GELU, mlp_ratio=4., drop=0.,
use_layer_scale=True, layer_scale_init_value=1e-5, drop_path=0., norm_layer=GroupNorm)
self.cnv1 = nn.Conv2d(ch, out_channels, kernel_size=1, stride=1, padding=0)
def forward(self, x):
x1 = self.maxpool(self.act1(self.bn1(self.conv1(x))))
# LVCBlock
x_lvc = self.lvc(x1)
# LightMLPBlock
x_lmlp = self.l_MLP(x1)
# concat
x = torch.cat((x_lvc, x_lmlp), dim=1)
x = self.cnv1(x)
return x
The code below tests the input/output shapes; adjust the input/output channel counts as needed:
a = torch.ones(3, 8, 20, 20)
b = EVCBlock(8, 8)
c = b(a)
print(c.size())
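If you want a slightly more realistic smoke test, here is a minimal sketch that applies EVCBlock to a single backbone feature map. The 256-channel setting and the ToyNeck wrapper are illustrative assumptions of mine, not part of the original repo; note that the block as written expects in_channels == out_channels because of the residual additions inside both branches.
# Hypothetical usage sketch (assumes the classes defined above are already in scope).
class ToyNeck(nn.Module):
    def __init__(self, channels=256):
        super().__init__()
        # EVCBlock keeps both the channel count and the spatial size unchanged
        self.evc = EVCBlock(channels, channels)
    def forward(self, feat):
        return self.evc(feat)

feat = torch.randn(2, 256, 20, 20)  # e.g. a P5-level feature map
out = ToyNeck(256)(feat)
print(out.size())  # expected: torch.Size([2, 256, 20, 20])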
Thanks for the likes and bookmarks — I will keep updating this. The more likes, the faster the updates!