文章解读:超越 ShuffleNet、MobileNet、MobileViT 等模型的新 backbone——FasterNet(作者:athrunsunny,CSDN 博客)
论文地址:https://arxiv.org/abs/2303.03667
创建网络配置文件:
yolov5-pconv.yaml
# YOLOv5 by Ultralytics, GPL-3.0 license
# Parameters
nc: 80 # number of classes
depth_multiple: 1 # model depth multiple
width_multiple: 1 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 v6.0 backbone
# FasterNet-style backbone: PatchEmbed is a stride-4 stem, PatchMerging
# downsamples by 2, BasicStage stacks partial-convolution (PConv) blocks.
backbone:
# [from, number, module, args]
[[-1, 1, PatchEmbed, [40, 4, 4, nn.BatchNorm2d]], # 0-P1/4
[-1, 1, BasicStage, [40, 1]], # 1
[-1, 1, PatchMerging, [80,2,2]], # 2-P2/8
[-1, 1, BasicStage, [80, 1]], # 3
[-1, 1, BasicStage, [80, 1]], # 4  (stride-8 feature, concatenated by the head)
[-1, 1, PatchMerging, [160,2,2]], # 5  (stride 16)
[-1, 1, BasicStage, [160, 1]], # 6
[-1, 1, BasicStage, [160, 1]], # 7
[-1, 1, BasicStage, [160, 1]], # 8
[-1, 1, BasicStage, [160, 1]], # 9
[-1, 1, BasicStage, [160, 1]], # 10
[-1, 1, BasicStage, [160, 1]], # 11
[-1, 1, BasicStage, [160, 1]], # 12
[-1, 1, BasicStage, [160, 1]], # 13  (stride-16 feature, concatenated by the head)
[-1, 1, PatchMerging, [320,2,2]], # 14  (stride 32)
[-1, 1, BasicStage, [320, 1]], # 15
[-1, 1, BasicStage, [320, 1]], # 16
[-1, 1, SPPF, [320, 5]], # 17
]
# YOLOv5 v6.0 head
head:
[[-1, 1, Conv, [256, 1, 1]], # 18
[-1, 1, nn.Upsample, [None, 2, 'nearest']], # 19
[[-1, 13], 1, Concat, [1]], # 20 cat backbone P4
[-1, 3, C3, [256, False]], # 21
[-1, 1, Conv, [128, 1, 1]], # 22
[-1, 1, nn.Upsample, [None, 2, 'nearest']], # 23
[[-1, 4], 1, Concat, [1]], # 24 cat backbone P3
[-1, 3, C3, [128, False]], # 25 (P3/8-small)
[-1, 1, Conv, [128, 3, 2]], # 26
[[-1, 22], 1, Concat, [1]], # 27 cat head P4
[-1, 3, C3, [256, False]], # 28 (P4/16-medium)
[-1, 1, Conv, [256, 3, 2]], # 29
[[-1, 18], 1, Concat, [1]], # 30 cat head P5
[-1, 3, C3, [512, False]], # 31 (P5/32-large)
[[25, 28, 31], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]
在models\common.py中添加:
class Partial_conv3(nn.Module):
    """FasterNet partial convolution.

    Applies a 3x3 convolution to the first ``dim // n_div`` channels and
    passes the remaining channels through untouched.
    """

    def __init__(self, dim, n_div, forward):
        super().__init__()
        self.dim_conv3 = dim // n_div
        self.dim_untouched = dim - self.dim_conv3
        self.partial_conv3 = nn.Conv2d(self.dim_conv3, self.dim_conv3, 3, 1, 1, bias=False)
        # Pick the forward implementation by name via a small dispatch table.
        dispatch = {
            'slicing': self.forward_slicing,
            'split_cat': self.forward_split_cat,
        }
        if forward not in dispatch:
            raise NotImplementedError
        self.forward = dispatch[forward]

    def forward_slicing(self, x):
        """Inference-only variant: convolve the leading channels in place on a copy."""
        out = x.clone()  # keep the caller's tensor intact for the residual connection later
        out[:, :self.dim_conv3, :, :] = self.partial_conv3(out[:, :self.dim_conv3, :, :])
        return out

    def forward_split_cat(self, x):
        """Training/inference variant: split, convolve the first part, re-concatenate."""
        head, tail = torch.split(x, [self.dim_conv3, self.dim_untouched], dim=1)
        return torch.cat((self.partial_conv3(head), tail), 1)
class MLPBlock(nn.Module):
    """FasterNet block: Partial_conv3 spatial mixing followed by a 1x1-conv MLP,
    joined to the input by a residual (optionally layer-scaled) connection.

    Args:
        dim: number of input/output channels.
        n_div: channel-partition factor forwarded to Partial_conv3.
        mlp_ratio: hidden-channel expansion ratio of the MLP.
        drop_path: stochastic-depth rate; 0 disables DropPath.
        layer_scale_init_value: if > 0, enables a learnable per-channel scale
            on the MLP branch (and switches to forward_layer_scale).
        act_layer: activation constructor, e.g. nn.ReLU.
        norm_layer: normalization constructor, e.g. nn.BatchNorm2d.
        pconv_fw_type: 'slicing' or 'split_cat', forwarded to Partial_conv3.
    """

    def __init__(self,
                 dim,
                 n_div,
                 mlp_ratio,
                 drop_path,
                 layer_scale_init_value,
                 act_layer,
                 norm_layer,
                 pconv_fw_type
                 ):
        super().__init__()
        self.dim = dim
        self.mlp_ratio = mlp_ratio
        # NOTE(review): DropPath comes from timm; it must be importable only
        # when drop_path > 0 — confirm the import exists in this module.
        self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()
        self.n_div = n_div

        mlp_hidden_dim = int(dim * mlp_ratio)
        self.mlp = nn.Sequential(
            nn.Conv2d(dim, mlp_hidden_dim, 1, bias=False),
            norm_layer(mlp_hidden_dim),
            act_layer(),
            nn.Conv2d(mlp_hidden_dim, dim, 1, bias=False),
        )

        self.spatial_mixing = Partial_conv3(dim, n_div, pconv_fw_type)

        if layer_scale_init_value > 0:
            self.layer_scale = nn.Parameter(
                layer_scale_init_value * torch.ones(dim), requires_grad=True)
            # Override the instance's forward with the layer-scaled variant.
            # (The original else-branch `self.forward = self.forward` was a
            # no-op and has been removed.)
            self.forward = self.forward_layer_scale

    def forward(self, x):
        """Residual FasterNet block without layer scale."""
        shortcut = x
        x = self.spatial_mixing(x)
        return shortcut + self.drop_path(self.mlp(x))

    def forward_layer_scale(self, x):
        """Residual FasterNet block with a learnable per-channel scale on the MLP branch."""
        shortcut = x
        x = self.spatial_mixing(x)
        return shortcut + self.drop_path(
            self.layer_scale.unsqueeze(-1).unsqueeze(-1) * self.mlp(x))
class BasicStage(nn.Module):
    """A stack of ``depth`` MLPBlocks operating at a fixed channel width ``dim``.

    Args:
        dim: number of channels (preserved through the stage).
        depth: number of MLPBlocks to stack.
        n_div, mlp_ratio, layer_scale_init_value, norm_layer, act_layer,
        pconv_fw_type: forwarded verbatim to each MLPBlock.
    """

    def __init__(self,
                 dim,
                 depth=1,
                 n_div=4,
                 mlp_ratio=2,
                 layer_scale_init_value=0,
                 norm_layer=nn.BatchNorm2d,
                 act_layer=nn.ReLU,
                 pconv_fw_type='split_cat'
                 ):
        super().__init__()
        # The original built a fixed 13-entry torch.linspace(0, 0.0, sum([1, 2, 8, 2]))
        # table of drop-path rates that (a) was all zeros anyway and (b) would
        # IndexError for depth > 13.  All rates are zero here, so use one zero
        # per block explicitly.
        dpr = [0.0] * depth
        self.blocks = nn.Sequential(*[
            MLPBlock(dim=dim,
                     n_div=n_div,
                     mlp_ratio=mlp_ratio,
                     drop_path=dpr[i],
                     layer_scale_init_value=layer_scale_init_value,
                     norm_layer=norm_layer,
                     act_layer=act_layer,
                     pconv_fw_type=pconv_fw_type)
            for i in range(depth)
        ])

    def forward(self, x):
        """Run the input through every block in sequence."""
        return self.blocks(x)
class PatchEmbed(nn.Module):
    """Patch-embedding stem: a strided convolution followed by normalization.

    With patch_size == patch_stride the conv tiles the image into
    non-overlapping patches; ``norm_layer`` may be None for no normalization.
    """

    def __init__(self, in_chans, embed_dim, patch_size, patch_stride, norm_layer):
        super().__init__()
        self.proj = nn.Conv2d(in_chans, embed_dim,
                              kernel_size=patch_size, stride=patch_stride, bias=False)
        self.norm = norm_layer(embed_dim) if norm_layer is not None else nn.Identity()

    def forward(self, x):
        """Project then normalize."""
        return self.norm(self.proj(x))

    def fuseforward(self, x):
        """Post-fusion forward: the norm has been folded into ``proj``."""
        return self.proj(x)
class PatchMerging(nn.Module):
    """Downsampling layer: a k x k stride-s convolution followed by normalization.

    Changes the channel count from ``dim`` to ``out_dim`` while reducing the
    spatial resolution; ``norm_layer`` may be None for no normalization.
    """

    def __init__(self, dim, out_dim, k, patch_stride2, norm_layer=nn.BatchNorm2d):
        super().__init__()
        self.reduction = nn.Conv2d(dim, out_dim,
                                   kernel_size=k, stride=patch_stride2, bias=False)
        self.norm = norm_layer(out_dim) if norm_layer is not None else nn.Identity()

    def forward(self, x):
        """Reduce then normalize."""
        return self.norm(self.reduction(x))

    def fuseforward(self, x):
        """Post-fusion forward: the norm has been folded into ``reduction``."""
        return self.reduction(x)
在yolo.py中的parse_model中做如下修改
def parse_model(d, ch):  # model_dict, input_channels(3)
    # Parse a YOLOv5 model.yaml dictionary into (nn.Sequential model, sorted
    # savelist of layer indices whose outputs are needed later by Concat/Detect).
    # NOTE(review): relies on many module classes (Conv, C3, Detect, BasicStage,
    # PatchEmbed, PatchMerging, ...) plus LOGGER/colorstr/make_divisible being
    # in scope in this module.
    LOGGER.info(f"\n{'':>3}{'from':>18}{'n':>3}{'params':>10} {'module':<40}{'arguments':<30}")
    anchors, nc, gd, gw, act = d['anchors'], d['nc'], d['depth_multiple'], d['width_multiple'], d.get('activation')
    if act:
        Conv.default_act = eval(act)  # redefine default activation, i.e. Conv.default_act = nn.SiLU()
        LOGGER.info(f"{colorstr('activation:')} {act}")  # print
    na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors  # number of anchors
    no = na * (nc + 5)  # number of outputs = anchors * (classes + 5)
    layers, save, c2 = [], [], ch[-1]  # layers, savelist, ch out
    for i, (f, n, m, args) in enumerate(d['backbone'] + d['head']):  # from, number, module, args
        m = eval(m) if isinstance(m, str) else m  # eval strings
        for j, a in enumerate(args):
            # eval yaml strings like 'nc', 'anchors', 'nn.BatchNorm2d'; leave
            # unresolvable names as-is
            with contextlib.suppress(NameError):
                args[j] = eval(a) if isinstance(a, str) else a  # eval strings
        n = n_ = max(round(n * gd), 1) if n > 1 else n  # depth gain
        if m in {
                Conv, GhostConv, Bottleneck, GhostBottleneck, SPP, SPPF, DWConv, MixConv2d, Focus, CrossConv,
                BottleneckCSP, C3, C3TR, C3SPP, C3Ghost, nn.ConvTranspose2d, DWConvTranspose2d, C3x, StemBlock,
                BlazeBlock, DoubleBlazeBlock, ShuffleV2Block, MobileBottleneck, InvertedResidual, ConvBNReLU,
                RepVGGBlock, SEBlock, RepBlock, SimCSPSPPF,BasicStage,PatchEmbed,PatchMerging}:
            c1, c2 = ch[f], args[0]  # input channels, requested output channels
            if c2 != no:  # if not output
                c2 = make_divisible(c2 * gw, 8)
            if m == InvertedResidual:
                c2 = make_divisible(c2 * gw, 4 if gw == 0.1 else 8)
            args = [c1, c2, *args[1:]]
            if m in {BottleneckCSP, C3, C3TR, C3Ghost, C3x}:
                args.insert(2, n)  # number of repeats
                n = 1
            if m in {BasicStage}:
                # BasicStage(dim, depth, ...) takes a single channel argument,
                # so drop the inserted c2 and call it as BasicStage(c1, depth).
                # NOTE(review): with width_multiple != 1 the recorded output
                # width c2 may differ from the actual output width c1 (the
                # stage preserves its input width) — verify before scaling.
                args.pop(1)
        elif m is nn.BatchNorm2d:
            args = [ch[f]]
        elif m is Concat:
            # output channels = sum of all concatenated inputs' channels
            c2 = sum(ch[x] for x in f)
        # TODO: channel, gw, gd
        elif m in {Detect}:
            args.append([ch[x] for x in f])
            if isinstance(args[1], int):  # number of anchors
                args[1] = [list(range(args[1] * 2))] * len(f)
        elif m is Contract:
            c2 = ch[f] * args[0] ** 2
        elif m is Expand:
            c2 = ch[f] // args[0] ** 2
        else:
            c2 = ch[f]
        m_ = nn.Sequential(*(m(*args) for _ in range(n))) if n > 1 else m(*args)  # module
        t = str(m)[8:-2].replace('__main__.', '')  # module type
        np = sum(x.numel() for x in m_.parameters())  # number params
        m_.i, m_.f, m_.type, m_.np = i, f, t, np  # attach index, 'from' index, type, number params
        LOGGER.info(f'{i:>3}{str(f):>18}{n_:>3}{np:10.0f} {t:<40}{str(args):<30}')  # print
        save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1)  # append to savelist
        layers.append(m_)
        if i == 0:
            # discard the input-channel seed; from here ch[j] is layer j's output width
            ch = []
        ch.append(c2)
    return nn.Sequential(*layers), sorted(save)
在yolo.py中的BaseModel中的fuse函数做如下修改:
def fuse(self):  # fuse model Conv2d() + BatchNorm2d() layers
    """Fold BatchNorm layers into their preceding convolutions for faster inference.

    Besides the stock Conv/DWConv fusion, this also fuses the FasterNet modules
    (PatchEmbed, PatchMerging, MLPBlock) added in models/common.py.
    """
    LOGGER.info('Fusing layers... ')
    for m in self.model.modules():
        if isinstance(m, (Conv, DWConv)) and hasattr(m, 'bn'):
            m.conv = fuse_conv_and_bn(m.conv, m.bn)  # update conv
            delattr(m, 'bn')  # remove batchnorm
            m.forward = m.forward_fuse  # update forward
        if type(m) is PatchEmbed and isinstance(m.norm, nn.BatchNorm2d):
            # guard: norm is nn.Identity when the yaml passed norm_layer=None
            m.proj = fuse_conv_and_bn(m.proj, m.norm)
            delattr(m, 'norm')  # remove batchnorm
            m.forward = m.fuseforward
        if type(m) is PatchMerging and isinstance(m.norm, nn.BatchNorm2d):
            m.reduction = fuse_conv_and_bn(m.reduction, m.norm)
            delattr(m, 'norm')  # remove batchnorm
            m.forward = m.fuseforward
        if type(m) is MLPBlock and hasattr(m, 'mlp') and isinstance(m.mlp[1], nn.BatchNorm2d):
            # BUG FIX: the original rebuilt the activation and the second 1x1
            # conv as *fresh* modules, discarding the trained weights of
            # m.mlp[3] and hard-coding nn.ReLU regardless of act_layer.  It
            # also constructed a throwaway Conv2d for slot 0 that was
            # immediately overwritten.  Reuse the existing modules and only
            # replace conv+BN by their fusion.
            m.mlp = nn.Sequential(
                fuse_conv_and_bn(m.mlp[0], m.mlp[1]),  # 1x1 conv + BN -> fused conv
                m.mlp[2],  # original activation (parameter-free)
                m.mlp[3],  # original second 1x1 conv, trained weights kept
            )
直接运行yolo.py可以看网络结构