We observe that after an activation function the distribution of x drifts toward the two tails, which makes convergence slower and slower, so we want to pull it back toward a normal distribution; this is exactly what batch normalization does.
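As a quick sanity check (a minimal sketch of my own, not taken from the YOLOv5 code), BatchNorm2d standardizes each channel with its batch statistics and then applies a learnable affine transform:

import torch
from torch import nn

x = torch.randn(8, 16, 32, 32) * 5 + 3       # skewed activations: mean ~3, std ~5
bn = nn.BatchNorm2d(16)                      # gamma=1, beta=0 at initialization
y = bn(x)                                    # gamma * (x - mean) / sqrt(var + eps) + beta
print(y.mean(dim=(0, 2, 3)))                 # per-channel means ~0
print(y.var(dim=(0, 2, 3), unbiased=False))  # per-channel variances ~1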
yolo.py
def fuse(self):  # fuse model Conv2d() + BatchNorm2d() layers
    LOGGER.info('Fusing layers... ')
    for m in self.model.modules():
        if isinstance(m, (Conv, DWConv)) and hasattr(m, 'bn'):
            m.conv = fuse_conv_and_bn(m.conv, m.bn)  # update conv
            delattr(m, 'bn')  # remove batchnorm
            m.forward = m.forward_fuse  # update forward
    self.info()
    return self
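fuse_conv_and_bn folds the BN affine transform into the convolution's weight and bias, so one conv layer does the work of two at inference time. A minimal sketch of the underlying math (my own illustration, not the exact YOLOv5 implementation):

import torch
from torch import nn

def fuse_conv_bn_sketch(conv: nn.Conv2d, bn: nn.BatchNorm2d) -> nn.Conv2d:
    # BN(W*x + b) = gamma * (W*x + b - mean) / sqrt(var + eps) + beta
    #             = (gamma / sqrt(var + eps)) * W * x + beta + (b - mean) * gamma / sqrt(var + eps)
    fused = nn.Conv2d(conv.in_channels, conv.out_channels, conv.kernel_size,
                      stride=conv.stride, padding=conv.padding,
                      dilation=conv.dilation, groups=conv.groups, bias=True)
    scale = bn.weight / torch.sqrt(bn.running_var + bn.eps)
    with torch.no_grad():
        fused.weight.copy_(conv.weight * scale.view(-1, 1, 1, 1))
        b = conv.bias if conv.bias is not None else torch.zeros_like(bn.running_mean)
        fused.bias.copy_(bn.bias + (b - bn.running_mean) * scale)
    return fused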
# detect.py: load model
model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data)
for i in model.model.modules():
    if type(i) is C3:
        i._switch_to_deploy()
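A side note: since modules() already walks the whole tree recursively, the loop can also dispatch directly on a leaf reparameterizable type instead of a container like C3. A sketch, assuming the RepBlock defined further below is used as that leaf block:

for m in model.model.modules():
    if isinstance(m, RepBlock) and not m.deploy:  # skip blocks already switched
        m._switch_to_deploy()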
Here we have several choices for how to perform the _switch_to_deploy operation; essentially it reparameterizes an intermediate network structure (taking C3 as an example). A few points to note:
① Calling i._switch_to_deploy() directly performs the structural adjustment; the _switch_to_deploy() function itself needs no return.
② The loop
for i in model.model.modules():
    i._switch_to_deploy()
modifies model in place; there is no need to rebuild it with nn.Sequential().add_module.
③ _switch_to_deploy can be called in a nested fashion, as the C3/Bottleneck code below shows.
class C3(nn.Module):
    # CSP Bottleneck with 3 convolutions
    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
        super().__init__()
        # ... rest of the original C3 __init__ unchanged

    def _switch_to_deploy(self):
        # self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)))
        for i in self.m:
            i._switch_to_deploy()  # forward the call to each Bottleneck
class Bottleneck(nn.Module):
    # Standard bottleneck
    def __init__(self, c1, c2, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, shortcut, groups, expansion
        super().__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = Conv_C3(c1, c1, k=3, s=1, g=g)
        self.cv2 = Conv_C3(c1, c2, k=3, s=1, g=g)
        self.add = shortcut and c1 == c2

    def _switch_to_deploy(self):
        self.cv1._switch_to_deploy()
        self.cv2._switch_to_deploy()

    def forward(self, x):
        return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))
This way Conv can be nested inside other modules, making it convenient to call _switch_to_deploy directly.
One thing to note here: at the moment this reparameterization can only be applied when in_channel == out_channel, because the nn.Identity() branch gets merged in; if in_channel != out_channel the tensor shapes will not match!
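To see why, note that merging the identity branch means rewriting it as an equivalent convolution kernel with a 1 at the centre of each channel, and such a kernel only exists when every output channel has a matching input channel. A small self-contained illustration (the shapes here are hypothetical):

import torch
from torch.nn import functional as F

c = 4
id_kernel = torch.zeros(c, c, 3, 3)   # identity expressed as a 3x3 conv kernel
for i in range(c):
    id_kernel[i, i, 1, 1] = 1.0       # needs the pairing (i, i): requires c_out == c_in
x = torch.randn(2, c, 8, 8)
print(torch.allclose(x, F.conv2d(x, id_kernel, padding=1)))  # True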
That said, there is also a more general idea here:
import torch
from torch import nn
from torch.nn import functional as F
import numpy as np
from numpy import random
# from se_block import SEBlock


def setup_seed(seed):
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    random.seed(seed)
    torch.backends.cudnn.deterministic = True
def _conv_bn(input_channel, output_channel, kernel_size=3, padding=1, stride=1, groups=1):
    # conv (no bias) + BN pair; the BN will later be folded into the conv
    res = nn.Sequential()
    res.add_module('conv', nn.Conv2d(in_channels=input_channel, out_channels=output_channel,
                                     kernel_size=kernel_size, padding=padding, padding_mode='zeros',
                                     stride=stride, groups=groups, bias=False))
    res.add_module('bn', nn.BatchNorm2d(output_channel))
    return res
class RepBlock(nn.Module):
    def __init__(self, input_channel, output_channel, kernel_size=3, groups=1, stride=1, deploy=False, use_se=False):
        super().__init__()
        self.use_se = use_se
        self.input_channel = input_channel
        self.output_channel = output_channel
        self.deploy = deploy
        self.kernel_size = kernel_size
        self.padding = kernel_size // 2
        self.groups = groups
        self.activation = nn.ReLU()

        # make sure kernel_size=3, padding=1
        assert self.kernel_size == 3
        assert self.padding == 1

        if not self.deploy:
            # training-time structure: 3x3 conv-BN + 1x1 conv-BN + (optional) identity BN
            self.brb_3x3 = _conv_bn(input_channel, output_channel, kernel_size=self.kernel_size,
                                    padding=self.padding, stride=stride, groups=groups)
            self.brb_1x1 = _conv_bn(input_channel, output_channel, kernel_size=1, padding=0,
                                    stride=stride, groups=groups)
            # the identity branch only exists when the block preserves shape
            self.brb_identity = nn.BatchNorm2d(self.input_channel) \
                if self.input_channel == self.output_channel and stride == 1 else None
        else:
            # deploy-time structure: a single fused 3x3 conv with bias
            self.brb_rep = nn.Conv2d(in_channels=input_channel, out_channels=output_channel,
                                     kernel_size=self.kernel_size, padding=self.padding,
                                     padding_mode='zeros', stride=stride, groups=groups, bias=True)

        if self.use_se:
            self.se = SEBlock(input_channel, input_channel // 16)  # requires the SEBlock import above
        else:
            self.se = nn.Identity()
    def forward(self, inputs):
        if self.deploy:
            return self.activation(self.se(self.brb_rep(inputs)))
        if self.brb_identity is None:
            identity_out = 0
        else:
            identity_out = self.brb_identity(inputs)
        return self.activation(self.se(self.brb_1x1(inputs) + self.brb_3x3(inputs) + identity_out))
    def _switch_to_deploy(self):
        self.deploy = True
        kernel, bias = self._get_equivalent_kernel_bias()
        self.brb_rep = nn.Conv2d(in_channels=self.brb_3x3.conv.in_channels, out_channels=self.brb_3x3.conv.out_channels,
                                 kernel_size=self.brb_3x3.conv.kernel_size, padding=self.brb_3x3.conv.padding,
                                 padding_mode=self.brb_3x3.conv.padding_mode, stride=self.brb_3x3.conv.stride,
                                 groups=self.brb_3x3.conv.groups, bias=True)
        self.brb_rep.weight.data = kernel
        self.brb_rep.bias.data = bias
        # stop gradient updates
        for para in self.parameters():
            para.detach_()
        # delete the now-unused branches
        self.__delattr__('brb_3x3')
        self.__delattr__('brb_1x1')
        self.__delattr__('brb_identity')
    # pad the 1x1 kernel into 3x3 kernel parameters
    def _pad_1x1_kernel(self, kernel):
        if kernel is None:
            return 0
        else:
            return F.pad(kernel, [1] * 4)  # pad one pixel on each side: 1x1 -> 3x3
    # fuse the identity, 1x1, and 3x3 branches into the parameters of a single 3x3 conv
    def _get_equivalent_kernel_bias(self):
        brb_3x3_weight, brb_3x3_bias = self._fuse_conv_bn(self.brb_3x3)
        brb_1x1_weight, brb_1x1_bias = self._fuse_conv_bn(self.brb_1x1)
        brb_id_weight, brb_id_bias = self._fuse_conv_bn(self.brb_identity)
        return (brb_3x3_weight + self._pad_1x1_kernel(brb_1x1_weight) + brb_id_weight,
                brb_3x3_bias + brb_1x1_bias + brb_id_bias)
    # fuse the conv and BN parameters of one branch into a single kernel and bias
    def _fuse_conv_bn(self, branch):
        if branch is None:
            return 0, 0
        elif isinstance(branch, nn.Sequential):
            kernel = branch.conv.weight
            running_mean = branch.bn.running_mean
            running_var = branch.bn.running_var
            gamma = branch.bn.weight
            beta = branch.bn.bias
            eps = branch.bn.eps
        else:
            assert isinstance(branch, nn.BatchNorm2d)
            if not hasattr(self, 'id_tensor'):
                # express the identity branch as a 3x3 conv kernel: 1 at each channel's centre
                input_dim = self.input_channel // self.groups
                kernel_value = np.zeros((self.input_channel, input_dim, 3, 3), dtype=np.float32)
                for i in range(self.input_channel):
                    kernel_value[i, i % input_dim, 1, 1] = 1
                self.id_tensor = torch.from_numpy(kernel_value).to(branch.weight.device)
            kernel = self.id_tensor
            running_mean = branch.running_mean
            running_var = branch.running_var
            gamma = branch.weight
            beta = branch.bias
            eps = branch.eps
        std = (running_var + eps).sqrt()
        t = (gamma / std).view(-1, 1, 1, 1)
        return kernel * t, beta - running_mean * gamma / std
if __name__ == '__main__':
    setup_seed(0)
    input = torch.randn(50, 512, 49, 49)
    repblock = RepBlock(512, 512)
    print(repblock)
    repblock.eval()  # BN must use running stats for the equivalence check
    out = repblock(input)
    repblock._switch_to_deploy()
    print(repblock)
    out2 = repblock(input)
    print('difference between vgg and repvgg')
    print(((out2 - out) ** 2).sum())
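If the fusion is correct, the printed sum of squared differences should sit at floating-point noise level (on the order of 1e-9 or smaller). A visibly larger value usually means one of the branches was fused incorrectly, or that the block was not in eval() mode when the reference output was computed, so the BN layers used batch statistics instead of running statistics.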