This post implements the RepVGG network architecture in Chainer, mirroring the structure of the PyTorch reference implementation, and computes the parameter count of RepVGG.
import math

import numpy as np

import chainer
import chainer.functions as F
import chainer.links as L
from chainer.functions import average_pooling_2d
from chainer.functions.activation.relu import ReLU
from chainer.functions.activation.sigmoid import Sigmoid
from chainer.functions.array.reshape import Reshape
from chainer.functions.pooling.average_pooling_2d import AveragePooling2D
from chainer.links import BatchNormalization

# Layer indices that may use grouped convolutions in the g2/g4 variants.
optional_groupwise_layers = [2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26]
g2_map = {l: 2 for l in optional_groupwise_layers}
g4_map = {l: 4 for l in optional_groupwise_layers}
class SEBlock(chainer.Chain):
    def __init__(self, input_channels, internal_neurons):
        super(SEBlock, self).__init__()
        self.layers = []
        self.layers += [('down', L.Convolution2D(in_channels=input_channels, out_channels=internal_neurons, ksize=1, stride=1, nobias=True))]
        self.layers += [('_relu', ReLU())]
        self.layers += [('up', L.Convolution2D(in_channels=internal_neurons, out_channels=input_channels, ksize=1, stride=1, nobias=True))]
        self.layers += [('_sigmoid', Sigmoid())]
        self.input_channels = input_channels
        with self.init_scope():
            for n in self.layers:
                if not n[0].startswith('_'):
                    setattr(self, n[0], n[1])

    def forward(self, inputs):
        # Squeeze: global average pooling down to a 1x1 feature map per channel.
        x = average_pooling_2d(inputs, ksize=inputs.shape[2])
        for n, f in self.layers:
            if not n.startswith('_'):
                x = getattr(self, n)(x)
            else:
                x = f.apply((x,))[0]
        x = x.reshape(-1, self.input_channels, 1, 1)
        # Excite: rescale the input channel-wise.
        return inputs * x
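
The SE block squeezes each channel to a single value with global average pooling, passes it through the down/up 1x1 convolutions with ReLU and sigmoid, and rescales the input channel-wise, so the output keeps the input shape. A minimal shape check, assuming the imports and the SEBlock class above (the channel counts are illustrative):

se = SEBlock(input_channels=64, internal_neurons=4)       # 4 = 64 // 16, as used inside RepVGGBlock
feat = np.random.rand(2, 64, 7, 7).astype(np.float32)
out = se(feat)
print(out.shape)  # (2, 64, 7, 7): same shape as the input, rescaled per channel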
class Conv_BN(chainer.Chain):
    def __init__(self, in_channels, out_channels, kernel_size, stride, padding, groups=1):
        super(Conv_BN, self).__init__()
        self.layers = []
        self.layers += [('conv', L.Convolution2D(in_channels=in_channels, out_channels=out_channels, ksize=kernel_size, stride=stride, pad=padding, groups=groups, nobias=True))]
        self.layers += [('bn', BatchNormalization(out_channels))]
        with self.init_scope():
            for n in self.layers:
                if not n[0].startswith('_'):
                    setattr(self, n[0], n[1])

    def __call__(self, x):
        for n, f in self.layers:
            if not n.startswith('_'):
                x = getattr(self, n)(x)
            else:
                x = f.apply((x,))[0]
        return x
class RepVGGBlock(chainer.Chain):
    def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, groups=1, padding_mode='zeros', deploy=False, use_se=False):
        super(RepVGGBlock, self).__init__()
        self.deploy = deploy
        self.groups = groups
        self.in_channels = in_channels
        assert kernel_size == 3
        assert padding == 1
        padding_11 = padding - kernel_size // 2
        self.layers = []
        if deploy:
            # Deploy mode: a single re-parameterized 3x3 convolution.
            self.layers += [('rbr_reparam', L.Convolution2D(in_channels=in_channels, out_channels=out_channels, ksize=kernel_size, stride=stride, pad=padding, groups=groups, nobias=False))]
            if use_se:
                self.layers += [('se', SEBlock(out_channels, internal_neurons=out_channels // 16))]
            self.layers += [('_act', ReLU())]
        else:
            # Training mode: parallel branches ('@'-prefixed entries are summed in forward).
            if out_channels == in_channels and stride == 1:
                self.layers += [('@rbr_identity', BatchNormalization(in_channels))]
            self.layers += [('@rbr_dense', Conv_BN(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=stride, padding=padding, groups=groups))]
            self.layers += [('@rbr_1x1', Conv_BN(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=stride, padding=padding_11, groups=groups))]
            if use_se:
                self.layers += [('se', SEBlock(out_channels, internal_neurons=out_channels // 16))]
            self.layers += [('_act', ReLU())]
        with self.init_scope():
            for n in self.layers:
                if not n[0].startswith('_'):
                    setattr(self, n[0], n[1])

    def forward(self, inputs):
        x = inputs
        if self.deploy:
            for n, f in self.layers:
                if not n.startswith('_'):
                    x = getattr(self, n)(x)
                else:
                    x = f.apply((x,))[0]
            return x
        else:
            x = None
            for n, f in self.layers:
                if not n.startswith('_'):
                    if n.startswith('@'):
                        # All '@' branches consume the block input and are summed.
                        if x is None:
                            x = getattr(self, n)(inputs)
                        else:
                            x += getattr(self, n)(inputs)
                    else:
                        x = getattr(self, n)(x)
                else:
                    x = f.apply((x,))[0]
            return x
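
During training a RepVGGBlock sums up to three parallel branches (identity BN, 3x3 conv-BN, 1x1 conv-BN) before the ReLU, whereas with deploy=True it holds only the single 3x3 convolution rbr_reparam. A small sketch comparing the two modes, assuming the classes above; the weights are independent here, so only the output shapes agree, not the values:

x = np.random.rand(1, 32, 56, 56).astype(np.float32)
train_block = RepVGGBlock(in_channels=32, out_channels=32, kernel_size=3, stride=1, padding=1)
deploy_block = RepVGGBlock(in_channels=32, out_channels=32, kernel_size=3, stride=1, padding=1, deploy=True)
print(train_block(x).shape, deploy_block(x).shape)            # both (1, 32, 56, 56)
print(train_block.count_params(), deploy_block.count_params())  # multi-branch vs. single 3x3 conv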
class RepVGG(chainer.Chain):
    cfgs = {
        'repvgg_a0': {'num_blocks': [2, 4, 14, 1], 'width_multiplier': [0.75, 0.75, 0.75, 2.5], 'override_groups_map': None, 'use_se': False},
        'repvgg_a1': {'num_blocks': [2, 4, 14, 1], 'width_multiplier': [1, 1, 1, 2.5], 'override_groups_map': None, 'use_se': False},
        'repvgg_a2': {'num_blocks': [2, 4, 14, 1], 'width_multiplier': [1.5, 1.5, 1.5, 2.75], 'override_groups_map': None, 'use_se': False},
        'repvgg_b0': {'num_blocks': [4, 6, 16, 1], 'width_multiplier': [1, 1, 1, 2.5], 'override_groups_map': None, 'use_se': False},
        'repvgg_b1': {'num_blocks': [4, 6, 16, 1], 'width_multiplier': [2, 2, 2, 4], 'override_groups_map': None, 'use_se': False},
        'repvgg_b1g2': {'num_blocks': [4, 6, 16, 1], 'width_multiplier': [2, 2, 2, 4], 'override_groups_map': g2_map, 'use_se': False},
        'repvgg_b1g4': {'num_blocks': [4, 6, 16, 1], 'width_multiplier': [2, 2, 2, 4], 'override_groups_map': g4_map, 'use_se': False},
        'repvgg_b2': {'num_blocks': [4, 6, 16, 1], 'width_multiplier': [2.5, 2.5, 2.5, 5], 'override_groups_map': None, 'use_se': False},
        'repvgg_b2g2': {'num_blocks': [4, 6, 16, 1], 'width_multiplier': [2.5, 2.5, 2.5, 5], 'override_groups_map': g2_map, 'use_se': False},
        'repvgg_b2g4': {'num_blocks': [4, 6, 16, 1], 'width_multiplier': [2.5, 2.5, 2.5, 5], 'override_groups_map': g4_map, 'use_se': False},
        'repvgg_b3': {'num_blocks': [4, 6, 16, 1], 'width_multiplier': [3, 3, 3, 5], 'override_groups_map': None, 'use_se': False},
        'repvgg_b3g2': {'num_blocks': [4, 6, 16, 1], 'width_multiplier': [3, 3, 3, 5], 'override_groups_map': g2_map, 'use_se': False},
        'repvgg_b3g4': {'num_blocks': [4, 6, 16, 1], 'width_multiplier': [3, 3, 3, 5], 'override_groups_map': g4_map, 'use_se': False},
        'repvgg_d2se': {'num_blocks': [8, 14, 24, 1], 'width_multiplier': [2.5, 2.5, 2.5, 5], 'override_groups_map': None, 'use_se': True},
    }
    def __init__(self, model_name='repvgg_b3g4', num_classes=1000, channels=3, deploy=False, batch_size=4, image_size=224, **kwargs):
        super(RepVGG, self).__init__()
        assert len(self.cfgs[model_name]['width_multiplier']) == 4
        self.deploy = deploy
        self.image_size = image_size
        self.override_groups_map = self.cfgs[model_name]['override_groups_map'] or dict()
        assert 0 not in self.override_groups_map
        self.use_se = self.cfgs[model_name]['use_se']
        self.in_planes = min(64, int(64 * self.cfgs[model_name]['width_multiplier'][0]))
        self.layers = []
        self.layers += [('stage0', RepVGGBlock(in_channels=channels, out_channels=self.in_planes, kernel_size=3, stride=2, padding=1, deploy=self.deploy, use_se=self.use_se))]
        output_size = int((self.image_size - 3 + 2 * ((3 - 1) // 2)) / 2 + 1)
        self.cur_layer_idx = 1
        self._make_stage('stage1', int(64 * self.cfgs[model_name]['width_multiplier'][0]), self.cfgs[model_name]['num_blocks'][0], stride=2)
        output_size = math.ceil(output_size / 2)
        self._make_stage('stage2', int(128 * self.cfgs[model_name]['width_multiplier'][1]), self.cfgs[model_name]['num_blocks'][1], stride=2)
        output_size = math.ceil(output_size / 2)
        self._make_stage('stage3', int(256 * self.cfgs[model_name]['width_multiplier'][2]), self.cfgs[model_name]['num_blocks'][2], stride=2)
        output_size = math.ceil(output_size / 2)
        self._make_stage('stage4', int(512 * self.cfgs[model_name]['width_multiplier'][3]), self.cfgs[model_name]['num_blocks'][3], stride=2)
        output_size = math.ceil(output_size / 2)
        # Global average pooling over the final feature map; the reshape assumes a fixed batch_size.
        self.layers += [('_avgpool', AveragePooling2D(ksize=output_size, stride=1, pad=0))]
        self.layers += [('_reshape', Reshape((batch_size, int(512 * self.cfgs[model_name]['width_multiplier'][3]))))]
        self.layers += [('fc', L.Linear(int(512 * self.cfgs[model_name]['width_multiplier'][3]), num_classes))]
        with self.init_scope():
            for n in self.layers:
                if not n[0].startswith('_'):
                    setattr(self, n[0], n[1])

    def _make_stage(self, stage, planes, num_blocks, stride):
        strides = [stride] + [1] * (num_blocks - 1)
        for stride in strides:
            cur_groups = self.override_groups_map.get(self.cur_layer_idx, 1)
            self.layers += [('{}_block{}'.format(stage, self.cur_layer_idx), RepVGGBlock(in_channels=self.in_planes, out_channels=planes, kernel_size=3, stride=stride, padding=1, groups=cur_groups, deploy=self.deploy, use_se=self.use_se))]
            self.in_planes = planes
            self.cur_layer_idx += 1

    def forward(self, x):
        for n, f in self.layers:
            origin_size = x.shape
            if not n.startswith('_'):
                x = getattr(self, n)(x)
            else:
                x = f.apply((x,))[0]
            print(n, origin_size, x.shape)
        if chainer.config.train:
            return x
        return F.softmax(x)
Note that this class is the complete RepVGG implementation. The forward pass distinguishes between training and inference: during training it returns x (the raw logits) directly, while at inference x is passed through softmax to produce class probabilities.
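A minimal sketch of the inference path, assuming a model instance and input batch x as constructed in the __main__ block below:

with chainer.using_config('train', False), chainer.no_backprop_mode():
    probs = model(x)
print(probs.shape)              # (batch_size, num_classes)
print(probs.array.sum(axis=1))  # each row sums to 1 after softmax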
if __name__ == '__main__':
    batch_size = 4
    n_channels = 3
    image_size = 224
    num_classes = 123
    model = RepVGG(num_classes=num_classes, channels=n_channels, image_size=image_size, batch_size=batch_size)
    print("number of parameters:", model.count_params())

    x = np.random.rand(batch_size, n_channels, image_size, image_size).astype(np.float32)
    t = np.random.randint(0, num_classes, size=(batch_size,)).astype(np.int32)
    with chainer.using_config('train', True):
        y1 = model(x)
        loss1 = F.softmax_cross_entropy(y1, t)
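
To turn this into an actual training step, the loss can be backpropagated and the parameters updated with one of Chainer's optimizers. A minimal sketch; the choice of Adam and its learning rate here is illustrative and not part of the original script:

    from chainer import optimizers

    optimizer = optimizers.Adam(alpha=1e-3)
    optimizer.setup(model)

    model.cleargrads()   # clear any previously accumulated gradients
    loss1.backward()     # backpropagate through the RepVGG graph
    optimizer.update()   # apply the Adam update to all registered parameters
    print("loss:", float(loss1.array))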