Res2Net结构简单,性能优秀,可以进一步探索CNN在更细粒度级别的多“尺度”表示能力。 Res2Net揭示了一个新的维度,即“尺度”(Scale),除了深度,宽度和基数的现有维度之外,“尺度”是一个必不可少的更有效的因素。
“尺度”:在 Res2Net 模块内部,特征被划分为若干特征组,不同的特征组合对应不同的尺度;特征组的数量即为“尺度”(原文:the number of feature groups in the Res2Net block)。
$y_i$ 表示 $K_i()$ 的输出(代表不同“尺度”的特征组合)。子特征 $x_i$ 与 $K_{i-1}()$ 的输出相加后,再送入 $K_i()$。所有特征组合拼接后再送入 1×1 卷积,实现不同“尺度”特征的融合。
class Bottle2neck(nn.Module):
    """Res2Net bottleneck block with ReLU activations.

    A 1x1 convolution maps the input to ``width * scale`` channels, which are
    split into ``scale`` groups of ``width`` channels each.  The first
    ``nums`` groups each pass through their own 3x3 conv + BN + ReLU; in
    ``'normal'`` mode every group after the first is first summed with the
    previous group's output.  The group outputs are concatenated and fused
    by a final 1x1 convolution, then added to the residual.
    """

    # The block outputs ``planes * expansion`` channels.
    expansion = 2

    def __init__(self, inplanes, planes, stride=1, downsample=None, baseWidth=26, scale=4, stype='normal'):
        """Build the block.

        Args:
            inplanes: number of input channels.
            planes: base channel count; the block outputs
                ``planes * expansion`` channels.
            stride: stride of the 3x3 convolutions (and of the average
                pooling used in ``'stage'`` mode).
            downsample: optional module applied to the block input to
                produce the residual; ``None`` uses the input unchanged.
            baseWidth: sets the channel width of each group:
                ``width = floor(planes * baseWidth / 64)``.
            scale: number of feature groups the 1x1 conv output is split into.
            stype: ``'normal'`` chains the groups (each group receives the
                previous group's output); ``'stage'`` convolves the groups
                independently and average-pools the last one.
        """
        # Zero-argument super(): the file defines two classes with this name,
        # so looking the class up by its global name would be fragile.
        super().__init__()
        width = int(math.floor(planes * (baseWidth / 64.0)))
        self.conv1 = nn.Conv2d(inplanes, width * scale, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(width * scale)
        # With more than one group, the last group gets no 3x3 convolution.
        if scale == 1:
            self.nums = 1
        else:
            self.nums = scale - 1
        if stype == 'stage':
            self.pool = nn.AvgPool2d(kernel_size=3, stride=stride, padding=1)
        convs = []
        bns = []
        for _ in range(self.nums):
            convs.append(nn.Conv2d(width, width, kernel_size=3, stride=stride, padding=1, bias=False))
            bns.append(nn.BatchNorm2d(width))
        self.convs = nn.ModuleList(convs)
        self.bns = nn.ModuleList(bns)
        self.conv3 = nn.Conv2d(width * scale, planes * self.expansion, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes * self.expansion)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stype = stype
        self.scale = scale
        self.width = width

    def forward(self, x):
        """Apply the block to ``x`` and return the activated residual sum."""
        residual = x
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        # Split along the channel dimension into groups of ``width`` channels.
        spx = torch.split(out, self.width, 1)
        sp = None
        branch_outputs = []
        for i in range(self.nums):
            if i == 0 or self.stype == 'stage':
                sp = spx[i]
            else:
                # Feed the previous group's output into the current group.
                sp = sp + spx[i]
            sp = self.convs[i](sp)
            sp = self.relu(self.bns[i](sp))
            branch_outputs.append(sp)
        out = torch.cat(branch_outputs, 1)
        if self.scale != 1 and self.stype == 'normal':
            # The last group is passed through without any convolution.
            out = torch.cat((out, spx[self.nums]), 1)
        elif self.scale != 1 and self.stype == 'stage':
            out = torch.cat((out, self.pool(spx[self.nums])), 1)
        out = self.conv3(out)
        out = self.bn3(out)
        if self.downsample is not None:
            residual = self.downsample(x)
        out += residual
        out = self.relu(out)
        return out
conv 32 3 x 3 / 1 608 x 608 x 3 -> 608 x 608 x 32
conv 64 3 x 3 / 2 608 x 608 x 32 -> 304 x 304 x 64
res*1 ------------>>>>>>>res2net*1
conv 128 3 x 3 / 2 304 x 304 x 64 -> 152 x 152 x 128
res*2 ------------>>>>>>>res2net*2
conv 256 3 x 3 / 2 152 x 152 x 128 -> 76 x 76 x 256
res*8 ------------>>>>>>>res2net*8
conv 512 3 x 3 / 2 76 x 76 x 256 -> 38 x 38 x 512
res*8 ------------>>>>>>>res2net*8
conv 1024 3 x 3 / 2 38 x 38 x 512 -> 19 x 19 x1024
res*4 ------------>>>>>>>res2net*4
conv 512 1 x 1 / 1 19 x 19 x1024 -> 19 x 19 x 512
conv 1024 3 x 3 / 1 19 x 19 x 512 -> 19 x 19 x1024
conv 255 1 x 1 / 1 19 x 19 x1024 -> 19 x 19 x 255
route -4
conv 256 1 x 1 / 1 19 x 19 x 512 -> 19 x 19 x 256
upsample 2x 19 x 19 x 256 -> 38 x 38 x 256
route -1 b
conv 256 1 x 1 / 1 38 x 38 x 512 -> 38 x 38 x 256
conv 512 3 x 3 / 1 38 x 38 x 256 -> 38 x 38 x 512
conv 255 1 x 1 / 1 38 x 38 x 512 -> 38 x 38 x 255
route -4
conv 128 1 x 1 / 1 38 x 38 x 256 -> 38 x 38 x 128
upsample 2x 38 x 38 x 128 -> 76 x 76 x 128
route -1 c
conv 128 1 x 1 / 1 76 x 76 x 256 -> 76 x 76 x 128
conv 256 3 x 3 / 1 76 x 76 x 128 -> 76 x 76 x 256
conv 255 1 x 1 / 1 76 x 76 x 256 -> 76 x 76 x 255
conv 32 3 x 3 / 1 608 x 608 x 3 -> 608 x 608 x 32
conv 64 3 x 3 / 2 608 x 608 x 32 -> 304 x 304 x 64
res*1 ------------>>>>>>>res2net*1
conv 128 3 x 3 / 2 304 x 304 x 64 -> 152 x 152 x 128
res*2 ------------>>>>>>>res2net*2
conv 256 3 x 3 / 2 152 x 152 x 128 -> 76 x 76 x 256
res*8 ------------>>>>>>>res2net*8
conv 512 3 x 3 / 2 76 x 76 x 256 -> 38 x 38 x 512
res*8 ------------>>>>>>>res2net*8
conv 1024 3 x 3 / 2 38 x 38 x 512 -> 19 x 19 x1024
res*4 ------------>>>>>>>res2net*4
triangle*2 ------------>>>>>>>res2_triangle*2
conv 512 1 x 1 / 1 19 x 19 x1024 -> 19 x 19 x 512
conv 1024 3 x 3 / 1 19 x 19 x 512 -> 19 x 19 x1024
conv 255 1 x 1 / 1 19 x 19 x1024 -> 19 x 19 x 255
route -4
conv 256 1 x 1 / 1 19 x 19 x 512 -> 19 x 19 x 256
upsample 2x 19 x 19 x 256 -> 38 x 38 x 256
route -1 b
triangle*2 ------------>>>>>>>res2_triangle*2
conv 256 1 x 1 / 1 38 x 38 x 512 -> 38 x 38 x 256
conv 512 3 x 3 / 1 38 x 38 x 256 -> 38 x 38 x 512
conv 255 1 x 1 / 1 38 x 38 x 512 -> 38 x 38 x 255
route -4
conv 128 1 x 1 / 1 38 x 38 x 256 -> 38 x 38 x 128
upsample 2x 38 x 38 x 128 -> 76 x 76 x 128
route -1 c
triangle*2 ------------>>>>>>>res2_triangle*2
conv 128 1 x 1 / 1 76 x 76 x 256 -> 76 x 76 x 128
conv 256 3 x 3 / 1 76 x 76 x 128 -> 76 x 76 x 256
conv 255 1 x 1 / 1 76 x 76 x 256 -> 76 x 76 x 255
def create_modules(module_defs):
# Excerpt of the module-construction routine: for each entry of `module_defs`
# a fresh nn.Sequential is created and populated according to the entry's
# "type" field. Only the "res2net" and "triangle" branches are shown here;
# the leading `if` branch and the definitions of `output_filters`,
# `module_list`, `hyperparams` and `resnum` are not visible in this excerpt.
for module_i, module_def in enumerate(module_defs):
modules = nn.Sequential()
elif module_def["type"] == "res2net":
# Bottle2neck outputs planes * expansion channels, with expansion = 2.
filters = int(module_def["planes"])*2
res2net = Bottle2neck(inplanes=int(module_def["inplanes"]),planes=int(module_def["planes"]),
stride=1, downsample=None, baseWidth=26, scale=4, stype='normal')
modules.add_module(f"res2net_{module_i}", res2net)
elif module_def["type"] == "triangle":
# NOTE(review): this branch instantiates Bottle2neck (with baseWidth=16)
# rather than the Triangle class defined in this file, and it does not assign
# `filters` in the visible lines -- confirm against the complete source.
triangle = Bottle2neck(inplanes=int(module_def["inplanes"]),planes=int(module_def["planes"]),
stride=1, downsample=None, baseWidth=16, scale=4, stype='normal')
modules.add_module(f"triangle_{module_i}", triangle)
# Register module list and number of output filters
output_filters.append(filters) # `filters` holds the output channel count of this module
return hyperparams, module_list, resnum
class Bottle2neck(nn.Module):
    """Res2Net bottleneck block using LeakyReLU(0.1) activations.

    A 1x1 convolution maps the input to ``width * scale`` channels, which are
    split into ``scale`` groups of ``width`` channels each.  The first
    ``nums`` groups each pass through their own 3x3 conv + BN + LeakyReLU; in
    ``'normal'`` mode every group after the first is first summed with the
    previous group's output.  The group outputs are concatenated, fused by a
    final 1x1 convolution and added to the residual.  The final activation
    is a LeakyReLU with slope 1.0, i.e. an identity mapping.
    """

    # The block outputs ``planes * expansion`` channels.
    expansion = 2

    def __init__(self, inplanes, planes, stride=1, downsample=None, baseWidth=26, scale=4, stype='normal'):
        """Build the block.

        Args:
            inplanes: number of input channels.
            planes: base channel count; the block outputs
                ``planes * expansion`` channels.
            stride: stride of the 3x3 convolutions (and of the average
                pooling used in ``'stage'`` mode).
            downsample: optional module applied to the block input to
                produce the residual; ``None`` uses the input unchanged.
            baseWidth: sets the channel width of each group:
                ``width = floor(planes * baseWidth / 64)``.
            scale: number of feature groups the 1x1 conv output is split into.
            stype: ``'normal'`` chains the groups (each group receives the
                previous group's output); ``'stage'`` convolves the groups
                independently and average-pools the last one.
        """
        # Zero-argument super(): the file defines two classes with this name,
        # so looking the class up by its global name would be fragile.
        super().__init__()
        width = int(math.floor(planes * (baseWidth / 64.0)))
        self.conv1 = nn.Conv2d(inplanes, width * scale, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(width * scale)
        # With more than one group, the last group gets no 3x3 convolution.
        if scale == 1:
            self.nums = 1
        else:
            self.nums = scale - 1
        if stype == 'stage':
            self.pool = nn.AvgPool2d(kernel_size=3, stride=stride, padding=1)
        convs = []
        bns = []
        for _ in range(self.nums):
            convs.append(nn.Conv2d(width, width, kernel_size=3, stride=stride, padding=1, bias=False))
            bns.append(nn.BatchNorm2d(width))
        self.convs = nn.ModuleList(convs)
        self.bns = nn.ModuleList(bns)
        self.conv3 = nn.Conv2d(width * scale, planes * self.expansion, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes * self.expansion)
        self.leaky = nn.LeakyReLU(0.1, inplace=True)
        # Negative slope 1.0 makes this LeakyReLU a purely linear (identity) activation.
        self.linear = nn.LeakyReLU(1.0, inplace=True)
        self.downsample = downsample
        self.stype = stype
        self.scale = scale
        self.width = width

    def forward(self, x):
        """Apply the block to ``x`` and return the (linearly activated) residual sum."""
        residual = x
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.leaky(out)
        # Split along the channel dimension into groups of ``width`` channels.
        spx = torch.split(out, self.width, 1)
        sp = None
        branch_outputs = []
        for i in range(self.nums):
            if i == 0 or self.stype == 'stage':
                sp = spx[i]
            else:
                # Feed the previous group's output into the current group.
                sp = sp + spx[i]
            sp = self.convs[i](sp)
            sp = self.leaky(self.bns[i](sp))
            branch_outputs.append(sp)
        out = torch.cat(branch_outputs, 1)
        if self.scale != 1 and self.stype == 'normal':
            # The last group is passed through without any convolution.
            out = torch.cat((out, spx[self.nums]), 1)
        elif self.scale != 1 and self.stype == 'stage':
            out = torch.cat((out, self.pool(spx[self.nums])), 1)
        out = self.conv3(out)
        out = self.bn3(out)
        if self.downsample is not None:
            residual = self.downsample(x)
        out += residual
        out = self.linear(out)
        return out
class Triangle(nn.Module):
    """Res2Net-style multi-group block without the fusing 1x1 conv or residual.

    A 1x1 convolution maps the input to ``width * scale`` channels, which are
    split into ``scale`` groups of ``width`` channels each.  The first
    ``nums`` groups each pass through their own 3x3 conv + BN + LeakyReLU; in
    ``'normal'`` mode every group after the first is first summed with the
    previous group's output.  The block returns the concatenation of the
    group outputs, so it emits ``width * scale`` channels.
    """

    # Kept for parity with Bottle2neck; not used by this class's own layers.
    expansion = 2

    def __init__(self, inplanes, planes, stride=1, downsample=None, baseWidth=16, scale=4, stype='normal'):
        """Build the block.

        Args:
            inplanes: number of input channels.
            planes: base channel count used to derive the group width.
            stride: stride of the 3x3 convolutions (and of the average
                pooling used in ``'stage'`` mode).
            downsample: stored on the instance; ``forward`` does not use it.
            baseWidth: sets the channel width of each group:
                ``width = floor(planes * baseWidth / 64)``.
            scale: number of feature groups the 1x1 conv output is split into.
            stype: ``'normal'`` chains the groups (each group receives the
                previous group's output); ``'stage'`` convolves the groups
                independently and average-pools the last one.
        """
        super().__init__()
        width = int(math.floor(planes * (baseWidth / 64.0)))
        self.conv1 = nn.Conv2d(inplanes, width * scale, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(width * scale)
        # With more than one group, the last group gets no 3x3 convolution.
        if scale == 1:
            self.nums = 1
        else:
            self.nums = scale - 1
        if stype == 'stage':
            self.pool = nn.AvgPool2d(kernel_size=3, stride=stride, padding=1)
        convs = []
        bns = []
        for _ in range(self.nums):
            convs.append(nn.Conv2d(width, width, kernel_size=3, stride=stride, padding=1, bias=False))
            bns.append(nn.BatchNorm2d(width))
        self.convs = nn.ModuleList(convs)
        self.bns = nn.ModuleList(bns)
        self.leaky = nn.LeakyReLU(0.1, inplace=True)
        self.downsample = downsample
        self.stype = stype
        self.scale = scale
        self.width = width

    def forward(self, x):
        """Apply the block to ``x`` and return the concatenated group outputs."""
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.leaky(out)
        # Split along the channel dimension into groups of ``width`` channels.
        spx = torch.split(out, self.width, 1)
        sp = None
        branch_outputs = []
        for i in range(self.nums):
            if i == 0 or self.stype == 'stage':
                sp = spx[i]
            else:
                # Feed the previous group's output into the current group.
                sp = sp + spx[i]
            sp = self.convs[i](sp)
            sp = self.leaky(self.bns[i](sp))
            branch_outputs.append(sp)
        out = torch.cat(branch_outputs, 1)
        if self.scale != 1 and self.stype == 'normal':
            # The last group is passed through without any convolution.
            out = torch.cat((out, spx[self.nums]), 1)
        elif self.scale != 1 and self.stype == 'stage':
            out = torch.cat((out, self.pool(spx[self.nums])), 1)
        return out
class Darknet(nn.Module):
def load_darknet_weights(self, weights_path):
# Excerpt of the weight loader: walks self.module_defs / self.module_list in
# lockstep and, for "res2net" and "triangle" modules, consumes consecutive
# slices of the flat `weights` array, advancing the read cursor `ptr`.
# For every conv+BN pair the read order is: BN bias, BN weight, BN running
# mean, BN running variance, then the conv weights.
# NOTE(review): `weights` and `ptr` are not defined in the visible lines --
# presumably they are initialised from `weights_path` in elided code.
# NOTE(review): the slices are only reshaped with view_as here; no step that
# copies them into the module parameters is visible in this excerpt --
# confirm against the complete source.
for i, (module_def, module) in enumerate(zip(self.module_defs, self.module_list)):
elif module_def["type"] == "res2net":
# The block itself is the first child of the per-layer container.
module_r = module[0]
conv1 = module_r.conv1
bn1 = module_r.bn1
# Load BN bias, weights, running mean and running variance
num_b = bn1.bias.numel() # Number of biases
# Bias
bn_b = torch.from_numpy(weights[ptr: ptr + num_b]).view_as(bn1.bias)
ptr += num_b
# Weight
bn_w = torch.from_numpy(weights[ptr: ptr + num_b]).view_as(bn1.weight)
ptr += num_b
# Running Mean
bn_rm = torch.from_numpy(weights[ptr: ptr + num_b]).view_as(bn1.running_mean)
ptr += num_b
# Running Var
bn_rv = torch.from_numpy(weights[ptr: ptr + num_b]).view_as(bn1.running_var)
ptr += num_b
# Load conv. weights
num_w = conv1.weight.numel()
conv_w = torch.from_numpy(weights[ptr: ptr + num_w]).view_as(conv1.weight)
ptr += num_w
convs = module_r.convs
bns = module_r.bns
# Hard-coded 3 branches: matches scale=4 (scale - 1 branch convs) as the
# blocks are constructed elsewhere in this file. This inner loop also
# rebinds the outer loop variable `i`.
for i in range(3):
# Load BN bias, weights, running mean and running variance
num_b = bns[i].bias.numel() # Number of biases
# Bias
bn_b = torch.from_numpy(weights[ptr: ptr + num_b]).view_as(bns[i].bias)
ptr += num_b
# Weight
bn_w = torch.from_numpy(weights[ptr: ptr + num_b]).view_as(bns[i].weight)
ptr += num_b
# Running Mean
bn_rm = torch.from_numpy(weights[ptr: ptr + num_b]).view_as(bns[i].running_mean)
ptr += num_b
# Running Var
bn_rv = torch.from_numpy(weights[ptr: ptr + num_b]).view_as(bns[i].running_var)
ptr += num_b
# Load conv. weights
num_w = convs[i].weight.numel()
conv_w = torch.from_numpy(weights[ptr: ptr + num_w]).view_as(convs[i].weight)
ptr += num_w
conv3 = module_r.conv3
bn3 = module_r.bn3
# Load BN bias, weights, running mean and running variance
num_b = bn3.bias.numel() # Number of biases
# Bias
bn_b = torch.from_numpy(weights[ptr: ptr + num_b]).view_as(bn3.bias)
ptr += num_b
# Weight
bn_w = torch.from_numpy(weights[ptr: ptr + num_b]).view_as(bn3.weight)
ptr += num_b
# Running Mean
bn_rm = torch.from_numpy(weights[ptr: ptr + num_b]).view_as(bn3.running_mean)
ptr += num_b
# Running Var
bn_rv = torch.from_numpy(weights[ptr: ptr + num_b]).view_as(bn3.running_var)
ptr += num_b
# Load conv. weights
num_w = conv3.weight.numel()
conv_w = torch.from_numpy(weights[ptr: ptr + num_w]).view_as(conv3.weight)
ptr += num_w
elif module_def["type"] == "triangle":
# Same layout as the res2net branch, except that no conv3 / bn3 are read.
module_r = module[0]
conv1 = module_r.conv1
bn1 = module_r.bn1
# Load BN bias, weights, running mean and running variance
num_b = bn1.bias.numel() # Number of biases
# Bias
bn_b = torch.from_numpy(weights[ptr: ptr + num_b]).view_as(bn1.bias)
ptr += num_b
# Weight
bn_w = torch.from_numpy(weights[ptr: ptr + num_b]).view_as(bn1.weight)
ptr += num_b
# Running Mean
bn_rm = torch.from_numpy(weights[ptr: ptr + num_b]).view_as(bn1.running_mean)
ptr += num_b
# Running Var
bn_rv = torch.from_numpy(weights[ptr: ptr + num_b]).view_as(bn1.running_var)
ptr += num_b
# Load conv. weights
num_w = conv1.weight.numel()
conv_w = torch.from_numpy(weights[ptr: ptr + num_w]).view_as(conv1.weight)
ptr += num_w
convs = module_r.convs
bns = module_r.bns
# Hard-coded 3 branches, as in the res2net branch; also rebinds the outer `i`.
for i in range(3):
# Load BN bias, weights, running mean and running variance
num_b = bns[i].bias.numel() # Number of biases
# Bias
bn_b = torch.from_numpy(weights[ptr: ptr + num_b]).view_as(bns[i].bias)
ptr += num_b
# Weight
bn_w = torch.from_numpy(weights[ptr: ptr + num_b]).view_as(bns[i].weight)
ptr += num_b
# Running Mean
bn_rm = torch.from_numpy(weights[ptr: ptr + num_b]).view_as(bns[i].running_mean)
ptr += num_b
# Running Var
bn_rv = torch.from_numpy(weights[ptr: ptr + num_b]).view_as(bns[i].running_var)
ptr += num_b
# Load conv. weights
num_w = convs[i].weight.numel()
conv_w = torch.from_numpy(weights[ptr: ptr + num_w]).view_as(convs[i].weight)
ptr += num_w
# Make sure the read cursor has consumed the whole weights array
assert ptr == len(weights)
def save_darknet_weights(self, path, cutoff=-1):
# Iterate through layers
for i, (module_def, module) in enumerate(zip(self.module_defs[:cutoff], self.module_list[:cutoff])):
# print(module)
if module_def["type"] == "convolutional":
elif module_def["type"] == "res2net":
module_r = module[0]
conv1 = module_r.conv1
# If batch norm, load bn first
bn1 = module_r.bn1
# Load conv weights
convs = module_r.convs
bns = module_r.bns
for i in range(3):
# Load conv weights
conv3 = module_r.conv3
# If batch norm, load bn first
bn3 = module_r.bn3
# Load conv weights
elif module_def["type"] == "triangle":
module_r = module[0]
conv1 = module_r.conv1
# If batch norm, load bn first
bn1 = module_r.bn1
# Load conv weights
convs = module_r.convs
bns = module_r.bns
for i in range(3):
# Load conv weights