个人总结:PyTorch 中提取任意层的 feature 有两种方法,具体用哪一种取决于网络的构建方式。
首先介绍第一种:
以 MobileFaceNet 为例,先看一下 MobileFaceNet 的网络构建代码:
class MobileFaceNet(Module):
    """MobileFaceNet backbone with an extra classification head.

    ``forward`` returns both the classification logits and the output of the
    ``conv_5`` residual stage, so the caller can use that intermediate
    feature map directly.

    Args:
        embedding_size: Width of the embedding produced by ``linear``/``bn``.
        class_num: Number of classes predicted by ``fc``.
    """

    def __init__(self, embedding_size, class_num):
        super(MobileFaceNet, self).__init__()
        self.conv1 = Conv_block(3, 64, kernel=(3, 3), stride=(2, 2), padding=(1, 1))
        self.conv2_dw = Conv_block(64, 64, kernel=(3, 3), stride=(1, 1), padding=(1, 1), groups=64)
        self.conv_23 = Depth_Wise(64, 64, kernel=(3, 3), stride=(2, 2), padding=(1, 1), groups=128)
        self.conv_3 = Residual(64, num_block=4, groups=128, kernel=(3, 3), stride=(1, 1), padding=(1, 1))
        self.conv_34 = Depth_Wise(64, 128, kernel=(3, 3), stride=(2, 2), padding=(1, 1), groups=256)
        self.conv_4 = Residual(128, num_block=6, groups=256, kernel=(3, 3), stride=(1, 1), padding=(1, 1))
        self.conv_45 = Depth_Wise(128, 128, kernel=(3, 3), stride=(2, 2), padding=(1, 1), groups=512)
        self.conv_5 = Residual(128, num_block=2, groups=256, kernel=(3, 3), stride=(1, 1), padding=(1, 1))
        self.conv_6_sep = Conv_block(128, 512, kernel=(1, 1), stride=(1, 1), padding=(0, 0))
        self.conv_6_dw = Linear_block(512, 512, groups=512, kernel=(7, 7), stride=(1, 1), padding=(0, 0))
        self.conv_6_flatten = Flatten()
        self.linear = Linear(512, embedding_size, bias=False)
        self.bn = BatchNorm1d(embedding_size)
        # fc consumes the normalized embedding, so its input width must be
        # embedding_size (a hard-coded 512 only works when embedding_size == 512).
        self.fc = Linear(embedding_size, class_num)

    def forward(self, x):
        """Run the network and also expose the ``conv_5`` feature map.

        The shape notes below follow the original author's annotations and
        assume an input of shape [batch_size, 3, 112, 112].

        Returns:
            A tuple ``(out, out_res)``: ``out`` is the ``fc`` output of shape
            [batch_size, class_num]; ``out_res`` is the output of ``conv_5``.
        """
        out = self.conv1(x)               # [batch_size, 64, 56, 56]
        out = self.conv2_dw(out)          # [batch_size, 64, 56, 56]
        out = self.conv_23(out)           # [batch_size, 64, 28, 28]
        out = self.conv_3(out)            # residual stage: [batch_size, 64, 28, 28]
        out = self.conv_34(out)           # [batch_size, 128, 14, 14]
        out = self.conv_4(out)            # residual stage: [batch_size, 128, 14, 14]
        out = self.conv_45(out)           # [batch_size, 128, 7, 7]
        # Keep the conv_5 output under its own name so it can be returned
        # alongside the final logits.
        out_res = self.conv_5(out)        # residual stage: [batch_size, 128, 7, 7]
        out = self.conv_6_sep(out_res)    # [batch_size, 512, 7, 7]
        out = self.conv_6_dw(out)         # [batch_size, 512, 1, 1]
        out = self.conv_6_flatten(out)    # [batch_size, 512]
        out = self.linear(out)            # [batch_size, embedding_size]
        out = self.bn(out)                # [batch_size, embedding_size]
        out_feature = l2_norm(out)        # [batch_size, embedding_size]
        out = self.fc(out_feature)        # [batch_size, class_num]
        return out, out_res
以上是 MobileFaceNet 的主干网络,有一些细节没有展现,感兴趣可以去 GitHub 上搜索下载 InsightFace(https://github.com/TreB1eN/InsightFace_Pytorch),这个工程中有 MobileFaceNet 的网络构建(model.py)和使用的代码(Learner.py)。
对于上面这种网络构建方法,只需在 forward 函数的 return 中多加一个返回值即可。比如上面的代码中我想使用 self.conv_5 的 feature,可以直接返回 self.conv_5 的输出:return out, out_res,其中 out 是整个网络的输出,而 out_res 就是 self.conv_5 这一层的 feature 值。
然后介绍第二种,以下是使用了Sequential构建的se_resnet50。同样,这部分网络构建可以在InsightFace(https://github.com/TreB1eN/InsightFace_Pytorch)工程中找到,贴出网络结构主干部分的代码:
class Backbone(Module):
    """IR / IR-SE residual backbone assembled from ``Sequential`` containers.

    Submodules are registered in the order ``input_layer``, ``output_layer``,
    ``body``.

    Args:
        num_layers: Network depth; must be 50, 100 or 152.
        drop_ratio: Dropout probability used in ``output_layer``.
        mode: ``'ir'`` to build from ``bottleneck_IR`` units, ``'ir_se'`` to
            build from ``bottleneck_IR_SE`` units.
    """

    def __init__(self, num_layers, drop_ratio, mode='ir'):
        super(Backbone, self).__init__()
        assert num_layers in [50, 100, 152], 'num_layers should be 50,100, or 152'
        assert mode in ['ir', 'ir_se'], 'mode should be ir or ir_se'
        stage_specs = get_blocks(num_layers)

        # Select the bottleneck class that every unit in the body will use.
        if mode == 'ir':
            unit_cls = bottleneck_IR
        elif mode == 'ir_se':
            unit_cls = bottleneck_IR_SE

        # Stem: 3x3 convolution -> batch norm -> PReLU.
        stem_layers = [
            Conv2d(3, 64, (3, 3), 1, 1, bias=False),
            BatchNorm2d(64),
            PReLU(64),
        ]
        self.input_layer = Sequential(*stem_layers)

        # Head: batch norm -> dropout -> flatten -> linear -> batch norm.
        head_layers = [
            BatchNorm2d(512),
            Dropout(drop_ratio),
            Flatten(),
            Linear(512 * 7 * 7, 512),
            BatchNorm1d(512),
        ]
        self.output_layer = Sequential(*head_layers)

        # Body: one unit per bottleneck spec, flattened across all stages.
        units = [
            unit_cls(spec.in_channel, spec.depth, spec.stride)
            for stage in stage_specs
            for spec in stage
        ]
        self.body = Sequential(*units)

    def forward(self, x):
        """Apply stem, body and head in turn and return the l2-normalized result."""
        return l2_norm(self.output_layer(self.body(self.input_layer(x))))
这里 forward 函数只有三步调用(input_layer、body、output_layer),使用上面的方法无法获得 self.body 内部每一层的 feature。那此时该怎么办呢?
第一步是将这个网络结构打印出来,代码:
# Build the backbone from the config values, then move it to the configured device.
backbone = Backbone(conf.net_depth, conf.drop_ratio, conf.net_mode)
self.model = backbone.to(conf.device)
# Printing the module lists its named submodules, which shows the names
# needed to address an individual layer.
print(self.model)
可以看到下面的输出结果:
Backbone(
(input_layer): Sequential(
(0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): PReLU(num_parameters=64)
)
(output_layer): Sequential(
(0): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(1): Dropout(p=0.6)
(2): Flatten()
(3): Linear(in_features=25088, out_features=512, bias=True)
(4): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(body): Sequential(
(0): bottleneck_IR_SE(
(shortcut_layer): MaxPool2d(kernel_size=1, stride=2, padding=0, dilation=1, ceil_mode=False)
(res_layer): Sequential(
(0): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(2): PReLU(num_parameters=64)
(3): Conv2d(64, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
(4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(5): SEModule(
(avg_pool): AdaptiveAvgPool2d(output_size=1)
(fc1): Conv2d(64, 4, kernel_size=(1, 1), stride=(1, 1), bias=False)
(relu): ReLU(inplace)
(fc2): Conv2d(4, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
(sigmoid): Sigmoid()
)
)
)
(1): bottleneck_IR_SE(
(shortcut_layer): MaxPool2d(kernel_size=1, stride=1, padding=0, dilation=1, ceil_mode=False)
(res_layer): Sequential(
(0): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(2): PReLU(num_parameters=64)
(3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(5): SEModule(
(avg_pool): AdaptiveAvgPool2d(output_size=1)
(fc1): Conv2d(64, 4, kernel_size=(1, 1), stride=(1, 1), bias=False)
(relu): ReLU(inplace)
(fc2): Conv2d(4, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
(sigmoid): Sigmoid()
)
)
)
... ...
(23): bottleneck_IR_SE(
(shortcut_layer): MaxPool2d(kernel_size=1, stride=1, padding=0, dilation=1, ceil_mode=False)
(res_layer): Sequential(
(0): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(1): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(2): PReLU(num_parameters=512)
(3): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(4): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(5): SEModule(
(avg_pool): AdaptiveAvgPool2d(output_size=1)
(fc1): Conv2d(512, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
(relu): ReLU(inplace)
(fc2): Conv2d(32, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
(sigmoid): Sigmoid()
)
)
)
)
)
现在,我要提取 'body' 中编号为 (2) 的那一层(编号从 0 开始,即第三个 bottleneck)的特征:
# Extract the output of the body child named '2' (the third bottleneck, 0-based).
# If the model is wrapped (e.g. by nn.DataParallel, which exposes the real
# network as `.module`), unwrap it first; otherwise the named submodules
# `input_layer` and `body` are not reachable from the wrapper.
# NOTE(review): running the unwrapped network uses a single device, so `imgs`
# must live on the same device as the network's parameters -- confirm in caller.
net = getattr(self.model, 'module', self.model)
# Run the stem directly instead of looping over the private `_modules` dict
# and relying on `input_layer` being the first registered child.
imgs = net.input_layer(imgs)
for name, module in net.body.named_children():
    imgs = module(imgs)
    # Stop right after the requested block; `imgs` now holds its feature map.
    if name == '2':
        break
这样就可以了。但是同时又产生了另一个问题:多 GPU 训练时,上面的第二种方法无法直接使用(例如模型被 nn.DataParallel 包装后,原网络位于 model.module 下,model._modules 中不再直接包含 input_layer 和 body),目前还在修改中;如果大家有更好的方法,欢迎推荐。
推荐链接:
https://blog.csdn.net/Hungryof/article/details/80921417