Here we use a pretrained model that ships with PyTorch (torchvision) as an example:
from torchvision import models

# load the pretrained model
alexnet = models.alexnet(pretrained=True).cuda()
print(alexnet)
AlexNet (
(features): Sequential (
(0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
(1): ReLU (inplace)
(2): MaxPool2d (size=(3, 3), stride=(2, 2), dilation=(1, 1))
(3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
(4): ReLU (inplace)
(5): MaxPool2d (size=(3, 3), stride=(2, 2), dilation=(1, 1))
(6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(7): ReLU (inplace)
(8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(9): ReLU (inplace)
(10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(11): ReLU (inplace)
(12): MaxPool2d (size=(3, 3), stride=(2, 2), dilation=(1, 1))
)
(classifier): Sequential (
(0): Dropout (p = 0.5)
(1): Linear (9216 -> 4096)
(2): ReLU (inplace)
(3): Dropout (p = 0.5)
(4): Linear (4096 -> 4096)
(5): ReLU (inplace)
(6): Linear (4096 -> 1000)
)
)
If we want to get the output of the second-to-last fully-connected layer, there are three ways to do it. The first is to redefine the network without the final ReLU/Linear pair and then copy over the matching pretrained weights:
import torch
import torch.nn as nn

class AlexNet(torch.nn.Module):
    """AlexNet re-defined with the last ReLU/Linear pair removed."""
    def __init__(self):
        super(AlexNet, self).__init__()
        self.model_name = 'alexnet'
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )
        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            # the original last two layers are dropped:
            # nn.ReLU(inplace=True),
            # nn.Linear(4096, num_classes),
        )

    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), 256 * 6 * 6)
        x = self.classifier(x)
        return x
new_model = AlexNet().cuda()

# weights of the pretrained torchvision model loaded above (alexnet)
pretrained_dict = alexnet.state_dict()
model_dict = new_model.state_dict()
# remove the keys in pretrained_dict that do not exist in model_dict
pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict}
# overwrite the matching entries and load them into the new model
model_dict.update(pretrained_dict)
new_model.load_state_dict(model_dict)
The advantage of this approach is that it allows pruning at multiple places and the resulting structure is explicit. The drawbacks are that it takes more code to implement, it is relatively inefficient for large models, and the parameter names of the new model must match those of the original.
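As a quick sanity check, here is a minimal sketch (assuming PyTorch >= 0.4, where tensors can be fed to modules directly; the 224x224 dummy input is purely illustrative) showing that the truncated model now emits the 4096-dimensional penultimate features:

import torch

dummy_input = torch.randn(1, 3, 224, 224).cuda()   # hypothetical dummy batch of one RGB image
new_model.eval()                                    # disable dropout for inspection
features = new_model(dummy_input)
print(features.size())                              # expected: torch.Size([1, 4096])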
The second approach is to replace the classifier with a truncated nn.Sequential built from its child modules:

# remove the last two layers of the classifier (the final ReLU and fully-connected layer)
# the -2 in the slice controls how many trailing layers are dropped
alexnet.classifier = nn.Sequential(*list(alexnet.classifier.children())[:-2])
print(alexnet)
AlexNet (
(features): Sequential (
(0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
(1): ReLU (inplace)
(2): MaxPool2d (size=(3, 3), stride=(2, 2), dilation=(1, 1))
(3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
(4): ReLU (inplace)
(5): MaxPool2d (size=(3, 3), stride=(2, 2), dilation=(1, 1))
(6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(7): ReLU (inplace)
(8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(9): ReLU (inplace)
(10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(11): ReLU (inplace)
(12): MaxPool2d (size=(3, 3), stride=(2, 2), dilation=(1, 1))
)
(classifier): Sequential (
(0): Dropout (p = 0.5)
(1): Linear (9216 -> 4096)
(2): ReLU (inplace)
(3): Dropout (p = 0.5)
(4): Linear (4096 -> 4096)
)
)
This method is fast and convenient; the downside is that it deletes layers outright and does not lend itself to modifications in multiple places. It is best suited to stripping off the last few layers.
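A minimal usage sketch for this second approach (again with a hypothetical dummy input, assuming PyTorch >= 0.4): since torchvision's AlexNet forward just flattens the convolutional features and feeds them to classifier, the truncated model can be called as usual:

import torch

dummy_input = torch.randn(1, 3, 224, 224).cuda()    # hypothetical dummy input
alexnet.eval()
penultimate = alexnet(dummy_input)
print(penultimate.size())                            # expected: torch.Size([1, 4096])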
The third approach is to register a forward hook on the layer whose output we want to inspect:

def get_features_hook(module, input, output):
    # a forward hook is called with the module, its input, and its output
    print("hook", output.data.cpu().numpy().shape)

handle = alexnet.classifier[4].register_forward_hook(get_features_hook)
The advantage of this approach is that the original model structure is left untouched, and the output of any layer can be inspected precisely. The drawback is that each hook call may add some runtime overhead.
handle.remove()
Finally, the hook can be removed through the handle returned by register_forward_hook; handle here is just a variable name and can be anything.
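Putting the pieces together, here is a minimal sketch of the whole hook workflow (assuming PyTorch >= 0.4; the features list, hook name, and dummy input below are illustrative additions, not part of the original code):

import torch

features = []

def save_features_hook(module, input, output):
    # store a detached CPU copy of the layer's output
    features.append(output.detach().cpu())

handle = alexnet.classifier[4].register_forward_hook(save_features_hook)

dummy_input = torch.randn(1, 3, 224, 224).cuda()     # hypothetical dummy input
alexnet.eval()
alexnet(dummy_input)                                  # the hook fires during this forward pass
print(features[0].size())                             # expected: torch.Size([1, 4096])

handle.remove()                                       # detach the hook once it is no longer needed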