在使用预训练模型微调训练时,我们通常需要根据实际的数据集以及想要达到的效果,修改预训练模型的结构。查阅了其他博客和torch.nn源码后,做个笔记。
为了更方便的了解,将使用torchvision中的模型convnext作介绍。
import torch
import torchvision.models as models
import torch.nn as nn

# Build a ConvNeXt-Tiny architecture without loading pretrained weights.
# NOTE: the boolean `pretrained` flag is deprecated since torchvision 0.13;
# `weights=None` is the modern, behavior-identical equivalent of
# `pretrained=False` (pass a Weights enum value to load pretrained weights).
model = models.convnext_tiny(weights=None)
print(model)  # dump the module tree so we can inspect its structure
######################输出的模型结果###################
ConvNeXt(
(features): Sequential(
(0): Conv2dNormActivation(
(0): Conv2d(3, 96, kernel_size=(4, 4), stride=(4, 4))
(1): LayerNorm2d((96,), eps=1e-06, elementwise_affine=True)
)
(1): Sequential(
(0): CNBlock(
(block): Sequential(
(0): Conv2d(96, 96, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=96)
(1): Permute()
(2): LayerNorm((96,), eps=1e-06, elementwise_affine=True)
(3): Linear(in_features=96, out_features=384, bias=True)
(4): GELU(approximate=none)
(5): Linear(in_features=384, out_features=96, bias=True)
(6): Permute()
)
(stochastic_depth): StochasticDepth(p=0.0, mode=row)
)
(1): CNBlock(
(block): Sequential(
(0): Conv2d(96, 96, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=96)
(1): Permute()
(2): LayerNorm((96,), eps=1e-06, elementwise_affine=True)
(3): Linear(in_features=96, out_features=384, bias=True)
(4): GELU(approximate=none)
(5): Linear(in_features=384, out_features=96, bias=True)
(6): Permute()
)
(stochastic_depth): StochasticDepth(p=0.0058823529411764705, mode=row)
)
(2): CNBlock(
(block): Sequential(
(0): Conv2d(96, 96, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=96)
(1): Permute()
(2): LayerNorm((96,), eps=1e-06, elementwise_affine=True)
(3): Linear(in_features=96, out_features=384, bias=True)
(4): GELU(approximate=none)
(5): Linear(in_features=384, out_features=96, bias=True)
(6): Permute()
)
(stochastic_depth): StochasticDepth(p=0.011764705882352941, mode=row)
)
)
(2): Sequential(
(0): LayerNorm2d((96,), eps=1e-06, elementwise_affine=True)
(1): Conv2d(96, 192, kernel_size=(2, 2), stride=(2, 2))
)
(3): Sequential(
(0): CNBlock(
(block): Sequential(
(0): Conv2d(192, 192, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=192)
(1): Permute()
(2): LayerNorm((192,), eps=1e-06, elementwise_affine=True)
(3): Linear(in_features=192, out_features=768, bias=True)
(4): GELU(approximate=none)
(5): Linear(in_features=768, out_features=192, bias=True)
(6): Permute()
)
(stochastic_depth): StochasticDepth(p=0.017647058823529415, mode=row)
)
(1): CNBlock(
(block): Sequential(
(0): Conv2d(192, 192, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=192)
(1): Permute()
(2): LayerNorm((192,), eps=1e-06, elementwise_affine=True)
(3): Linear(in_features=192, out_features=768, bias=True)
(4): GELU(approximate=none)
(5): Linear(in_features=768, out_features=192, bias=True)
(6): Permute()
)
(stochastic_depth): StochasticDepth(p=0.023529411764705882, mode=row)
)
(2): CNBlock(
(block): Sequential(
(0): Conv2d(192, 192, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=192)
(1): Permute()
(2): LayerNorm((192,), eps=1e-06, elementwise_affine=True)
(3): Linear(in_features=192, out_features=768, bias=True)
(4): GELU(approximate=none)
(5): Linear(in_features=768, out_features=192, bias=True)
(6): Permute()
)
(stochastic_depth): StochasticDepth(p=0.029411764705882353, mode=row)
)
)
(4): Sequential(
(0): LayerNorm2d((192,), eps=1e-06, elementwise_affine=True)
(1): Conv2d(192, 384, kernel_size=(2, 2), stride=(2, 2))
)
(5): Sequential(
(0): CNBlock(
(block): Sequential(
(0): Conv2d(384, 384, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=384)
(1): Permute()
(2): LayerNorm((384,), eps=1e-06, elementwise_affine=True)
(3): Linear(in_features=384, out_features=1536, bias=True)
(4): GELU(approximate=none)
(5): Linear(in_features=1536, out_features=384, bias=True)
(6): Permute()
)
(stochastic_depth): StochasticDepth(p=0.03529411764705883, mode=row)
)
(1): CNBlock(
(block): Sequential(
(0): Conv2d(384, 384, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=384)
(1): Permute()
(2): LayerNorm((384,), eps=1e-06, elementwise_affine=True)
(3): Linear(in_features=384, out_features=1536, bias=True)
(4): GELU(approximate=none)
(5): Linear(in_features=1536, out_features=384, bias=True)
(6): Permute()
)
(stochastic_depth): StochasticDepth(p=0.0411764705882353, mode=row)
)
(2): CNBlock(
(block): Sequential(
(0): Conv2d(384, 384, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=384)
(1): Permute()
(2): LayerNorm((384,), eps=1e-06, elementwise_affine=True)
(3): Linear(in_features=384, out_features=1536, bias=True)
(4): GELU(approximate=none)
(5): Linear(in_features=1536, out_features=384, bias=True)
(6): Permute()
)
(stochastic_depth): StochasticDepth(p=0.047058823529411764, mode=row)
)
(3): CNBlock(
(block): Sequential(
(0): Conv2d(384, 384, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=384)
(1): Permute()
(2): LayerNorm((384,), eps=1e-06, elementwise_affine=True)
(3): Linear(in_features=384, out_features=1536, bias=True)
(4): GELU(approximate=none)
(5): Linear(in_features=1536, out_features=384, bias=True)
(6): Permute()
)
(stochastic_depth): StochasticDepth(p=0.052941176470588235, mode=row)
)
(4): CNBlock(
(block): Sequential(
(0): Conv2d(384, 384, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=384)
(1): Permute()
(2): LayerNorm((384,), eps=1e-06, elementwise_affine=True)
(3): Linear(in_features=384, out_features=1536, bias=True)
(4): GELU(approximate=none)
(5): Linear(in_features=1536, out_features=384, bias=True)
(6): Permute()
)
(stochastic_depth): StochasticDepth(p=0.058823529411764705, mode=row)
)
(5): CNBlock(
(block): Sequential(
(0): Conv2d(384, 384, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=384)
(1): Permute()
(2): LayerNorm((384,), eps=1e-06, elementwise_affine=True)
(3): Linear(in_features=384, out_features=1536, bias=True)
(4): GELU(approximate=none)
(5): Linear(in_features=1536, out_features=384, bias=True)
(6): Permute()
)
(stochastic_depth): StochasticDepth(p=0.06470588235294118, mode=row)
)
(6): CNBlock(
(block): Sequential(
(0): Conv2d(384, 384, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=384)
(1): Permute()
(2): LayerNorm((384,), eps=1e-06, elementwise_affine=True)
(3): Linear(in_features=384, out_features=1536, bias=True)
(4): GELU(approximate=none)
(5): Linear(in_features=1536, out_features=384, bias=True)
(6): Permute()
)
(stochastic_depth): StochasticDepth(p=0.07058823529411766, mode=row)
)
(7): CNBlock(
(block): Sequential(
(0): Conv2d(384, 384, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=384)
(1): Permute()
(2): LayerNorm((384,), eps=1e-06, elementwise_affine=True)
(3): Linear(in_features=384, out_features=1536, bias=True)
(4): GELU(approximate=none)
(5): Linear(in_features=1536, out_features=384, bias=True)
(6): Permute()
)
(stochastic_depth): StochasticDepth(p=0.07647058823529412, mode=row)
)
(8): CNBlock(
(block): Sequential(
(0): Conv2d(384, 384, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=384)
(1): Permute()
(2): LayerNorm((384,), eps=1e-06, elementwise_affine=True)
(3): Linear(in_features=384, out_features=1536, bias=True)
(4): GELU(approximate=none)
(5): Linear(in_features=1536, out_features=384, bias=True)
(6): Permute()
)
(stochastic_depth): StochasticDepth(p=0.0823529411764706, mode=row)
)
)
(6): Sequential(
(0): LayerNorm2d((384,), eps=1e-06, elementwise_affine=True)
(1): Conv2d(384, 768, kernel_size=(2, 2), stride=(2, 2))
)
(7): Sequential(
(0): CNBlock(
(block): Sequential(
(0): Conv2d(768, 768, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=768)
(1): Permute()
(2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
(3): Linear(in_features=768, out_features=3072, bias=True)
(4): GELU(approximate=none)
(5): Linear(in_features=3072, out_features=768, bias=True)
(6): Permute()
)
(stochastic_depth): StochasticDepth(p=0.08823529411764706, mode=row)
)
(1): CNBlock(
(block): Sequential(
(0): Conv2d(768, 768, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=768)
(1): Permute()
(2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
(3): Linear(in_features=768, out_features=3072, bias=True)
(4): GELU(approximate=none)
(5): Linear(in_features=3072, out_features=768, bias=True)
(6): Permute()
)
(stochastic_depth): StochasticDepth(p=0.09411764705882353, mode=row)
)
(2): CNBlock(
(block): Sequential(
(0): Conv2d(768, 768, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=768)
(1): Permute()
(2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
(3): Linear(in_features=768, out_features=3072, bias=True)
(4): GELU(approximate=none)
(5): Linear(in_features=3072, out_features=768, bias=True)
(6): Permute()
)
(stochastic_depth): StochasticDepth(p=0.1, mode=row)
)
)
)
(avgpool): AdaptiveAvgPool2d(output_size=1)
(classifier): Sequential(
(0): LayerNorm2d((768,), eps=1e-06, elementwise_affine=True)
(1): Flatten(start_dim=1, end_dim=-1)
(2): Linear(in_features=768, out_features=1000, bias=True)
)
)
pytorch 中的 state_dict 是一个简单的python的字典对象,将每一层与它的对应参数建立映射关系.(如model的每一层的weights及偏置等等)。这个方法的作用一方面是方便查看某一个层的权值和偏置数据,另一方面更多的是在模型保存的时候使用。
# Persist only the parameter tensors (weights/biases), not the module objects.
torch.save(model.state_dict(), 'model_weights.pth')
# To reuse the saved weights later:
# rebuild the identical architecture (`weights=None` replaces the deprecated
# `pretrained=False` — the state_dict keys must match this structure exactly)
model = models.convnext_tiny(weights=None)
# Load the saved parameters back into the freshly built model.
# NOTE(review): on torch >= 1.13, prefer torch.load(..., weights_only=True)
# when the checkpoint file may come from an untrusted source.
model.load_state_dict(torch.load('model_weights.pth'))
这个方法也可以获取模型的参数信息。与前面的方法不同的是,model.parameters()返回的是一个生成器generator,按从开头到结尾的顺序依次产出各层的参数张量,但没有对应的key名称;而state_dict是一个有序字典,每个参数张量都带有对应的key名称。
PyTorch中模型的各个子模块是以属性的形式挂在模型对象上的(打印结果中的每个名字都对应一个属性)。所以当你想要修改模型结构时,直接通过属性名访问你想要的那部分结构并重新赋值,就可以替换模型对应部分的结构和参数。
# PITFALL (deliberately shown): assigning to `model.classifier` replaces the
# entire Sequential (LayerNorm2d -> Flatten -> Linear) with this single Linear,
# so any already-loaded pretrained parameters of the replaced layers are lost.
# NOTE(review): without the Flatten, the avgpool output of shape (N, 768, 1, 1)
# presumably no longer matches what this Linear expects at forward time —
# this snippet only illustrates the structural effect, not a working head.
model.classifier = nn.Linear(in_features=768, out_features=1000, bias=True)
print(model)
#########################输出的结果如下#####################
ConvNeXt(
(features): Sequential(
(0): Conv2dNormActivation(
(0): Conv2d(3, 96, kernel_size=(4, 4), stride=(4, 4))
(1): LayerNorm2d((96,), eps=1e-06, elementwise_affine=True)
)
(1): Sequential(
(0): CNBlock(
(block): Sequential(
(0): Conv2d(96, 96, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=96)
(1): Permute()
(2): LayerNorm((96,), eps=1e-06, elementwise_affine=True)
(3): Linear(in_features=96, out_features=384, bias=True)
(4): GELU(approximate=none)
(5): Linear(in_features=384, out_features=96, bias=True)
(6): Permute()
)
(stochastic_depth): StochasticDepth(p=0.0, mode=row)
)
(1): CNBlock(
(block): Sequential(
(0): Conv2d(96, 96, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=96)
(1): Permute()
(2): LayerNorm((96,), eps=1e-06, elementwise_affine=True)
(3): Linear(in_features=96, out_features=384, bias=True)
(4): GELU(approximate=none)
(5): Linear(in_features=384, out_features=96, bias=True)
(6): Permute()
)
(stochastic_depth): StochasticDepth(p=0.0058823529411764705, mode=row)
)
(2): CNBlock(
(block): Sequential(
(0): Conv2d(96, 96, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=96)
(1): Permute()
(2): LayerNorm((96,), eps=1e-06, elementwise_affine=True)
(3): Linear(in_features=96, out_features=384, bias=True)
(4): GELU(approximate=none)
(5): Linear(in_features=384, out_features=96, bias=True)
(6): Permute()
)
(stochastic_depth): StochasticDepth(p=0.011764705882352941, mode=row)
)
)
(2): Sequential(
(0): LayerNorm2d((96,), eps=1e-06, elementwise_affine=True)
(1): Conv2d(96, 192, kernel_size=(2, 2), stride=(2, 2))
)
(3): Sequential(
(0): CNBlock(
(block): Sequential(
(0): Conv2d(192, 192, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=192)
(1): Permute()
(2): LayerNorm((192,), eps=1e-06, elementwise_affine=True)
(3): Linear(in_features=192, out_features=768, bias=True)
(4): GELU(approximate=none)
(5): Linear(in_features=768, out_features=192, bias=True)
(6): Permute()
)
(stochastic_depth): StochasticDepth(p=0.017647058823529415, mode=row)
)
(1): CNBlock(
(block): Sequential(
(0): Conv2d(192, 192, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=192)
(1): Permute()
(2): LayerNorm((192,), eps=1e-06, elementwise_affine=True)
(3): Linear(in_features=192, out_features=768, bias=True)
(4): GELU(approximate=none)
(5): Linear(in_features=768, out_features=192, bias=True)
(6): Permute()
)
(stochastic_depth): StochasticDepth(p=0.023529411764705882, mode=row)
)
(2): CNBlock(
(block): Sequential(
(0): Conv2d(192, 192, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=192)
(1): Permute()
(2): LayerNorm((192,), eps=1e-06, elementwise_affine=True)
(3): Linear(in_features=192, out_features=768, bias=True)
(4): GELU(approximate=none)
(5): Linear(in_features=768, out_features=192, bias=True)
(6): Permute()
)
(stochastic_depth): StochasticDepth(p=0.029411764705882353, mode=row)
)
)
(4): Sequential(
(0): LayerNorm2d((192,), eps=1e-06, elementwise_affine=True)
(1): Conv2d(192, 384, kernel_size=(2, 2), stride=(2, 2))
)
(5): Sequential(
(0): CNBlock(
(block): Sequential(
(0): Conv2d(384, 384, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=384)
(1): Permute()
(2): LayerNorm((384,), eps=1e-06, elementwise_affine=True)
(3): Linear(in_features=384, out_features=1536, bias=True)
(4): GELU(approximate=none)
(5): Linear(in_features=1536, out_features=384, bias=True)
(6): Permute()
)
(stochastic_depth): StochasticDepth(p=0.03529411764705883, mode=row)
)
(1): CNBlock(
(block): Sequential(
(0): Conv2d(384, 384, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=384)
(1): Permute()
(2): LayerNorm((384,), eps=1e-06, elementwise_affine=True)
(3): Linear(in_features=384, out_features=1536, bias=True)
(4): GELU(approximate=none)
(5): Linear(in_features=1536, out_features=384, bias=True)
(6): Permute()
)
(stochastic_depth): StochasticDepth(p=0.0411764705882353, mode=row)
)
(2): CNBlock(
(block): Sequential(
(0): Conv2d(384, 384, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=384)
(1): Permute()
(2): LayerNorm((384,), eps=1e-06, elementwise_affine=True)
(3): Linear(in_features=384, out_features=1536, bias=True)
(4): GELU(approximate=none)
(5): Linear(in_features=1536, out_features=384, bias=True)
(6): Permute()
)
(stochastic_depth): StochasticDepth(p=0.047058823529411764, mode=row)
)
(3): CNBlock(
(block): Sequential(
(0): Conv2d(384, 384, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=384)
(1): Permute()
(2): LayerNorm((384,), eps=1e-06, elementwise_affine=True)
(3): Linear(in_features=384, out_features=1536, bias=True)
(4): GELU(approximate=none)
(5): Linear(in_features=1536, out_features=384, bias=True)
(6): Permute()
)
(stochastic_depth): StochasticDepth(p=0.052941176470588235, mode=row)
)
(4): CNBlock(
(block): Sequential(
(0): Conv2d(384, 384, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=384)
(1): Permute()
(2): LayerNorm((384,), eps=1e-06, elementwise_affine=True)
(3): Linear(in_features=384, out_features=1536, bias=True)
(4): GELU(approximate=none)
(5): Linear(in_features=1536, out_features=384, bias=True)
(6): Permute()
)
(stochastic_depth): StochasticDepth(p=0.058823529411764705, mode=row)
)
(5): CNBlock(
(block): Sequential(
(0): Conv2d(384, 384, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=384)
(1): Permute()
(2): LayerNorm((384,), eps=1e-06, elementwise_affine=True)
(3): Linear(in_features=384, out_features=1536, bias=True)
(4): GELU(approximate=none)
(5): Linear(in_features=1536, out_features=384, bias=True)
(6): Permute()
)
(stochastic_depth): StochasticDepth(p=0.06470588235294118, mode=row)
)
(6): CNBlock(
(block): Sequential(
(0): Conv2d(384, 384, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=384)
(1): Permute()
(2): LayerNorm((384,), eps=1e-06, elementwise_affine=True)
(3): Linear(in_features=384, out_features=1536, bias=True)
(4): GELU(approximate=none)
(5): Linear(in_features=1536, out_features=384, bias=True)
(6): Permute()
)
(stochastic_depth): StochasticDepth(p=0.07058823529411766, mode=row)
)
(7): CNBlock(
(block): Sequential(
(0): Conv2d(384, 384, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=384)
(1): Permute()
(2): LayerNorm((384,), eps=1e-06, elementwise_affine=True)
(3): Linear(in_features=384, out_features=1536, bias=True)
(4): GELU(approximate=none)
(5): Linear(in_features=1536, out_features=384, bias=True)
(6): Permute()
)
(stochastic_depth): StochasticDepth(p=0.07647058823529412, mode=row)
)
(8): CNBlock(
(block): Sequential(
(0): Conv2d(384, 384, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=384)
(1): Permute()
(2): LayerNorm((384,), eps=1e-06, elementwise_affine=True)
(3): Linear(in_features=384, out_features=1536, bias=True)
(4): GELU(approximate=none)
(5): Linear(in_features=1536, out_features=384, bias=True)
(6): Permute()
)
(stochastic_depth): StochasticDepth(p=0.0823529411764706, mode=row)
)
)
(6): Sequential(
(0): LayerNorm2d((384,), eps=1e-06, elementwise_affine=True)
(1): Conv2d(384, 768, kernel_size=(2, 2), stride=(2, 2))
)
(7): Sequential(
(0): CNBlock(
(block): Sequential(
(0): Conv2d(768, 768, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=768)
(1): Permute()
(2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
(3): Linear(in_features=768, out_features=3072, bias=True)
(4): GELU(approximate=none)
(5): Linear(in_features=3072, out_features=768, bias=True)
(6): Permute()
)
(stochastic_depth): StochasticDepth(p=0.08823529411764706, mode=row)
)
(1): CNBlock(
(block): Sequential(
(0): Conv2d(768, 768, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=768)
(1): Permute()
(2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
(3): Linear(in_features=768, out_features=3072, bias=True)
(4): GELU(approximate=none)
(5): Linear(in_features=3072, out_features=768, bias=True)
(6): Permute()
)
(stochastic_depth): StochasticDepth(p=0.09411764705882353, mode=row)
)
(2): CNBlock(
(block): Sequential(
(0): Conv2d(768, 768, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=768)
(1): Permute()
(2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
(3): Linear(in_features=768, out_features=3072, bias=True)
(4): GELU(approximate=none)
(5): Linear(in_features=3072, out_features=768, bias=True)
(6): Permute()
)
(stochastic_depth): StochasticDepth(p=0.1, mode=row)
)
)
)
(avgpool): AdaptiveAvgPool2d(output_size=1)
(classifier): Linear(in_features=768, out_features=1000, bias=True)
)
但是,我们可以看到,当我们直接修改classifier里面的结构,他会将整个classifier都重新定义为你的输入的样子,那么当你只是要修改最后的分类层的话,你就只能重新去定义一整个sequential,并且在重新定义的时候,如果你已经加载了预训练的参数,预训练模型的参数就会丢失,这样就会非常麻烦。
那如何只修改classifier中最后一个线性层呢?我在网络上查找了这个内容,发现比较难找到相关的资料。大家大多只能访问模型中拥有属性名的部分结构;对于Sequential内部的子层,有人尝试用model.classifier.0去访问,但数字不能作为属性名,这并不符合Python的语法。所以我又去查阅了nn.Sequential的源码。
def _get_item_by_idx(self, iterator, idx) -> T:
    """Get the idx-th item of the iterator"""
    # (Quoted from the torch.nn.Sequential source.) Supports list-style
    # indexing, including negative indices.
    size = len(self)
    idx = operator.index(idx)  # accept any integer-like index object
    if not -size <= idx < size:
        raise IndexError('index {} is out of range'.format(idx))
    idx %= size  # map negative indices into [0, size)
    # islice advances the iterator of child modules to position idx.
    return next(islice(iterator, idx, None))
可以看到其中有个迭代器,所以我就尝试了用列表的访问方式去访问,结果证明是可行的。
# nn.Sequential supports list-style indexing, so only the final Linear layer
# (index 2) is swapped for a 4-class head; the LayerNorm2d and Flatten at
# indices 0 and 1 keep their (possibly pretrained) parameters.
model.classifier[2] = nn.Linear(in_features=768, out_features=4, bias=True)
print(model)
####################输出结果如下########################
ConvNeXt(
(features): Sequential(
(0): Conv2dNormActivation(
(0): Conv2d(3, 96, kernel_size=(4, 4), stride=(4, 4))
(1): LayerNorm2d((96,), eps=1e-06, elementwise_affine=True)
)
(1): Sequential(
(0): CNBlock(
(block): Sequential(
(0): Conv2d(96, 96, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=96)
(1): Permute()
(2): LayerNorm((96,), eps=1e-06, elementwise_affine=True)
(3): Linear(in_features=96, out_features=384, bias=True)
(4): GELU(approximate=none)
(5): Linear(in_features=384, out_features=96, bias=True)
(6): Permute()
)
(stochastic_depth): StochasticDepth(p=0.0, mode=row)
)
(1): CNBlock(
(block): Sequential(
(0): Conv2d(96, 96, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=96)
(1): Permute()
(2): LayerNorm((96,), eps=1e-06, elementwise_affine=True)
(3): Linear(in_features=96, out_features=384, bias=True)
(4): GELU(approximate=none)
(5): Linear(in_features=384, out_features=96, bias=True)
(6): Permute()
)
(stochastic_depth): StochasticDepth(p=0.0058823529411764705, mode=row)
)
(2): CNBlock(
(block): Sequential(
(0): Conv2d(96, 96, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=96)
(1): Permute()
(2): LayerNorm((96,), eps=1e-06, elementwise_affine=True)
(3): Linear(in_features=96, out_features=384, bias=True)
(4): GELU(approximate=none)
(5): Linear(in_features=384, out_features=96, bias=True)
(6): Permute()
)
(stochastic_depth): StochasticDepth(p=0.011764705882352941, mode=row)
)
)
(2): Sequential(
(0): LayerNorm2d((96,), eps=1e-06, elementwise_affine=True)
(1): Conv2d(96, 192, kernel_size=(2, 2), stride=(2, 2))
)
(3): Sequential(
(0): CNBlock(
(block): Sequential(
(0): Conv2d(192, 192, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=192)
(1): Permute()
(2): LayerNorm((192,), eps=1e-06, elementwise_affine=True)
(3): Linear(in_features=192, out_features=768, bias=True)
(4): GELU(approximate=none)
(5): Linear(in_features=768, out_features=192, bias=True)
(6): Permute()
)
(stochastic_depth): StochasticDepth(p=0.017647058823529415, mode=row)
)
(1): CNBlock(
(block): Sequential(
(0): Conv2d(192, 192, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=192)
(1): Permute()
(2): LayerNorm((192,), eps=1e-06, elementwise_affine=True)
(3): Linear(in_features=192, out_features=768, bias=True)
(4): GELU(approximate=none)
(5): Linear(in_features=768, out_features=192, bias=True)
(6): Permute()
)
(stochastic_depth): StochasticDepth(p=0.023529411764705882, mode=row)
)
(2): CNBlock(
(block): Sequential(
(0): Conv2d(192, 192, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=192)
(1): Permute()
(2): LayerNorm((192,), eps=1e-06, elementwise_affine=True)
(3): Linear(in_features=192, out_features=768, bias=True)
(4): GELU(approximate=none)
(5): Linear(in_features=768, out_features=192, bias=True)
(6): Permute()
)
(stochastic_depth): StochasticDepth(p=0.029411764705882353, mode=row)
)
)
(4): Sequential(
(0): LayerNorm2d((192,), eps=1e-06, elementwise_affine=True)
(1): Conv2d(192, 384, kernel_size=(2, 2), stride=(2, 2))
)
(5): Sequential(
(0): CNBlock(
(block): Sequential(
(0): Conv2d(384, 384, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=384)
(1): Permute()
(2): LayerNorm((384,), eps=1e-06, elementwise_affine=True)
(3): Linear(in_features=384, out_features=1536, bias=True)
(4): GELU(approximate=none)
(5): Linear(in_features=1536, out_features=384, bias=True)
(6): Permute()
)
(stochastic_depth): StochasticDepth(p=0.03529411764705883, mode=row)
)
(1): CNBlock(
(block): Sequential(
(0): Conv2d(384, 384, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=384)
(1): Permute()
(2): LayerNorm((384,), eps=1e-06, elementwise_affine=True)
(3): Linear(in_features=384, out_features=1536, bias=True)
(4): GELU(approximate=none)
(5): Linear(in_features=1536, out_features=384, bias=True)
(6): Permute()
)
(stochastic_depth): StochasticDepth(p=0.0411764705882353, mode=row)
)
(2): CNBlock(
(block): Sequential(
(0): Conv2d(384, 384, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=384)
(1): Permute()
(2): LayerNorm((384,), eps=1e-06, elementwise_affine=True)
(3): Linear(in_features=384, out_features=1536, bias=True)
(4): GELU(approximate=none)
(5): Linear(in_features=1536, out_features=384, bias=True)
(6): Permute()
)
(stochastic_depth): StochasticDepth(p=0.047058823529411764, mode=row)
)
(3): CNBlock(
(block): Sequential(
(0): Conv2d(384, 384, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=384)
(1): Permute()
(2): LayerNorm((384,), eps=1e-06, elementwise_affine=True)
(3): Linear(in_features=384, out_features=1536, bias=True)
(4): GELU(approximate=none)
(5): Linear(in_features=1536, out_features=384, bias=True)
(6): Permute()
)
(stochastic_depth): StochasticDepth(p=0.052941176470588235, mode=row)
)
(4): CNBlock(
(block): Sequential(
(0): Conv2d(384, 384, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=384)
(1): Permute()
(2): LayerNorm((384,), eps=1e-06, elementwise_affine=True)
(3): Linear(in_features=384, out_features=1536, bias=True)
(4): GELU(approximate=none)
(5): Linear(in_features=1536, out_features=384, bias=True)
(6): Permute()
)
(stochastic_depth): StochasticDepth(p=0.058823529411764705, mode=row)
)
(5): CNBlock(
(block): Sequential(
(0): Conv2d(384, 384, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=384)
(1): Permute()
(2): LayerNorm((384,), eps=1e-06, elementwise_affine=True)
(3): Linear(in_features=384, out_features=1536, bias=True)
(4): GELU(approximate=none)
(5): Linear(in_features=1536, out_features=384, bias=True)
(6): Permute()
)
(stochastic_depth): StochasticDepth(p=0.06470588235294118, mode=row)
)
(6): CNBlock(
(block): Sequential(
(0): Conv2d(384, 384, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=384)
(1): Permute()
(2): LayerNorm((384,), eps=1e-06, elementwise_affine=True)
(3): Linear(in_features=384, out_features=1536, bias=True)
(4): GELU(approximate=none)
(5): Linear(in_features=1536, out_features=384, bias=True)
(6): Permute()
)
(stochastic_depth): StochasticDepth(p=0.07058823529411766, mode=row)
)
(7): CNBlock(
(block): Sequential(
(0): Conv2d(384, 384, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=384)
(1): Permute()
(2): LayerNorm((384,), eps=1e-06, elementwise_affine=True)
(3): Linear(in_features=384, out_features=1536, bias=True)
(4): GELU(approximate=none)
(5): Linear(in_features=1536, out_features=384, bias=True)
(6): Permute()
)
(stochastic_depth): StochasticDepth(p=0.07647058823529412, mode=row)
)
(8): CNBlock(
(block): Sequential(
(0): Conv2d(384, 384, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=384)
(1): Permute()
(2): LayerNorm((384,), eps=1e-06, elementwise_affine=True)
(3): Linear(in_features=384, out_features=1536, bias=True)
(4): GELU(approximate=none)
(5): Linear(in_features=1536, out_features=384, bias=True)
(6): Permute()
)
(stochastic_depth): StochasticDepth(p=0.0823529411764706, mode=row)
)
)
(6): Sequential(
(0): LayerNorm2d((384,), eps=1e-06, elementwise_affine=True)
(1): Conv2d(384, 768, kernel_size=(2, 2), stride=(2, 2))
)
(7): Sequential(
(0): CNBlock(
(block): Sequential(
(0): Conv2d(768, 768, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=768)
(1): Permute()
(2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
(3): Linear(in_features=768, out_features=3072, bias=True)
(4): GELU(approximate=none)
(5): Linear(in_features=3072, out_features=768, bias=True)
(6): Permute()
)
(stochastic_depth): StochasticDepth(p=0.08823529411764706, mode=row)
)
(1): CNBlock(
(block): Sequential(
(0): Conv2d(768, 768, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=768)
(1): Permute()
(2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
(3): Linear(in_features=768, out_features=3072, bias=True)
(4): GELU(approximate=none)
(5): Linear(in_features=3072, out_features=768, bias=True)
(6): Permute()
)
(stochastic_depth): StochasticDepth(p=0.09411764705882353, mode=row)
)
(2): CNBlock(
(block): Sequential(
(0): Conv2d(768, 768, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=768)
(1): Permute()
(2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
(3): Linear(in_features=768, out_features=3072, bias=True)
(4): GELU(approximate=none)
(5): Linear(in_features=3072, out_features=768, bias=True)
(6): Permute()
)
(stochastic_depth): StochasticDepth(p=0.1, mode=row)
)
)
)
(avgpool): AdaptiveAvgPool2d(output_size=1)
(classifier): Sequential(
(0): LayerNorm2d((768,), eps=1e-06, elementwise_affine=True)
(1): Flatten(start_dim=1, end_dim=-1)
(2): Linear(in_features=768, out_features=4, bias=True)
)
)
那么我们能够顺利的访问模型的每个结构的话,修改的话也就十分简单了。
# add_module appends a named child at the END of the Sequential — i.e. after
# the final Linear, so this Dropout would act on the logits. Shown only to
# demonstrate how to extend an existing container with a new named layer.
model.classifier.add_module("add_dropout",nn.Dropout())
print(model)
参考链接: https://blog.csdn.net/ltochange/article/details/121421776
https://blog.csdn.net/qq_39332551/article/details/124943453