This post walks through several ways to inspect a model's structure and parameters in PyTorch.
First, the model definition:
import torch
import torch.nn as nn
import torch.nn.functional as F

def autopad(k, p=None):  # kernel, padding
    # Pad to 'same'
    if p is None:
        p = k // 2 if isinstance(k, int) else [x // 2 for x in k]  # auto-pad
    return p

# SiLU activation: x * sigmoid(x)
class Conv(nn.Module):
    # Conv + BN + SiLU
    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):  # ch_in, ch_out, kernel, stride, padding, groups
        super(Conv, self).__init__()
        self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False)
        self.bn = nn.BatchNorm2d(c2)
        self.act = nn.SiLU() if act is True else (act if isinstance(act, nn.Module) else nn.Identity())

    def forward(self, x):
        return self.act(self.bn(self.conv(x)))

class Res_block1(nn.Module):
    def __init__(self, c=64, k=3, g=1, act=True):
        super(Res_block1, self).__init__()
        self.conv1 = Conv(c1=c, c2=c, k=k, g=g, act=act)
        self.conv2 = Conv(c1=c, c2=c, k=k, g=g, act=None)  # act=None falls through to nn.Identity()
        self.act1 = nn.ReLU()

    def forward(self, x):
        return self.act1(x + self.conv2(self.conv1(x)))
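Before digging into the inspection APIs, a quick sanity check that the block runs; the input size 1x64x32x32 below is an arbitrary choice (any spatial size works, since autopad keeps the feature map size unchanged):

model1 = Res_block1()
x = torch.randn(1, 64, 32, 32)  # batch, channels, height, width
y = model1(x)
print(y.shape)  # torch.Size([1, 64, 32, 32]); the residual add requires matching shapes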
The modules() method returns all of the modules that make up the model, recursing into each submodule's own components. Test code:
model1 = Res_block1()
for n in model1.modules():
    print(n)
model1 is an instance of Res_block1 and has three submodules: conv1, conv2, and act1. conv1 in turn is an instance of Conv, composed of three submodules: conv, bn, and act. The final output is therefore:
Res_block1(
(conv1): Conv(
(conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(act): SiLU()
)
(conv2): Conv(
(conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(act): Identity()
)
(act1): ReLU()
)
Conv(
(conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(act): SiLU()
)
Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
SiLU()
Conv(
(conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(act): Identity()
)
Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
Identity()
ReLU()
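As an aside, if you only want the direct submodules (conv1, conv2, act1) without recursing, children() and named_children() do exactly that; a minimal sketch:

model1 = Res_block1()
for child in model1.children():  # direct submodules only, no recursion
    print(type(child).__name__)  # Conv, Conv, ReLU -- compare with the ten entries modules() printed above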
named_modules() returns much the same thing as modules(), except that each module instance's name is returned alongside it. Test:
model1 = Res_block1()
for name, p in model1.named_modules():
    print(name, ':', p)
    if isinstance(p, nn.Conv2d):
        print('Yes nn.Conv2d!')
    elif isinstance(p, Conv):
        print('Yes Conv!')
    elif isinstance(p, Res_block1):
        print('Yes Res_block1!')
Note that p here is the same kind of thing as n above: an instance of some module class. We can dispatch on which class a module belongs to and act accordingly (e.g. parameter initialization); here I just print a message. The snippet above produces:
: Res_block1(
(conv1): Conv(
(conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(act): SiLU()
)
(conv2): Conv(
(conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(act): Identity()
)
(act1): ReLU()
)
Yes Res_block1!
conv1 : Conv(
(conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(act): SiLU()
)
Yes Conv!
conv1.conv : Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
Yes nn.Conv2d!
conv1.bn : BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
conv1.act : SiLU()
conv2 : Conv(
(conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(act): Identity()
)
Yes Conv!
conv2.conv : Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
Yes nn.Conv2d!
conv2.bn : BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
conv2.act : Identity()
act1 : ReLU()
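The prose above mentions parameter initialization as a typical use of this pattern. A minimal sketch under that assumption (the specific init schemes are illustrative choices, not something the model prescribes):

model1 = Res_block1()
for name, m in model1.named_modules():
    if isinstance(m, nn.Conv2d):
        nn.init.kaiming_normal_(m.weight, nonlinearity='relu')  # illustrative choice
    elif isinstance(m, nn.BatchNorm2d):
        nn.init.ones_(m.weight)  # reset BN scale
        nn.init.zeros_(m.bias)   # reset BN shift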
parameters() gives access to the model's trainable parameters, such as a convolution layer's weights and biases. (A parameter can be frozen by setting p.requires_grad = False.) Test code:
model1 = Res_block1()
for p in model1.parameters():
    print(type(p), p.shape)

print('conv1 submodule:')
for p in model1.conv1.parameters():
    print(type(p), p.shape)
Output:
<class 'torch.nn.parameter.Parameter'> torch.Size([64, 64, 3, 3])
<class 'torch.nn.parameter.Parameter'> torch.Size([64])
<class 'torch.nn.parameter.Parameter'> torch.Size([64])
<class 'torch.nn.parameter.Parameter'> torch.Size([64, 64, 3, 3])
<class 'torch.nn.parameter.Parameter'> torch.Size([64])
<class 'torch.nn.parameter.Parameter'> torch.Size([64])
conv1 submodule:
<class 'torch.nn.parameter.Parameter'> torch.Size([64, 64, 3, 3])
<class 'torch.nn.parameter.Parameter'> torch.Size([64])
<class 'torch.nn.parameter.Parameter'> torch.Size([64])
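Following up on the freezing note above, a minimal sketch that freezes conv1 and hands only the still-trainable parameters to an optimizer (the learning rate is an arbitrary placeholder):

model1 = Res_block1()
for p in model1.conv1.parameters():
    p.requires_grad = False  # freeze everything under conv1

optimizer = torch.optim.SGD(
    (p for p in model1.parameters() if p.requires_grad),  # skip frozen parameters
    lr=0.01)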
Compared with parameters(), named_parameters() also returns each parameter's name. Test:
model1 = Res_block1()
for name, p in model1.named_parameters():
    print(name, ' :', p.shape)
Output:
conv1.conv.weight : torch.Size([64, 64, 3, 3])
conv1.bn.weight : torch.Size([64])
conv1.bn.bias : torch.Size([64])
conv2.conv.weight : torch.Size([64, 64, 3, 3])
conv2.bn.weight : torch.Size([64])
conv2.bn.bias : torch.Size([64])
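Since the names encode the module path, they are convenient for selective operations. A minimal sketch that freezes only the BatchNorm parameters by matching on the name (the '.bn.' substring comes from the attribute name inside Conv):

model1 = Res_block1()
for name, p in model1.named_parameters():
    if '.bn.' in name:
        p.requires_grad = False  # freezes conv1.bn.* and conv2.bn.*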
state_dict() comes up all the time when saving and loading model weights. Unlike the methods above, which return iterators, it returns a dictionary (an OrderedDict) whose keys are parameter names and whose values are the parameter tensors, much like the output of named_parameters(). One difference, visible below: it also contains non-trainable buffers such as BatchNorm's running_mean, running_var, and num_batches_tracked.
Test:
model1 = Res_block1()
for k, v in model1.state_dict().items():
    print(k, ' :', v.shape)
Output:
conv1.conv.weight : torch.Size([64, 64, 3, 3])
conv1.bn.weight : torch.Size([64])
conv1.bn.bias : torch.Size([64])
conv1.bn.running_mean : torch.Size([64])
conv1.bn.running_var : torch.Size([64])
conv1.bn.num_batches_tracked : torch.Size([])
conv2.conv.weight : torch.Size([64, 64, 3, 3])
conv2.bn.weight : torch.Size([64])
conv2.bn.bias : torch.Size([64])
conv2.bn.running_mean : torch.Size([64])
conv2.bn.running_var : torch.Size([64])
conv2.bn.num_batches_tracked : torch.Size([])
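Finally, a minimal sketch of the save/load round trip mentioned above (the file name is an arbitrary placeholder):

model1 = Res_block1()
torch.save(model1.state_dict(), 'res_block1.pth')  # serialize weights and buffers only

model2 = Res_block1()  # must have the same architecture
model2.load_state_dict(torch.load('res_block1.pth'))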