首先定义网络模型
import torch
import torch.nn as nn
import numpy as np
# 设计模型 CNN
class CNN(torch.nn.Module):
    """Small convolutional classifier: two conv/pool stages and three linear layers.

    The first linear layer expects 320 input features, which matches
    single-channel 28x28 images (20 channels * 4 * 4 after the second
    pooling step). The network produces 10 output values per sample.
    """

    def __init__(self):
        """Create the convolution, pooling and fully connected layers."""
        super(CNN, self).__init__()
        self.conv1 = torch.nn.Conv2d(1, 10, kernel_size=(5, 5))   # conv layer 1
        self.conv2 = torch.nn.Conv2d(10, 20, kernel_size=(5, 5))  # conv layer 2
        self.pooling = torch.nn.MaxPool2d(2)                      # shared pooling layer
        self.fc1 = torch.nn.Linear(320, 256)                      # fully connected layer 1
        self.fc2 = torch.nn.Linear(256, 128)                      # fully connected layer 2
        self.fc3 = torch.nn.Linear(128, 10)                       # fully connected layer 3

    def forward(self, x):
        """Run a forward pass.

        Args:
            x: Input batch of shape (batch, 1, 28, 28); other spatial sizes
                will not flatten to the 320 features that ``fc1`` requires.

        Returns:
            Tensor of shape (batch, 10) holding the raw outputs of ``fc3``.
        """
        batch_size = x.size(0)
        # torch.relu is used instead of F.relu: the file never imports
        # torch.nn.functional as F, so the original call raised NameError.
        # 1*28*28 -> 10*24*24 -> 10*12*12  (conv1 -> pooling -> ReLU)
        x = torch.relu(self.pooling(self.conv1(x)))
        # 10*12*12 -> 20*8*8 -> 20*4*4  (conv2 -> pooling -> ReLU)
        x = torch.relu(self.pooling(self.conv2(x)))
        # 20*4*4 -> 320: flatten everything except the batch dimension
        x = x.view(batch_size, -1)
        # NOTE(review): there is no activation between the linear layers, so
        # fc1..fc3 compose into a single affine map — confirm this is intended.
        x = self.fc1(x)  # 320 -> 256
        x = self.fc2(x)  # 256 -> 128
        x = self.fc3(x)  # 128 -> 10
        return x
# Build the network instance that the rest of the walkthrough inspects.
model = CNN()

# Each of the six methods below returns a generator, so printing the call
# result shows only the generator object; iterate over it (for example into
# a list) to see the actual contents.
for label, method in (
    ('model.modules()', model.modules),
    ('model.named_modules()', model.named_modules),
    ('model.children()', model.children),
    ('model.named_children()', model.named_children),
    ('model.parameters()', model.parameters),
    ('model.named_parameters()', model.named_parameters),
):
    print(label, method())
model.modules() 遍历 model 的所有子层(也包括子层的子层),子层是指继承了 nn.Module 类的层。这里共 7 个:CNN() 本身,以及 self.conv1、self.conv2、self.pooling、self.fc1、self.fc2、self.fc3。
# Materialise the generator so the modules can be counted and printed.
model_modules = list(model.modules())
print('len(model_modules)=', len(model_modules))
print('model_modules', model_modules)
len(model_modules)= 7
model_modules [CNN(
(conv1): Conv2d(1, 10, kernel_size=(5, 5), stride=(1, 1))
(conv2): Conv2d(10, 20, kernel_size=(5, 5), stride=(1, 1))
(pooling): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(fc1): Linear(in_features=320, out_features=256, bias=True)
(fc2): Linear(in_features=256, out_features=128, bias=True)
(fc3): Linear(in_features=128, out_features=10, bias=True)
), Conv2d(1, 10, kernel_size=(5, 5), stride=(1, 1)), Conv2d(10, 20, kernel_size=(5, 5), stride=(1, 1)), MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False), Linear(in_features=320, out_features=256, bias=True), Linear(in_features=256, out_features=128, bias=True), Linear(in_features=128, out_features=10, bias=True)]
model.named_modules()就是带有layer name的model.modules()
# Collect the (name, module) pairs yielded by named_modules() into a list.
model_named_modules = list(model.named_modules())
print('len(model_named_modules)=', len(model_named_modules))
print('model_named_modules', model_named_modules)
len(model_named_modules)= 7
model_named_modules [('', CNN(
(conv1): Conv2d(1, 10, kernel_size=(5, 5), stride=(1, 1))
(conv2): Conv2d(10, 20, kernel_size=(5, 5), stride=(1, 1))
(pooling): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(fc1): Linear(in_features=320, out_features=256, bias=True)
(fc2): Linear(in_features=256, out_features=128, bias=True)
(fc3): Linear(in_features=128, out_features=10, bias=True)
)), ('conv1', Conv2d(1, 10, kernel_size=(5, 5), stride=(1, 1))), ('conv2', Conv2d(10, 20, kernel_size=(5, 5), stride=(1, 1))), ('pooling', MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)), ('fc1', Linear(in_features=320, out_features=256, bias=True)), ('fc2', Linear(in_features=256, out_features=128, bias=True)), ('fc3', Linear(in_features=128, out_features=10, bias=True))]
model.children() 只获取 model 的直接子层(即第二层的网络结构),不会继续递归到子层的子层。
# Collect the modules yielded by children() into a list.
model_children = list(model.children())
print('len(model_children)=', len(model_children))
print('model_children', model_children)
len(model_children)= 6
model_children [Conv2d(1, 10, kernel_size=(5, 5), stride=(1, 1)), Conv2d(10, 20, kernel_size=(5, 5), stride=(1, 1)), MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False), Linear(in_features=320, out_features=256, bias=True), Linear(in_features=256, out_features=128, bias=True), Linear(in_features=128, out_features=10, bias=True)]
model.named_children() 就是带有layer name的 model.children()
# Collect the (name, module) pairs yielded by named_children() into a list.
model_named_children = list(model.named_children())
print('len(model_named_children)=', len(model_named_children))
print('model_named_children', model_named_children)
len(model_named_children)= 6
model_named_children [('conv1', Conv2d(1, 10, kernel_size=(5, 5), stride=(1, 1))), ('conv2', Conv2d(10, 20, kernel_size=(5, 5), stride=(1, 1))), ('pooling', MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)), ('fc1', Linear(in_features=320, out_features=256, bias=True)), ('fc2', Linear(in_features=256, out_features=128, bias=True)), ('fc3', Linear(in_features=128, out_features=10, bias=True))]
model.parameters() 迭代地返回模型的所有可学习参数;没有可学习参数的层(如 ReLU、池化层)不会有任何输出。
# Collect the parameters yielded by parameters() into a list.
model_parameters = list(model.parameters())
print('len(model_parameters)=', len(model_parameters))
print('model_parameters', model_parameters)
输出结果太长,这里就不打印了
model.named_parameters() 输出包含 layer name 的 model.parameters()
# Collect the (name, parameter) pairs yielded by named_parameters() into a list.
model_named_parameters = list(model.named_parameters())
print('len(model_named_parameters)=', len(model_named_parameters))
print('model_named_parameters', model_named_parameters)
model.state_dict() 能够获取模型中的所有参数,包括可学习参数和不可学习参数,其返回值是一个有序字典 OrderedDict。
它既包含 model 中所有的可学习参数(weight、bias),也包含不可学习参数(如 BN layer 的 running mean 和 running var 等)。
# Print the label followed by the mapping returned by state_dict().
print(
    'model.state_dict()',
    model.state_dict(),
)