8 - nn.Module源码讲解(下)


  • 1. 模型的保存和加载 (权重保存-简易方法)
  • 2. 保存checkpoint
    • 2.1 导入所有相关的库
    • 2.2 定义并初始化神经网络
    • 2.3 初始化优化器
    • 2.4 保存常规检查点checkpoint
    • 2.5 加载常规检查点checkpoint
  • 3. to 操作
  • 4. _parameters和parameters的区别
  • 5. state_dict
  • 6. parameters&named_parameters
  • 7. _modules&named_modules

1. 模型的保存和加载 (权重保存-简易方法)

  • 保存状态字典
import torch
from torch import nn
import torchvision.models as models

# Download a pretrained VGG16 model from torchvision.
# NOTE(review): newer torchvision releases may prefer `weights=` over
# `pretrained=` — confirm against the installed version.
mymodel = models.vgg16(pretrained=True)

# Save only the model's weights (its state_dict) to disk.
torch.save(mymodel.state_dict(), "model_weights.pth")
  • 创建一个需要加载模型的实例
# Create a new instance with the same architecture as the saved model.
new_model = models.vgg16()
  • 将保存好的权重加载到新的模型中
# Load the previously saved weights into the new model.
new_model.load_state_dict(torch.load("model_weights.pth"))
# Switch the model to inference mode; this changes how dropout and
# batch-norm layers behave.
new_model.eval()

2. 保存checkpoint


2.1 导入所有相关的库

import torch
import torch.nn as nn
import torch.optim as optim
from torch import functional as F

2.2 定义并初始化神经网络

# 定义神经网络
class Net(nn.Module):
    """Small CNN: two conv + max-pool stages followed by three linear layers.

    The flatten step in ``forward`` hard-codes 16 * 5 * 5 features per
    sample, which is what a 3-channel 32x32 input produces after the two
    5x5 convolutions and 2x2 poolings.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Run the network on ``x`` and return the 10-way output of ``fc3``."""
        # Look relu up through nn.functional: the module-level alias `F` is
        # bound to torch.functional, which does not provide relu.
        relu = nn.functional.relu
        x = self.pool(relu(self.conv1(x)))
        x = self.pool(relu(self.conv2(x)))
        # Flatten every sample to a 400-element vector for the linear layers.
        x = x.view(-1, 16 * 5 * 5)
        x = relu(self.fc1(x))
        x = relu(self.fc2(x))
        return self.fc3(x)

# Instantiate the network.
net = Net()

2.3 初始化优化器


  • lr :学习率 ;learning_rate
  • momentum:动量
# Create the SGD optimizer over the network's parameters and set its
# hyperparameters (learning rate and momentum).
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

2.4 保存常规检查点checkpoint

# Additional information stored alongside the weights.
# Epoch count recorded in the checkpoint (placeholder value).
EPOCH = 5
# File the checkpoint is written to.
PATH = "model.pt"
# Loss value recorded in the checkpoint.
LOSS = 0.4

# Bundle the model weights, optimizer state and training metadata into a
# single dictionary and write it to PATH.
torch.save({
    'epoch': EPOCH,
    'model_state_dict': net.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
    'loss': LOSS,
}, PATH)

2.5 加载常规检查点checkpoint

# Re-create the model.
model = Net()
# Re-create the optimizer; it must be bound to the parameters of the model
# that is about to be restored, not to the earlier `net` instance.
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

# Read the saved checkpoint dictionary back from model.pt.
checkpoint = torch.load(PATH)

# Restore the model weights.
model.load_state_dict(checkpoint['model_state_dict'])
# Restore the optimizer state.
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
# Restore the epoch counter.
epoch = checkpoint['epoch']
# Restore the recorded loss.
loss = checkpoint['loss']
# Put the model in inference mode (use model.train() instead to resume
# training).
model.eval()


3. to 操作


# 1.导入相关库
import torch
from torch import nn

# 2.定义模型
class MyTest(nn.Module):
    """Toy module holding two linear layers and one 2-D batch-norm layer.

    No ``forward`` is defined; the class only exists so that its registered
    submodules, parameters and buffers can be inspected.
    """

    def __init__(self):
        super().__init__()
        self.linear1 = nn.Linear(in_features=2, out_features=3)
        self.linear2 = nn.Linear(in_features=3, out_features=4)
        self.batchnorm = nn.BatchNorm2d(num_features=4)

# 3. Instantiate the network.
mymodel = MyTest()

# 4. Print the dtype of linear1's weight before conversion
#    (torch.float32 with PyTorch's default dtype).
print(f"linear1.weight.dtype = {mymodel.linear1.weight.dtype}")

# 5. Convert the model's parameters to torch.double.
mymodel.to(torch.double)

# 6. Print the dtype of linear1's weight again; it is now torch.float64.
print(f"linear1.weight.dtype = {mymodel.linear1.weight.dtype}")

# Conversion succeeded: torch.float32 -> torch.float64

4. _parameters和parameters的区别

  • _parameters:只包含直接注册在当前模块上的 nn.Parameter,与子模块无关
  • parameters():不仅返回当前模块自身的参数,还会递归返回其所有子模块的参数
# 1.导入相关库
import torch
from torch import nn

# 2.定义模型
class MyTest(nn.Module):
    """Toy module holding two linear layers and one 2-D batch-norm layer.

    No ``forward`` is defined; the class only exists so that its registered
    submodules, parameters and buffers can be inspected.
    """

    def __init__(self):
        super().__init__()
        self.linear1 = nn.Linear(in_features=2, out_features=3)
        self.linear2 = nn.Linear(in_features=3, out_features=4)
        self.batchnorm = nn.BatchNorm2d(num_features=4)

# 3. Instantiate the network.
mymodel = MyTest()

# 4. Print the dtype of linear1's weight before conversion
#    (torch.float32 with PyTorch's default dtype).
print(f"linear1.weight.dtype = {mymodel.linear1.weight.dtype}")

# 5. Convert the model's parameters to torch.double.
mymodel.to(torch.double)

# 6. Print the dtype of linear1's weight again; it is now torch.float64.
print(f"linear1.weight.dtype = {mymodel.linear1.weight.dtype}")

# Conversion succeeded: torch.float32 -> torch.float64

# _parameters only holds parameters registered directly on this module and
# ignores those of its submodules. MyTest registers none itself, so the
# mapping printed here is empty.
print(f"mymodel._parameters={mymodel._parameters}")
# The same holds for _buffers.
print(f"mymodel._buffers={mymodel._buffers}")

# Iterate over every parameter of the model, submodules included.
for pa in mymodel.parameters():
    print(f"parameters={pa}")

5. state_dict

  • 作用: 保存模型的参数
    def _save_to_state_dict(self, destination, prefix, keep_vars):
        r"""Saves module state to `destination` dictionary, containing a state
        of the module, but not its descendants. This is called on every
        submodule in :meth:`~torch.nn.Module.state_dict`.

        In rare cases, subclasses can achieve class-specific behavior by
        overriding this method with custom logic.

            destination (dict): a dict where state will be stored
            prefix (str): the prefix for parameters and buffers used in this
        for name, param in self._parameters.items():
            if param is not None:
                destination[prefix + name] = param if keep_vars else param.detach()
        for name, buf in self._buffers.items():
            if buf is not None and name not in self._non_persistent_buffers_set:
                destination[prefix + name] = buf if keep_vars else buf.detach()

    # The user can pass an optional arbitrary mappable object to `state_dict`, in which case `state_dict` returns
    # back that same object. But if they pass nothing, an `OrderedDict` is created and returned.
    T_destination = TypeVar('T_destination', bound=Mapping[str, Tensor])

    def state_dict(self, destination=None, prefix='', keep_vars=False):
        r"""Returns a dictionary containing a whole state of the module.

        Both parameters and persistent buffers (e.g. running averages) are
        included. Keys are corresponding parameter and buffer names.

        Args:
            destination: mapping to fill in; a new ``OrderedDict`` carrying a
                ``_metadata`` attribute is created when this is ``None``
            prefix (str): string prepended to every key written by this call
            keep_vars (bool): forwarded to ``_save_to_state_dict`` and to the
                children's ``state_dict`` calls

        Returns:
            dict:
                a dictionary containing a whole state of the module

        Example::

            >>> module.state_dict().keys()
            ['bias', 'weight']

        """
        if destination is None:
            destination = OrderedDict()
            destination._metadata = OrderedDict()
        # Record this module's version under its own prefix (minus the trailing dot).
        destination._metadata[prefix[:-1]] = local_metadata = dict(version=self._version)
        # Save the state held directly by this module.
        self._save_to_state_dict(destination, prefix, keep_vars)
        # Recurse into each registered child module, extending the key prefix
        # with the child's name.
        for name, module in self._modules.items():
            if module is not None:
                module.state_dict(destination, prefix + name + '.', keep_vars=keep_vars)
        # A hook may return a replacement for the destination mapping.
        for hook in self._state_dict_hooks.values():
            hook_result = hook(self, destination, prefix, local_metadata)
            if hook_result is not None:
                destination = hook_result
        return destination
  • 案例代码
# 1.导入相关库
import torch
from torch import nn

# 2.定义模型
class MyTest(nn.Module):
    """Toy module holding two linear layers and one 2-D batch-norm layer.

    No ``forward`` is defined; the class only exists so that its registered
    submodules, parameters and buffers can be inspected.
    """

    def __init__(self):
        super().__init__()
        self.linear1 = nn.Linear(in_features=2, out_features=3)
        self.linear2 = nn.Linear(in_features=3, out_features=4)
        self.batchnorm = nn.BatchNorm2d(num_features=4)

# 3. Instantiate the network.
mymodel = MyTest()

# Print the complete state dict: the parameters and persistent buffers of
# every submodule, keyed by their dotted names.
print(f"mymodel.state_dict()={mymodel.state_dict()}")

  • 输出结果
mymodel.state_dict()=OrderedDict([('linear1.weight', tensor([[ 0.2309,  0.1947],
        [ 0.1572, -0.5997],
        [ 0.4253,  0.1184]], dtype=torch.float64)), ('linear1.bias', tensor([ 0.0010, -0.1031, -0.2801], dtype=torch.float64)), ('linear2.weight', tensor([[-0.4427,  0.0356,  0.0527],
        [-0.1414,  0.4508,  0.5320],
        [ 0.3816,  0.3372,  0.3967],
        [-0.1054,  0.1467, -0.5630]], dtype=torch.float64)), ('linear2.bias', tensor([ 0.3134, -0.3881,  0.2067, -0.1626], dtype=torch.float64)), ('batchnorm.weight', tensor([1., 1., 1., 1.], dtype=torch.float64)), ('batchnorm.bias', tensor([0., 0., 0., 0.], dtype=torch.float64)), ('batchnorm.running_mean', tensor([0., 0., 0., 0.], dtype=torch.float64)), ('batchnorm.running_var', tensor([1., 1., 1., 1.], dtype=torch.float64)), ('batchnorm.num_batches_tracked', tensor(0))])

6. parameters&named_parameters

  • parameters:返回参数的张量值
  • named_parameters():返回参数的名称和张量值
    8 - nn.Module源码讲解(下)_第1张图片
# 1.导入相关库
import torch
from torch import nn

# 2.定义模型
class MyTest(nn.Module):
    """Toy module holding two linear layers and one 2-D batch-norm layer.

    No ``forward`` is defined; the class only exists so that its registered
    submodules, parameters and buffers can be inspected.
    """

    def __init__(self):
        super().__init__()
        self.linear1 = nn.Linear(in_features=2, out_features=3)
        self.linear2 = nn.Linear(in_features=3, out_features=4)
        self.batchnorm = nn.BatchNorm2d(num_features=4)

# 3. Instantiate the network.
mymodel = MyTest()

# parameters() yields only the parameter tensors.
for pa in mymodel.parameters():
    print(f"parameters={pa}")

# named_parameters() yields (name, parameter) pairs.
for pad in mymodel.named_parameters():
    print(f"named_parameters={pad}")
parameters=Parameter containing:
tensor([[ 0.3966, -0.1722],
        [-0.6319,  0.4421],
        [ 0.1774,  0.5560]], requires_grad=True)
parameters=Parameter containing:
tensor([ 0.6004,  0.4914, -0.6790], requires_grad=True)
parameters=Parameter containing:
tensor([[ 0.5584,  0.4561,  0.3161],
        [-0.2900,  0.4303,  0.4115],
        [ 0.4425, -0.1321, -0.1889],
        [-0.4999, -0.3429, -0.2785]], requires_grad=True)
parameters=Parameter containing:
tensor([-0.4464, -0.3374, -0.0186, -0.1464], requires_grad=True)
parameters=Parameter containing:
tensor([1., 1., 1., 1.], requires_grad=True)
parameters=Parameter containing:
tensor([0., 0., 0., 0.], requires_grad=True)
named_parameters=('linear1.weight', Parameter containing:
tensor([[ 0.3966, -0.1722],
        [-0.6319,  0.4421],
        [ 0.1774,  0.5560]], requires_grad=True))
named_parameters=('linear1.bias', Parameter containing:
tensor([ 0.6004,  0.4914, -0.6790], requires_grad=True))
named_parameters=('linear2.weight', Parameter containing:
tensor([[ 0.5584,  0.4561,  0.3161],
        [-0.2900,  0.4303,  0.4115],
        [ 0.4425, -0.1321, -0.1889],
        [-0.4999, -0.3429, -0.2785]], requires_grad=True))
named_parameters=('linear2.bias', Parameter containing:
tensor([-0.4464, -0.3374, -0.0186, -0.1464], requires_grad=True))
named_parameters=('batchnorm.weight', Parameter containing:
tensor([1., 1., 1., 1.], requires_grad=True))
named_parameters=('batchnorm.bias', Parameter containing:
tensor([0., 0., 0., 0.], requires_grad=True))

7. _modules&named_modules

  • _modules:返回当前模型的直接子模块(不包含模型自身)
  • named_modules():递归返回模型自身及其所有子模块,并附带各自的名称
# 1.导入相关库
import torch
from torch import nn

# 2.定义模型
class MyTest(nn.Module):
    """Toy module holding two linear layers and one 2-D batch-norm layer.

    No ``forward`` is defined; the class only exists so that its registered
    submodules, parameters and buffers can be inspected.
    """

    def __init__(self):
        super().__init__()
        self.linear1 = nn.Linear(in_features=2, out_features=3)
        self.linear2 = nn.Linear(in_features=3, out_features=4)
        self.batchnorm = nn.BatchNorm2d(num_features=4)

# 3. Instantiate the network.
mymodel = MyTest()

# _modules maps each direct child's name to the child module; the model
# itself is not included.
print(f"mymodel._modules={mymodel._modules}")

print("*" * 10)
# named_modules() yields (name, module) pairs, starting with the model
# itself under the empty name.
for named_modules in mymodel.named_modules():
    print(f"named_modules={named_modules}")
mymodel._modules=OrderedDict([('linear1', Linear(in_features=2, out_features=3, bias=True)), ('linear2', Linear(in_features=3, out_features=4, bias=True)), ('batchnorm', BatchNorm2d(4, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True))])
named_modules=('', MyTest(
  (linear1): Linear(in_features=2, out_features=3, bias=True)
  (linear2): Linear(in_features=3, out_features=4, bias=True)
  (batchnorm): BatchNorm2d(4, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
))
named_modules=('linear1', Linear(in_features=2, out_features=3, bias=True))
named_modules=('linear2', Linear(in_features=3, out_features=4, bias=True))
named_modules=('batchnorm', BatchNorm2d(4, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True))
