To apply different update strategies to different parameter groups in a PyTorch optimizer, you group the parameters by name. model.parameters() returns all of a model's parameters as torch.nn.parameter.Parameter objects, but without their names. There are two ways to access parameters by name:

- model.state_dict(): a collections.OrderedDict whose values are plain torch.Tensor;
- model.named_parameters(): a generator yielding (name, torch.nn.parameter.Parameter) tuples. This is the one to use.

import torch
import torch.nn as nn
class LeNetPP(nn.Module):
    """LeNet++
    https://github.com/ydwen/caffe-face/blob/caffe-face/mnist_example/mnist_train_test.prototxt
    """

    def __init__(self):
        super(LeNetPP, self).__init__()
        self.layer_1 = nn.Sequential(
            nn.Conv2d(1, 32, 5, 1, 2),
            nn.PReLU(),
            nn.Conv2d(32, 32, 5, 1, 2),
            nn.PReLU(),
            nn.MaxPool2d(2, 2)
        )
        self.layer_2 = nn.Sequential(
            nn.Conv2d(32, 64, 5, 1, 2),
            nn.PReLU(),
            nn.Conv2d(64, 64, 5, 1, 2),
            nn.PReLU(),
            nn.MaxPool2d(2, 2)
        )
        self.layer_3 = nn.Sequential(
            nn.Conv2d(64, 128, 5, 1, 2),
            nn.PReLU(),
            nn.Conv2d(128, 128, 5, 1, 2),
            nn.PReLU(),
            nn.MaxPool2d(3, 2)
        )
        self.fc_1 = nn.Sequential(
            nn.Linear(1152, 2),  # 1152 = 128 * 3 * 3 for 28x28 MNIST input
            nn.PReLU()
        )
        self.fc_2 = nn.Linear(2, 10)

    def forward(self, x):
        x = self.layer_1(x)
        x = self.layer_2(x)
        x = self.layer_3(x)
        x = x.view(x.size(0), -1)
        print("conv out:", x.size())  # debug: flattened conv feature size
        feature = self.fc_1(x)
        logit = self.fc_2(feature)
        return feature, logit
model = LeNetPP()

print("--- model.parameters() ---")
for p in model.parameters():
    print("type:", type(p))  # torch.nn.parameter.Parameter
    break

print("\n--- model.state_dict() ---")
print("state_dict type:", type(model.state_dict()))  # collections.OrderedDict
print("state_dict:", list(model.state_dict()))  # print all parameter names
print("param type:", type(model.state_dict()["fc_2.bias"]))  # torch.Tensor
print("param value:", model.state_dict()["fc_2.bias"])

print("\n--- model.named_parameters() ---")
print("named_parameters type:", type(model.named_parameters()))  # generator
p = next(model.named_parameters())
print("elem type:", type(p))  # tuple: (name, parameter)
print("name:", p[0])
for name, param in model.named_parameters():
    if name == "fc_2.bias":
        print("param type:", type(param))  # torch.nn.parameter.Parameter
        print("param value:", param)
Output:

--- model.parameters() ---
type: <class 'torch.nn.parameter.Parameter'>
--- model.state_dict() ---
state_dict type: <class 'collections.OrderedDict'>
state_dict: [
'layer_1.0.weight', 'layer_1.0.bias', 'layer_1.1.weight', 'layer_1.2.weight', 'layer_1.2.bias', 'layer_1.3.weight',
'layer_2.0.weight', 'layer_2.0.bias', 'layer_2.1.weight', 'layer_2.2.weight', 'layer_2.2.bias', 'layer_2.3.weight',
'layer_3.0.weight', 'layer_3.0.bias', 'layer_3.1.weight', 'layer_3.2.weight', 'layer_3.2.bias', 'layer_3.3.weight',
'fc_1.0.weight', 'fc_1.0.bias', 'fc_1.1.weight', 'fc_2.weight', 'fc_2.bias']
param type: <class 'torch.Tensor'>
param value: tensor([-0.6480, -0.3849, -0.3259, 0.6984, 0.6055, -0.0741, 0.6410, -0.4339,
0.0896, -0.6722])
--- model.named_parameters() ---
named_parameters type: <class 'generator'>
elem type: <class 'tuple'>
name: layer_1.0.weight
param type: <class 'torch.nn.parameter.Parameter'>
param value: Parameter containing:
tensor([-0.6480, -0.3849, -0.3259, 0.6984, 0.6055, -0.0741, 0.6410, -0.4339,
0.0896, -0.6722], requires_grad=True)
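The name is just the attribute path: "fc_2.bias" is the bias of the fc_2 module, and "layer_1.0.weight" is the weight of the first submodule inside the layer_1 Sequential. Also note that state_dict() values are detached tensors that carry no grad, which is another reason named_parameters() is the right entry point when building optimizer groups. Below is a minimal sketch of per-group settings; the conv/fc split and the hyperparameter values are made up for illustration:

# Hypothetical grouping: give the fc layers their own learning rate,
# leave everything else at the optimizer defaults.
conv_params, fc_params = [], []
for name, param in model.named_parameters():
    if name.startswith("fc_"):
        fc_params.append(param)
    else:
        conv_params.append(param)

optimizer = torch.optim.SGD(
    [
        {"params": conv_params},            # uses the default lr below
        {"params": fc_params, "lr": 1e-2},  # illustrative: larger lr for fc layers
    ],
    lr=1e-3,
    momentum=0.9,
)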