Accessing, Initializing, and Sharing Model Parameters

import torch
from torch import nn
from torch.nn import init

print(torch.__version__)
# Construct the model
net = nn.Sequential(nn.Linear(4, 3), nn.ReLU(), nn.Linear(3, 1))
print(net)
# Construct the input data
X = torch.rand(2, 4)
Y = net(X).sum()
# Print the network output and its sum
print(net(X))
print(Y)
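
As a quick aside (a minimal check, not in the original text): the layers of a Sequential can be indexed directly, and nn.Linear stores its weight with shape (out_features, in_features):

print(type(net[0]))         # net[0] is the first nn.Linear module
print(net[0].weight.shape)  # torch.Size([3, 4]): (out_features, in_features)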

Accessing Model Parameters

print(type(net.named_parameters()))
# View all of the model's parameters and their shapes
for name, param in net.named_parameters():
    print(name, param.size())
# View only the first layer's parameters and shapes
for name, param in net[0].named_parameters():
    print(name, param.size(), type(param))
class MyModel(nn.Module):
    def __init__(self, **kwargs):
        super(MyModel, self).__init__(**kwargs)
        self.weight1 = nn.Parameter(torch.rand(20, 20))  # registered as a model parameter
        self.weight2 = torch.rand(20, 20)  # a plain tensor, not registered
    def forward(self, x):
        pass

n = MyModel()
# Only weight1 appears in the parameter list
for name, param in n.named_parameters():
    print(name)
weight_0 = list(net[0].parameters())[0]
print(weight_0.data)  # view the weight values
print(weight_0.grad)  # no backward pass has run yet, so the gradient is None
Y.backward()  # backpropagate
print(weight_0.grad)
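
Parameters can also be read through state_dict(), which maps each parameter's name to its tensor; a minimal sketch (using the same net as above):

# state_dict() returns an OrderedDict of name -> tensor
# covering all registered parameters and buffers
for name, tensor in net.state_dict().items():
    print(name, tensor.shape)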

Initializing Model Parameters

for name, param in net.named_parameters():
    if 'weight' in name:  # initialize the weight parameters
        init.normal_(param, mean=0, std=0.01)
        print(name, param.data)
for name, param in net.named_parameters():
    if 'bias' in name:  # initialize the bias parameters
        init.constant_(param, val=0)
        print(name, param.data)
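
An equivalent, commonly used pattern (a sketch, not from the original text) is to initialize per module type with Module.apply, which calls the given function recursively on every submodule:

def init_linear(m):
    # apply() visits every submodule, so filter for the layer type we want
    if isinstance(m, nn.Linear):
        init.normal_(m.weight, mean=0, std=0.01)
        init.constant_(m.bias, val=0)

net.apply(init_linear)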

Custom Initialization Methods

# Like init.normal_: rewrite the tensor's values in place, with no gradient tracking
def normal_(tensor, mean=0, std=1):
    with torch.no_grad():
        return tensor.normal_(mean, std)

# Set roughly half of the entries to 0 and draw the rest uniformly
# from [-10, -5] ∪ [5, 10]
def init_weight_(tensor):
    with torch.no_grad():
        tensor.uniform_(-10, 10)
        tensor *= (tensor.abs() >= 5).float()

# Initialize only the weight parameters
for name, param in net.named_parameters():
    if 'weight' in name:
        init_weight_(param)
        print(name, param.data)
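
A related trick (a minimal sketch, not in the original code): writing through param.data changes the values in place without being recorded by autograd, so gradient computation is unaffected:

for name, param in net.named_parameters():
    if 'bias' in name:
        param.data += 1  # in-place update via .data bypasses autograd tracking
        print(name, param.data)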

Sharing Model Parameters

linear = nn.Linear(1, 1, bias=False)
# Passing the same module twice to Sequential makes the two layers share one weight
net = nn.Sequential(linear, linear)
print(net)
# Initialize the parameters
for name, param in net.named_parameters():
    init.constant_(param, val=3)
    print(name, param.data)
# Check whether the two layers (and their weights) are the same objects in memory
print(id(net[0]) == id(net[1]))
print(id(net[0].weight) == id(net[1].weight))
x = torch.ones(1, 1)
y = net(x).sum()
print(y)
y.backward()
print(net[0].weight.grad)  # gradients of a shared parameter accumulate
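
Why 6? Both layers are the same module, so the forward pass computes y = w·(w·x) with w = 3 and x = 1; the shared weight is used twice, and each use contributes w·x = 3 to the gradient, giving dy/dw = 2wx = 6. A further backward pass would add on top of this unless the gradient is cleared first; a minimal sketch (not in the original):

net.zero_grad()  # reset the accumulated gradient
y2 = net(x).sum()  # a fresh forward pass builds a new graph
y2.backward()
print(net[0].weight.grad)  # tensor([[6.]]) again, rather than 12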
