A normal-distribution initializer still draws random samples, so your experiment results will not be exactly reproducible unless you fix the random seed. Put the seed-setting code at the very top (the first lines) of your script, as follows:
import random, numpy as np, torch
from torch import nn

random.seed(args.seed)            # args.seed is assumed to come from your argparse / config setup
np.random.seed(args.seed)
torch.manual_seed(args.seed)
torch.cuda.manual_seed(args.seed)
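A minimal sketch of what fixing the seed buys you (the make_net helper is a hypothetical name, not from the original notes): two networks built under the same seed start from identical weights.

def make_net(seed):
    # re-seeding right before construction makes the random init repeatable
    torch.manual_seed(seed)
    return nn.Sequential(nn.Linear(4, 8), nn.ReLU(), nn.Linear(8, 1))

net_a = make_net(0)
net_b = make_net(0)
print(torch.equal(net_a[0].weight, net_b[0].weight))  # True: same seed -> identical initialization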
Parameter initialization matters: the data determines what the loss landscape that gradient descent walks over looks like, while the initialization determines where on that mountain you start. PyTorch provides many initialization methods; combined with nn.Module.apply and an initialization function of your own, they let you initialize the parameters of every child module (every nn.Module subclass instance) inside an nn.Module, as follows.
Common built-in initialization methods (a short per-layer usage sketch follows the list):
"""
6.3.1. Built-in Initialization
using built-in func to init.
- `nn.init.normal_(module.weight, mean=0, std=0.01)`
- `nn.init.zeros_(module.bias)`
- `nn.init.constant_(module.weight, 1)`
- `nn.init.zeros_(module.bias)`
- `nn.init.xavier_uniform_(module.weight)`
- `nn.init.kaiming_uniform_(module.weight)` # default one for Linear, and the type is Leaky_ReLU
- `nn.init.uniform_(module.weight, -10, 10)`
"""
Initialize the whole network with one of the built-in methods:
net = nn.Sequential(nn.LazyLinear(8), nn.ReLU(), nn.LazyLinear(1))
X = torch.rand(size=(2, 4))
net(X)  # a forward pass materializes the LazyLinear layers as nn.Linear

def init_normal(module):
    if type(module) == nn.Linear:
        nn.init.normal_(module.weight, mean=0, std=0.01)
        nn.init.zeros_(module.bias)

net.apply(init_normal)
print(net[0].weight.data[0])
print(net[0].bias.data[0])
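As an optional sanity check (my own addition, continuing the snippet above), the first layer's weights should now be roughly zero-mean with std 0.01, and its biases exactly zero:

print(net[0].weight.data.mean().item(), net[0].weight.data.std().item())
print(torch.all(net[0].bias.data == 0).item())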
Initialize with your own initialization function:
def _weights_init(m):
    """
    intro:
        Custom weight initialization, meant to be used with nn.Module.apply.
        Covers nn.Linear, nn.Conv2d and nn.LayerNorm explicitly, plus other
        Conv* / BatchNorm* layers matched by class name.
    version:
        >>> 1.0.0
    args:
        :param nn.Module m: submodule handed in by nn.Module.apply
    """
    classname = m.__class__.__name__
    if type(m) == nn.Linear:
        # named_parameters() of nn.Linear yields ("weight", ...) first, then ("bias", ...)
        print("Init", *[(name, param.shape) for name, param in m.named_parameters()][0])  # linear weight
        nn.init.trunc_normal_(m.weight, std=.01)
        if m.bias is not None:
            print("Init", *[(name, param.shape) for name, param in m.named_parameters()][1])  # linear bias
            nn.init.zeros_(m.bias)
    elif isinstance(m, nn.Conv2d):
        nn.init.kaiming_normal_(m.weight, mode="fan_out")
        if m.bias is not None:
            nn.init.zeros_(m.bias)
    elif isinstance(m, nn.LayerNorm):
        nn.init.zeros_(m.bias)
        nn.init.ones_(m.weight)
    elif classname.startswith('Conv'):
        # fallback for other conv layers (e.g. Conv1d/Conv3d), matched by class name
        m.weight.data.normal_(0.0, 0.02)
    elif classname.find('BatchNorm') != -1:
        m.weight.data.normal_(1.0, 0.02)
        m.bias.data.fill_(0)
net = nn.Sequential(nn.LazyLinear(8), nn.ReLU(), nn.LazyLinear(1))
X = torch.rand(size=(2, 4))
net(X)  # materialize the LazyLinear layers before applying the initializer
net.apply(_weights_init)
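Since _weights_init also has Conv2d and LayerNorm branches, here is a minimal sketch (the small cnn below is a hypothetical model of my own, reusing _weights_init from above) that exercises those paths as well:

cnn = nn.Sequential(
    nn.Conv2d(3, 16, kernel_size=3, padding=1),
    nn.ReLU(),
    nn.Flatten(),
    nn.LayerNorm(16 * 8 * 8),
    nn.Linear(16 * 8 * 8, 10),
)
cnn.apply(_weights_init)
print(cnn[0].weight.std())     # close to the kaiming_normal_ fan_out std
print(cnn[3].weight.unique())  # LayerNorm weights were reset to all ones
print(cnn[4].bias.abs().max()) # Linear bias is zero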