1. Basic tensor initialization
Build a convolution-kernel parameter tensor, as follows:
import torch
from torch.nn import init

# kernel_size: 2x2, in_channels: 1 -> out_channels: 2
tensor = torch.empty(2, 1, 2, 2)   # uninitialized memory, values are garbage until filled
print(tensor)
Five basic initialization methods:
# 1. Uniform distribution -- aren't the values in the 0-1 range? Yes, with a=0, b=1 they fall in [0, 1)
w_uniform = init.uniform_(tensor, a=0., b=1.)
print("w_uniform\n", w_uniform)
# 2. Normal distribution
w_normal = init.normal_(tensor, mean=0., std=1.)
print("w_normal\n", w_normal)
# 3. Constant initialization
w_constant = init.constant_(tensor, val=0.5)
print(w_constant)
# 4. Zeros initialization; 5. Ones initialization
zero = init.zeros_(tensor)
one = init.ones_(tensor)
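All of these functions modify the tensor in place (the trailing underscore is PyTorch's in-place convention) and also return the same tensor object, so the variables above all alias one storage. A quick check:

same = init.ones_(tensor)
print(same is tensor)   # True: in-place init returns its input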
2. Xavier initialization
Helper function
calculate_gain: returns the recommended gain for the given nonlinearity, as listed below.
    Linear / Identity / Conv{1,2,3}D / Sigmoid : 1
    Tanh                                       : 5/3
    ReLU                                       : sqrt(2)
    Leaky ReLU                                 : sqrt(2 / (1 + negative_slope^2))
gains = init.calculate_gain("relu")
print(gains)
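Sanity-checking the Leaky ReLU row of the table; the slope 0.2 below is just an example value:

import math
slope = 0.2
print(init.calculate_gain('leaky_relu', slope))   # ~1.3868
print(math.sqrt(2. / (1. + slope ** 2)))          # same value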
# number of dimensions
dimensions = tensor.dim()
print(dimensions)
# numel() multiplies the dims together; here size is the receptive-field size of one kernel (2 * 2 = 4)
size = tensor[0][0].numel()
print("size\n",size)
# xavier_uniform_: samples from U(-a, a) with
# a = gain * sqrt(6 / (fan_in + fan_out))
# fan_in = kernel_size[0] * kernel_size[1] * in_channels
# fan_out = kernel_size[0] * kernel_size[1] * out_channels
# gain: default -> 1
w_xavier_uniform = init.xavier_uniform_(tensor, init.calculate_gain('relu'))
print("w_xavier_uniform:\n", w_xavier_uniform )
# xavier_normal_
# std = gain * sqrt(2 / (fan_in + fan_out))
w_xavier_normal = init.xavier_normal_(tensor)
print("w_xavier_normal:\n", w_xavier_normal)
3. Kaiming initialization
# kaiming_uniform_: samples from U(-bound, bound) with
# bound = gain * sqrt(3 / fan_mode)
# fan_mode -> one of [fan_in, fan_out], default is fan_in
"""
mode – either 'fan_in' (default) or 'fan_out'.
Choosing 'fan_in' preserves the magnitude of the variance of the weights in the forward pass.
Choosing 'fan_out' preserves the magnitudes in the backwards pass.
"""
# gain: same as above
# nonlinearity: used to look up the gain via init.calculate_gain()
# torch.nn.init.kaiming_uniform_(tensor, a=0, mode='fan_in', nonlinearity='leaky_relu')
# a is only used when nonlinearity is 'leaky_relu': it is the negative_slope passed to calculate_gain(),
# i.e. the slope on the negative half-axis of the activation that follows this layer (for ReLU, a=0)
w_kaiming_uniform = init.kaiming_uniform_(tensor, mode='fan_in', nonlinearity='relu')
print("w_kaiming_uniform:\n", w_kaiming_uniform)
# kaiming_normal_
# std = gain * sqrt(1 / fan_mode)
w_kaiming_normal = init.kaiming_normal_(tensor)
print("w_kaiming_normal:\n", w_kaiming_normal)
4. Model initialization
Two approaches:
1. Define an initialization function and apply it to the whole network with net.apply():
import torch.nn as nn
from torch.nn import init

def weight_init(m):
    if isinstance(m, nn.Conv2d):
        init.xavier_normal_(m.weight.data)
        if m.bias is not None:
            init.constant_(m.bias.data, 0.1)
    elif isinstance(m, nn.BatchNorm2d):
        m.weight.data.fill_(1.)
        m.bias.data.zero_()
    elif isinstance(m, nn.Linear):
        m.weight.data.normal_(0, 0.01)
        m.bias.data.zero_()

net = model()   # model: your own nn.Module subclass
# initialize every submodule recursively
net.apply(weight_init)
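Since model above is only a placeholder, here is a made-up stand-in showing that apply() recurses into every submodule:

demo_net = nn.Sequential(
    nn.Conv2d(1, 2, kernel_size=2),
    nn.BatchNorm2d(2),
    nn.Linear(2, 2),
)
demo_net.apply(weight_init)
print(demo_net[0].bias.data)   # tensor([0.1000, 0.1000]) from the Conv2d branch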
2. Iterate over the modules one by one (this loop typically lives inside the model class, as sketched after the code):
for m in self.modules():
    if isinstance(m, nn.Conv2d):
        init.kaiming_normal_(m.weight)
        if m.bias is not None:
            init.constant_(m.bias, 0)
    elif isinstance(m, nn.BatchNorm2d):
        init.constant_(m.weight, 1)
        init.constant_(m.bias, 0)
    elif isinstance(m, nn.Linear):
        init.normal_(m.weight, std=1e-3)
        if m.bias is not None:
            init.constant_(m.bias, 0)
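This loop assumes it runs inside the model class; a minimal sketch of where it usually lives (the class name and layers below are illustrative, not from the original):

import torch.nn as nn
from torch.nn import init

class Net(nn.Module):   # illustrative model
    def __init__(self):
        super().__init__()
        self.conv = nn.Conv2d(1, 2, kernel_size=2)
        self.fc = nn.Linear(8, 2)
        # run the initialization loop above once the layers exist
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                init.kaiming_normal_(m.weight)
                if m.bias is not None:
                    init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                init.normal_(m.weight, std=1e-3)
                if m.bias is not None:
                    init.constant_(m.bias, 0)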