Normalization dimensions: [N, H, W], computing the mean and variance C times (BatchNorm2d)
import torch
from torch import nn
# BatchNorm2d expects NCHW input
input = torch.randn((8, 32, 32, 3))   # NHWC
input_ = input.permute(0, 3, 1, 2)    # NHWC -> NCHW
print(input_.shape)
BN = nn.BatchNorm2d(3)
output = BN(input_)
print(output[:,0,:,:])
# test: reproduce the result for channel 0 by hand
X = input_[:, 0, :, :]  # channel 0 across the whole batch: shape (N, H, W)
print(X.shape)
mean = torch.mean(X)
var = torch.var(X, unbiased=False)  # BatchNorm normalizes with the biased variance
BN_one = (X - mean) / torch.pow(var + BN.eps, 0.5) * BN.weight[0] + BN.bias[0]
print("BN_one:",BN_one)
torch.Size([8, 3, 32, 32])
tensor([[[ 1.6338e-01, -2.3648e-01, -1.2965e+00, ..., 9.6178e-01,
-4.1027e-01, 1.4072e+00],
[-9.9580e-02, -8.1695e-02, -2.2693e-01, ..., 1.1076e+00,
2.3096e-01, -1.4278e+00],
[ 1.2291e+00, 1.0623e+00, -5.4392e-01, ..., -4.3424e-02,
-2.2262e-01, -5.1729e-01],
...,
[-1.8724e+00, 5.1297e-01, 6.1065e-01, ..., 1.4684e-01,
-8.5345e-02, -1.9820e+00],
[-1.8862e-02, -5.1397e-01, -8.9240e-01, ..., 1.0580e+00,
1.3407e+00, 2.6915e+00],
[ 2.9983e-01, -8.8519e-01, -7.3509e-01, ..., 3.3777e-01,
4.3252e-01, 3.9348e-01]]], grad_fn=<SliceBackward0>)
torch.Size([8, 32, 32])
BN_one: tensor([[[ 1.6337e-01, -2.3647e-01, -1.2964e+00, ..., 9.6173e-01,
-4.1024e-01, 1.4071e+00],
[-9.9574e-02, -8.1690e-02, -2.2692e-01, ..., 1.1076e+00,
2.3094e-01, -1.4277e+00],
[ 1.2290e+00, 1.0623e+00, -5.4388e-01, ..., -4.3422e-02,
-2.2260e-01, -5.1726e-01],
...,
[-1.8722e+00, 5.1294e-01, 6.1061e-01, ..., 1.4683e-01,
-8.5340e-02, -1.9818e+00],
[-1.8860e-02, -5.1393e-01, -8.9234e-01, ..., 1.0580e+00,
1.3407e+00, 2.6914e+00],
[ 2.9981e-01, -8.8513e-01, -7.3504e-01, ..., 3.3775e-01,
4.3249e-01, 3.9345e-01]]], grad_fn=<AddBackward0>)
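The per-channel check above can also be vectorized so that all C channels are verified at once. A minimal sketch, reusing input_, BN, and output from the code above; per-channel statistics are computed over the (N, H, W) dimensions with broadcasting, and torch.allclose confirms the match:

mean_all = input_.mean(dim=(0, 2, 3), keepdim=True)                # one mean per channel: (1, 3, 1, 1)
var_all = input_.var(dim=(0, 2, 3), unbiased=False, keepdim=True)  # biased variance per channel
BN_all = (input_ - mean_all) / torch.pow(var_all + BN.eps, 0.5)
BN_all = BN_all * BN.weight.view(1, -1, 1, 1) + BN.bias.view(1, -1, 1, 1)
print(torch.allclose(BN_all, output, atol=1e-6))  # expected: True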
Normalization dimensions: [C, H, W], computing the mean and variance N times (LayerNorm)
import torch
from torch import nn
input = torch.randn((8,3,32,32))
LN = nn.LayerNorm((3, 32, 32))  # normalize over (C, H, W) per sample
output = LN(input)
print(output[0])
# test: reproduce the result for sample 0 by hand
mean = torch.mean(input[0])
var = torch.var(input[0], unbiased=False)  # biased variance, matching LayerNorm's internal computation
print(mean)
print(var)
LN_one = (input[0] - mean) / torch.pow(var + LN.eps, 0.5) * LN.weight + LN.bias
print(LN_one)
tensor([[[ 0.4859, -0.8371, -1.4560, ..., 0.2984, -2.5241, 0.8989],
[ 2.0582, -0.8824, 0.6683, ..., 1.9867, -0.5992, 0.3718],
[ 0.0882, -1.7858, -0.2839, ..., 1.7095, 2.0336, 0.7063],
...,
[-1.2601, -2.2888, -0.5641, ..., 0.3681, 1.0542, 0.5740],
[ 0.5747, -0.5288, 0.3245, ..., 0.8172, -0.2308, -0.1831],
[-1.2777, -0.1367, 0.1523, ..., -1.9920, 1.3162, -0.3111]]],
grad_fn=<SelectBackward0>)
tensor(0.0257)
tensor(0.9927)
tensor([[[ 0.4858, -0.8370, -1.4558, ..., 0.2984, -2.5237, 0.8987],
[ 2.0579, -0.8822, 0.6682, ..., 1.9863, -0.5991, 0.3718],
[ 0.0882, -1.7855, -0.2838, ..., 1.7092, 2.0332, 0.7061],
...,
[-1.2599, -2.2884, -0.5640, ..., 0.3680, 1.0540, 0.5739],
[ 0.5746, -0.5288, 0.3244, ..., 0.8171, -0.2307, -0.1830],
[-1.2775, -0.1367, 0.1523, ..., -1.9917, 1.3160, -0.3110]]],
grad_fn=<AddBackward0>)
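The same cross-check works for all N samples at once. A minimal sketch, reusing input, LN, and output from the code above; since LN.weight and LN.bias already have the full normalized shape (3, 32, 32), they broadcast directly against the (8, 3, 32, 32) input:

mean_all = input.mean(dim=(1, 2, 3), keepdim=True)                 # one mean per sample: (8, 1, 1, 1)
var_all = input.var(dim=(1, 2, 3), unbiased=False, keepdim=True)
LN_all = (input - mean_all) / torch.pow(var_all + LN.eps, 0.5) * LN.weight + LN.bias
print(torch.allclose(LN_all, output, atol=1e-6))  # expected: True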
There is also another form of LayerNorm, which normalizes over the last dimension only
import torch
from torch import nn
input = torch.randn((8, 3, 32, 32))
input_ = input.permute(0, 2, 3, 1)  # NCHW -> NHWC, so the channel dimension comes last
# print(input_)
print(input_.shape)
LN = nn.LayerNorm(3)  # normalize over the last dimension only (size 3)
output = LN(input_)
print(output[0,0,0,:])
# test: reproduce one spatial position by hand
mean = torch.mean(input_[0, 0, 0, :])
var = torch.var(input_[0, 0, 0, :], unbiased=False)  # note: the biased sample variance is used here
# print(LN.weight.shape)
LN_one = (input_[0, 0, 0, :] - mean) / torch.pow(var + LN.eps, 0.5) * LN.weight + LN.bias
print(LN_one)
torch.Size([8, 32, 32, 3])
tensor([ 1.3608, -0.3470, -1.0138], grad_fn=<SliceBackward0>)
tensor([ 1.3608, -0.3470, -1.0138], grad_fn=<AddBackward0>)
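This last-dimension form is the one typically used in Transformer blocks. As a sketch of the equivalent functional call, reusing input_, LN, and output from above:

import torch.nn.functional as F
out_f = F.layer_norm(input_, (3,), LN.weight, LN.bias, LN.eps)  # same as LN(input_)
print(torch.allclose(out_f, output))  # expected: True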
Normalization dimensions: [H, W], computing the mean and variance N*C times (InstanceNorm2d)
import torch
from torch import nn
input = torch.randn((8,3,32,32))
IN = nn.InstanceNorm2d(3, affine=True)  # affine defaults to False (no learnable parameters)
output = IN(input)
print(output[0,0,:,:])
# test: reproduce sample 0, channel 0 by hand
mean = torch.mean(input[0, 0, :, :])
var = torch.var(input[0, 0, :, :], unbiased=False)  # biased variance, matching InstanceNorm's internal computation
IN_one = (input[0, 0, :, :] - mean) / torch.pow(var + IN.eps, 0.5) * IN.weight[0] + IN.bias[0]
print(IN_one)
tensor([[ 0.4273, 0.9564, -1.8870, ..., -0.6834, -2.2355, 1.1222],
[ 0.8470, 0.0389, 1.2515, ..., -0.7465, -0.6226, -0.4495],
[-0.4743, -0.6062, 0.5174, ..., 0.9884, 2.7023, 1.2147],
...,
[-0.3232, -0.2927, 0.6052, ..., 1.4219, 0.1004, -0.4636],
[ 0.9279, -0.1547, -0.3453, ..., 0.7218, 0.7376, 2.1859],
[-0.8052, 1.1261, -0.3317, ..., 0.2217, -2.0800, 1.2837]],
grad_fn=<SliceBackward0>)
tensor([[ 0.4271, 0.9559, -1.8861, ..., -0.6830, -2.2344, 1.1217],
[ 0.8466, 0.0389, 1.2508, ..., -0.7461, -0.6223, -0.4492],
[-0.4740, -0.6059, 0.5171, ..., 0.9879, 2.7009, 1.2141],
...,
[-0.3231, -0.2926, 0.6049, ..., 1.4212, 0.1004, -0.4634],
[ 0.9274, -0.1546, -0.3451, ..., 0.7214, 0.7372, 2.1849],
[-0.8048, 1.1255, -0.3315, ..., 0.2216, -2.0790, 1.2831]],
grad_fn=<AddBackward0>)
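InstanceNorm can be cross-checked in one shot as well: one mean and variance per (sample, channel) pair. A minimal sketch, reusing input, IN, and output from the code above:

mean_all = input.mean(dim=(2, 3), keepdim=True)                    # shape (8, 3, 1, 1)
var_all = input.var(dim=(2, 3), unbiased=False, keepdim=True)
IN_all = (input - mean_all) / torch.pow(var_all + IN.eps, 0.5)
IN_all = IN_all * IN.weight.view(1, -1, 1, 1) + IN.bias.view(1, -1, 1, 1)
print(torch.allclose(IN_all, output, atol=1e-6))  # expected: True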
Normalization dimensions: [N, C, H, W] -> [N, G, C//G, H, W] -> [C//G, H, W], computing the mean and variance N*G times (GroupNorm)
import torch
from torch import nn
input = torch.randn((8,6,32,32))
GN = nn.GroupNorm(3, 6)  # separate the 6 channels into 3 groups of 2
output = GN(input)
print(output[0,0,:,:])
# test: reproduce group 0 of sample 0 by hand
input_ = input.reshape(8, 3, 2, 32, 32)  # (N, G, C//G, H, W)
print(input_.shape)
mean = torch.mean(input_[0, 0, :, :, :])  # statistics are shared within one group: (C//G, H, W)
var = torch.var(input_[0, 0, :, :, :], unbiased=False)
# GN.weight[0]/GN.bias[0] are channel 0's affine parameters; only channel 0 is printed below
GN_one = (input_[0, 0, :, :, :] - mean) / torch.pow(var + GN.eps, 0.5) * GN.weight[0] + GN.bias[0]
print(GN_one[0])
tensor([[-1.4510, 1.2567, -1.4740, ..., -0.5039, 0.8245, -0.2772],
[-0.0563, 0.4528, 0.7252, ..., -1.4089, -1.6286, 0.1479],
[-0.7159, -0.3537, 0.2684, ..., -0.6500, -1.3845, -0.1949],
...,
[-0.0898, -0.5160, -1.6077, ..., -0.0275, 1.0162, 0.1682],
[-1.2064, 1.1916, -0.1666, ..., -0.0890, 2.0758, -0.5677],
[ 2.1811, 0.0587, 0.6672, ..., -0.0153, 0.1549, -0.2035]],
grad_fn=<SliceBackward0>)
torch.Size([8, 3, 2, 32, 32])
tensor([[-1.4506, 1.2564, -1.4736, ..., -0.5037, 0.8243, -0.2771],
[-0.0563, 0.4527, 0.7250, ..., -1.4086, -1.6282, 0.1479],
[-0.7157, -0.3536, 0.2683, ..., -0.6499, -1.3842, -0.1948],
...,
[-0.0898, -0.5159, -1.6073, ..., -0.0275, 1.0160, 0.1682],
[-1.2061, 1.1913, -0.1666, ..., -0.0889, 2.0752, -0.5676],
[ 2.1806, 0.0587, 0.6671, ..., -0.0153, 0.1549, -0.2035]],
grad_fn=<SelectBackward0>)
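Finally, the vectorized GroupNorm check, a minimal sketch reusing input, GN, and output from the code above. Statistics are shared within each group, but the affine parameters remain per-channel, so they are applied after reshaping back to (N, C, H, W):

x = input.reshape(8, 3, 2, 32, 32)                                 # (N, G, C//G, H, W)
mean_all = x.mean(dim=(2, 3, 4), keepdim=True)                     # one mean per (sample, group)
var_all = x.var(dim=(2, 3, 4), unbiased=False, keepdim=True)
normed = ((x - mean_all) / torch.pow(var_all + GN.eps, 0.5)).reshape(8, 6, 32, 32)
GN_all = normed * GN.weight.view(1, -1, 1, 1) + GN.bias.view(1, -1, 1, 1)
print(torch.allclose(GN_all, output, atol=1e-6))  # expected: True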