import torch
import torch.nn as nn
x = torch.randn(6, 512, 1, 1)  # a batch of 6 feature maps: 512 channels, 1x1 spatial size
print(x, x.shape)
x_ = x.view(6, 512)  # flatten the 1x1 spatial dims so the Linear layers can consume the tensor
m_ = nn.Linear(512, 5)   # two independently initialized Linear layers
m__ = nn.Linear(512, 5)  # with identical shapes
y_ = m_(x_)
y__ = m__(x_)
print(m_.weight, m_.weight.shape)
print(m_.bias, m_.bias.shape)
print(y_, y_.shape)
# With nn.Linear(512, 5): the input tensor has shape (6, 512), the output shape (6, 5), the weight shape (5, 512), and the bias shape (5)
print(torch.sum(x_[0]*m_.weight[0])+m_.bias[0], torch.sum(x_[0]*m_.weight[1])+m_.bias[1]) # verify the first two elements of the output tensor by hand
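# Sanity check, left commented out so the printed output below is unchanged:
# the whole Linear output is a single matrix product, x_ @ W^T + b.
# torch.allclose(y_, x_ @ m_.weight.t() + m_.bias)  # -> True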
print("****"*25)
print(m__.weight, m__.weight.shape)
print(m__.bias, m__.bias.shape)
print(y__, y__.shape)
print(torch.sum(x_[0]*m__.weight[0])+m__.bias[0], torch.sum(x_[0]*m__.weight[1])+m__.bias[1])
m = nn.Conv2d(512, 5, (1, 1), padding=0)  # 1x1 convolution with the same in/out channels as the Linear layers
y = m(x)
print(m.weight, m.weight.shape)
print(m.bias, m.bias.shape)
print(y, y.shape)
print(torch.sum(x[0]*m.weight[0])+m.bias[0]) # verify the first element of the output tensor
print(torch.sum(x[0]*m.weight[1])+m.bias[1]) # verify the second element of the output tensor
print(torch.sum(x[5]*m.weight[4])+m.bias[4]) # verify the last element of the output tensor
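# Same check for the 1x1 conv, left commented out so the printed output below is unchanged:
# its (5, 512, 1, 1) kernel flattens to the (5, 512) matrix a Linear layer would use.
# torch.allclose(y.view(6, 5), x_ @ m.weight.view(5, 512).t() + m.bias)  # -> True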
The output is as follows:
tensor([[[[-0.0227]],
[[-0.3054]],
[[ 0.5264]],
...,
[[ 0.5306]],
[[ 0.0769]],
[[-0.3278]]],
[[[ 0.0224]],
[[ 0.0277]],
[[ 2.0995]],
...,
[[-0.3214]],
[[ 1.1956]],
[[-0.7666]]],
[[[-1.6789]],
[[-0.3657]],
[[ 2.0482]],
...,
[[-1.7332]],
[[-1.7271]],
[[-0.5530]]],
[[[-2.3737]],
[[-0.6993]],
[[-0.6078]],
...,
[[-0.3336]],
[[-0.6361]],
[[ 0.1160]]],
[[[ 0.4130]],
[[ 0.7437]],
[[-1.0734]],
...,
[[-0.8741]],
[[-1.4548]],
[[ 0.1135]]],
[[[-0.3648]],
[[-0.4488]],
[[ 0.2677]],
...,
[[ 1.7004]],
[[-0.4827]],
[[ 2.5061]]]]) torch.Size([6, 512, 1, 1])
Parameter containing:
tensor([[ 3.9740e-02, 1.7094e-02, -3.0316e-02, ..., -4.2785e-02,
-3.9551e-02, 3.5973e-02],
[-3.1560e-02, -4.0857e-02, -7.9212e-03, ..., 1.4893e-02,
-3.4417e-02, 9.5926e-06],
[ 3.4473e-02, 7.5045e-03, 2.1143e-02, ..., 3.7036e-02,
-3.3019e-02, 3.8141e-02],
[-2.5011e-03, -2.6943e-03, -1.5124e-02, ..., -5.4786e-03,
4.3845e-02, -3.4378e-02],
[-3.5882e-02, -3.9723e-02, -3.2679e-02, ..., -1.8698e-02,
-1.6418e-02, -3.4124e-03]], requires_grad=True) torch.Size([5, 512])
Parameter containing:
tensor([-0.0246, -0.0111, 0.0396, 0.0143, -0.0365], requires_grad=True) torch.Size([5])
tensor([[-0.3218, -0.6336, 0.2198, -0.8990, 0.5238],
[-0.4804, -1.2615, -0.5049, 0.0911, -0.2541],
[-0.2615, 0.5038, -0.4231, -1.0328, -0.2629],
[-0.2202, 0.3873, 0.7698, 0.6329, 1.2157],
[-0.1368, -0.6852, -0.4037, 0.2678, -0.9539],
[-0.5605, 0.6245, 0.6762, -0.4133, -0.0435]],
grad_fn=<AddmmBackward0>) torch.Size([6, 5])
tensor(-0.3218, grad_fn=<AddBackward0>) tensor(-0.6336, grad_fn=<AddBackward0>)
****************************************************************************************************
Parameter containing:
tensor([[ 0.0020, -0.0086, 0.0212, ..., 0.0236, 0.0126, 0.0070],
[ 0.0206, -0.0058, 0.0352, ..., 0.0417, 0.0153, 0.0135],
[-0.0284, -0.0361, 0.0376, ..., -0.0171, 0.0051, -0.0026],
[-0.0027, -0.0329, 0.0306, ..., -0.0401, 0.0155, -0.0312],
[ 0.0326, 0.0103, 0.0237, ..., -0.0341, -0.0203, -0.0278]],
requires_grad=True) torch.Size([5, 512])
Parameter containing:
tensor([ 0.0172, -0.0238, 0.0174, 0.0154, 0.0025], requires_grad=True) torch.Size([5])
tensor([[ 0.4785, -0.7011, 0.0464, -0.3521, -0.2145],
[ 0.3342, -0.9147, 0.3201, 0.6359, 0.2411],
[ 0.3518, -0.1693, 0.6793, 0.3893, 0.7366],
[ 1.1320, -0.8723, 0.4811, 0.0100, -0.0978],
[ 0.6753, 0.2851, -0.4163, 0.4748, 0.9103],
[ 0.2818, 0.3825, -0.0851, 0.0638, -0.2460]],
grad_fn=<AddmmBackward0>) torch.Size([6, 5])
tensor(0.4785, grad_fn=<AddBackward0>) tensor(-0.7011, grad_fn=<AddBackward0>)
Parameter containing:
tensor([[[[-0.0413]],
[[ 0.0082]],
[[-0.0213]],
...,
[[ 0.0292]],
[[-0.0219]],
[[-0.0164]]],
[[[ 0.0122]],
[[ 0.0354]],
[[ 0.0342]],
...,
[[ 0.0375]],
[[-0.0393]],
[[-0.0441]]],
[[[-0.0149]],
[[ 0.0128]],
[[ 0.0270]],
...,
[[ 0.0256]],
[[-0.0205]],
[[-0.0129]]],
[[[-0.0242]],
[[ 0.0423]],
[[ 0.0200]],
...,
[[-0.0139]],
[[ 0.0431]],
[[ 0.0035]]],
[[[-0.0167]],
[[-0.0199]],
[[-0.0111]],
...,
[[-0.0115]],
[[-0.0092]],
[[-0.0038]]]], requires_grad=True) torch.Size([5, 512, 1, 1])
Parameter containing:
tensor([-0.0060, -0.0302, -0.0181, -0.0238, -0.0309], requires_grad=True) torch.Size([5])
tensor([[[[-7.7635e-04]],
[[ 3.3419e-01]],
[[ 3.7540e-02]],
[[-3.2353e-01]],
[[ 5.2627e-01]]],
[[[ 7.9518e-01]],
[[-1.4631e-01]],
[[-1.2328e+00]],
[[-6.2233e-01]],
[[ 5.4902e-01]]],
[[[ 1.0096e-01]],
[[-4.3371e-02]],
[[ 8.4562e-02]],
[[ 2.9943e-01]],
[[-5.8890e-01]]],
[[[ 2.8188e-01]],
[[ 7.7247e-02]],
[[-1.1808e-01]],
[[ 2.0405e-01]],
[[-1.7719e-01]]],
[[[ 1.9541e-01]],
[[ 4.5732e-01]],
[[-5.3517e-01]],
[[ 3.3248e-01]],
[[ 1.2848e-02]]],
[[[-3.8311e-01]],
[[-3.3499e-01]],
[[ 9.1582e-01]],
[[-4.8706e-01]],
[[ 6.1926e-01]]]], grad_fn=<ConvolutionBackward0>) torch.Size([6, 5, 1, 1])
tensor(-0.0008, grad_fn=<AddBackward0>)
tensor(0.3342, grad_fn=<AddBackward0>)
tensor(0.6193, grad_fn=<AddBackward0>)
Notice that even for the same input tensor, the two Linear layers produce different outputs; this comes down to how their weights and biases are randomly initialized, not to any difference in the computation. The Conv2d check confirms the real point: for an input of shape (b, c, 1, 1), nn.Conv2d with kernel_size=(1, 1) and padding=0 computes exactly what nn.Linear computes on the flattened (b, c) input.
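This equivalence can be verified directly rather than read off the shapes: copy the Linear layer's parameters into the 1x1 convolution and the two outputs agree to floating-point precision. A minimal standalone sketch (variable names are my own, not from the snippet above):

import torch
import torch.nn as nn

x = torch.randn(6, 512, 1, 1)
linear = nn.Linear(512, 5)
conv = nn.Conv2d(512, 5, (1, 1), padding=0)

# A (5, 512) Linear weight reshapes directly into a (5, 512, 1, 1) conv kernel.
with torch.no_grad():
    conv.weight.copy_(linear.weight.view(5, 512, 1, 1))
    conv.bias.copy_(linear.bias)

y_linear = linear(x.view(6, 512))  # shape (6, 5)
y_conv = conv(x).view(6, 5)        # shape (6, 5, 1, 1), flattened to (6, 5)
print(torch.allclose(y_linear, y_conv, atol=1e-6))  # True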