A03. Convolutional Layers (Conv) [torch]

Convolutional Layers

  • 1. Writing a simple convolutional layer
  • 2. Backpropagation
  • 3. Pooling layers

1. Writing a simple convolutional layer

import torch

torch.manual_seed(0)
inputs = torch.ones([1, 4, 4])
conv1 = torch.nn.Conv2d(1, 1, (2, 2), stride=1, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros')

output = conv1(inputs)

print(inputs)
for i in conv1.named_parameters():
    print(i)
print(output)
tensor([[[1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.]]])
('weight', Parameter containing:
tensor([[[[-0.0037,  0.2682],
          [-0.4115, -0.3680]]]], requires_grad=True))
('bias', Parameter containing:
tensor([-0.1926], requires_grad=True))
tensor([[[-0.7076, -0.7076, -0.7076],
         [-0.7076, -0.7076, -0.7076],
         [-0.7076, -0.7076, -0.7076]]], grad_fn=<SqueezeBackward1>)
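  • As a quick sanity check on the numbers above (a minimal sketch reusing the same seed): with an all-ones input and no padding, every output position is simply the sum of the 2×2 kernel weights plus the bias.

import torch

torch.manual_seed(0)
# Same layer as above: every 2x2 window sees only ones, so each output element
# equals the sum of the kernel weights plus the bias.
conv1 = torch.nn.Conv2d(1, 1, (2, 2), stride=1, padding=0, bias=True)
print(conv1.weight.sum() + conv1.bias)  # ≈ -0.7076, the value filling the 3x3 output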
  • The call above spells out essentially all of the arguments: input channels, output channels, kernel size, stride, padding, dilation (for dilated/atrous convolution), the number of blocked connections from input channels to output channels (groups), bias, and padding mode.
  • When groups=1, along the channel dimension the convolution behaves like a fully-connected layer; increasing groups significantly reduces the number of trainable parameters (see the sketch below).
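  • To illustrate the effect of groups on the parameter count, here is a minimal sketch (the 8→8 channels and 3×3 kernel are arbitrary, chosen only for illustration): the weight tensor has shape (out_channels, in_channels/groups, kH, kW), so the number of weights shrinks by a factor of groups.

import torch

# Hypothetical layer sizes, only to show how `groups` changes the weight shape.
conv_g1 = torch.nn.Conv2d(8, 8, (3, 3), groups=1, bias=False)
conv_g8 = torch.nn.Conv2d(8, 8, (3, 3), groups=8, bias=False)
print(conv_g1.weight.shape, conv_g1.weight.numel())  # torch.Size([8, 8, 3, 3]) 576
print(conv_g8.weight.shape, conv_g8.weight.numel())  # torch.Size([8, 1, 3, 3]) 72
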
import torch

torch.manual_seed(0)
inputs = torch.rand([2, 1, 1])
conv1 = torch.nn.Conv2d(2, 2, (1, 1), stride=1, padding=0, dilation=1, groups=1, bias=False, padding_mode='zeros')
output = conv1(inputs)
print(inputs)
for i in conv1.named_parameters():
    print(i)
print(output)
tensor([[[0.4963]],

        [[0.7682]]])
('weight', Parameter containing:
tensor([[[[-0.5820]],

         [[-0.5204]]],


        [[[-0.2723]],

         [[ 0.1896]]]], requires_grad=True))
tensor([[[-0.6886]],

        [[ 0.0105]]], grad_fn=<SqueezeBackward1>)
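  • A quick check of the "like a fully-connected layer" remark (a sketch reproducing the snippet above): with groups=1 and a 1×1 kernel, each output channel is a weighted sum over all input channels, i.e. a plain 2×2 matrix–vector product.

import torch

torch.manual_seed(0)
inputs = torch.rand([2, 1, 1])
conv1 = torch.nn.Conv2d(2, 2, (1, 1), groups=1, bias=False)
# A 1x1 convolution with groups=1 mixes channels exactly like a linear layer.
manual = conv1.weight.view(2, 2) @ inputs.view(2)
print(manual)  # ≈ [-0.6886, 0.0105], matching the conv output above
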
import torch

torch.manual_seed(0)
inputs = torch.rand([2, 1, 1])
conv1 = torch.nn.Conv2d(2, 2, (1, 1), stride=1, padding=0, dilation=1, groups=2, bias=False, padding_mode='zeros')
output = conv1(inputs)
print(inputs)
for i in conv1.named_parameters():
    print(i)
print(output)
tensor([[[0.4963]],

        [[0.7682]]])
('weight', Parameter containing:
tensor([[[[-0.8230]]],


        [[[-0.7359]]]], requires_grad=True))
tensor([[[-0.4084]],

        [[-0.5654]]], grad_fn=<SqueezeBackward1>)
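  • And for the groups=2 case (again a sketch reproducing the snippet above): each output channel now sees only its own input channel, so the result is just an element-wise product of a per-channel scalar weight with the corresponding input value.

import torch

torch.manual_seed(0)
inputs = torch.rand([2, 1, 1])
conv1 = torch.nn.Conv2d(2, 2, (1, 1), groups=2, bias=False)
# With groups=2 the weight has shape (2, 1, 1, 1): one scalar per channel.
manual = conv1.weight.squeeze() * inputs.squeeze()
print(manual)                                           # ≈ [-0.4084, -0.5654]
print(torch.allclose(manual, conv1(inputs).squeeze()))  # True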

2. Backpropagation

import torch
torch.manual_seed(0)


class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.layer = torch.nn.Conv2d(1, 1, (2, 2), stride=1, padding=0, dilation=1, groups=1, bias=False, padding_mode='zeros')

    def forward(self, x):
        return self.layer(x)


if __name__ == "__main__":
    inputs = torch.ones([1, 2, 2])
    goals = torch.zeros([1, 1, 1])
    net = Net()
    opt = torch.optim.SGD(net.parameters(), lr=1)
    loss_function = torch.nn.MSELoss()

    output = net(inputs)
    print(inputs)
    for i in net.named_parameters():
        print(i)
    print(output)
    loss = loss_function(output, goals)
    opt.zero_grad()
    loss.backward()
    opt.step()
    for i in net.named_parameters():
        print(i)
    print("-" * 50)
    for i in net.parameters():
        print(i.grad)
tensor([[[1., 1.],
         [1., 1.]]])
('layer.weight', Parameter containing:
tensor([[[[-0.0037,  0.2682],
          [-0.4115, -0.3680]]]], requires_grad=True))
tensor([[[-0.5150]]], grad_fn=<SqueezeBackward1>)
('layer.weight', Parameter containing:
tensor([[[[1.0263, 1.2982],
          [0.6185, 0.6621]]]], requires_grad=True))
--------------------------------------------------
tensor([[[[-1.0300, -1.0300],
          [-1.0300, -1.0300]]]])

$$
x = \text{inputs} = [[1,1],[1,1]] \\
\text{denote } \omega = [[w_1, w_2],[w_3, w_4]] \\
y = \text{output} = [-0.5150] = [w_1 + w_2 + w_3 + w_4] \\
\frac{\partial\delta}{\partial w_1} = \frac{\partial (y-0)^2}{\partial w_1} = \frac{\partial\left(w_1^2 + 2 w_1 (w_2 + w_3 + w_4) + \text{const}\right)}{\partial w_1} = 2\sum w = -1.03
$$
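  • The same number can be checked directly against autograd (a minimal sketch reproducing the setup above):

import torch

torch.manual_seed(0)
conv = torch.nn.Conv2d(1, 1, (2, 2), bias=False)
x = torch.ones([1, 2, 2])
loss = torch.nn.functional.mse_loss(conv(x), torch.zeros([1, 1, 1]))
loss.backward()
print((2 * conv.weight.sum()).item())  # ≈ -1.0300, the hand-derived value
print(conv.weight.grad)                # the same value at every kernel position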

  • Checks out! But what happens when the same weight is reused across several output positions?
import torch
torch.manual_seed(1)


class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.layer = torch.nn.Conv2d(1, 1, (1, 1), stride=1, padding=0, dilation=1, groups=1, bias=False, padding_mode='zeros')

    def forward(self, x):
        return self.layer(x)


if __name__ == "__main__":
    inputs = torch.tensor([[[0.1, 0.2]]])
    goals = torch.zeros([1, 1, 2])
    net = Net()
    opt = torch.optim.SGD(net.parameters(), lr=1)
    loss_function = torch.nn.MSELoss()

    output = net(inputs)
    print(inputs)
    for i in net.named_parameters():
        print(i)
    print(output)
    loss = loss_function(output, goals)
    opt.zero_grad()
    loss.backward()
    opt.step()
    for i in net.named_parameters():
        print(i)
    print("-" * 50)
    for i in net.parameters():
        print(i.grad)

tensor([[[0.1000, 0.2000]]])
('layer.weight', Parameter containing:
tensor([[[[0.5153]]]], requires_grad=True))
tensor([[[0.0515, 0.1031]]], grad_fn=<SqueezeBackward1>)
('layer.weight', Parameter containing:
tensor([[[[0.4895]]]], requires_grad=True))
--------------------------------------------------
tensor([[[[0.0258]]]])

$$
x = [0.1, 0.2] \\
w = [0.5153] \\
y = [w_1 x_1,\ w_1 x_2] = [0.0515, 0.1031] \\
\frac{\partial (y_1 - 0)^2}{\partial w_1} = 2 w_1 x_1^2 = 0.0103 \\
\frac{\partial (y_2 - 0)^2}{\partial w_1} = 2 w_1 x_2^2 = 0.0412 \\
(0.0103 + 0.0412)/2 = 0.0258
$$

  • OK, so the per-position gradients are averaged (this comes from MSELoss's default reduction='mean'; see the sketch below).
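  • A sketch confirming where the averaging comes from: MSELoss defaults to reduction='mean', which divides by the number of output elements; with reduction='sum' the two per-position gradients are simply added.

import torch

torch.manual_seed(1)
conv = torch.nn.Conv2d(1, 1, (1, 1), bias=False)
x = torch.tensor([[[0.1, 0.2]]])
loss = torch.nn.functional.mse_loss(conv(x), torch.zeros([1, 1, 2]), reduction='sum')
loss.backward()
print(conv.weight.grad)  # ≈ 0.0515 = 0.0103 + 0.0412, no division by 2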

3. Pooling layers

import torch
torch.manual_seed(1)


class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.layer = torch.nn.Conv2d(1, 1, (1, 1), stride=1, padding=0, dilation=1, groups=1, bias=False, padding_mode='zeros')
        self.pool = torch.nn.MaxPool2d((1, 2))

    def forward(self, x):
        x = self.layer(x)
        x = self.pool(x)
        return x


if __name__ == "__main__":
    inputs = torch.tensor([[[0.1, 0.2]]])
    net = Net()
    opt = torch.optim.SGD(net.parameters(), lr=1)
    loss_function = torch.nn.MSELoss()

    output = net(inputs)
    print(inputs)
    for i in net.named_parameters():
        print(i)
    print(output)
    goals = torch.zeros_like(output)
    loss = loss_function(output, goals)
    opt.zero_grad()
    loss.backward()
    opt.step()
    for i in net.named_parameters():
        print(i)
    print("-" * 50)
    for i in net.parameters():
        print(i.grad)
tensor([[[0.1000, 0.2000]]])
('layer.weight', Parameter containing:
tensor([[[[0.5153]]]], requires_grad=True))
tensor([[[0.1031]]], grad_fn=<MaxPool2DWithIndicesBackward0>)
('layer.weight', Parameter containing:
tensor([[[[0.4740]]]], requires_grad=True))
--------------------------------------------------
tensor([[[[0.0412]]]])
  • Functionally, this code differs from the previous one only by the pooling layer, yet both the output and the gradient change. Under max pooling, the gradient loses the term from the non-max position, and there is no averaging anymore since the pooled output is a single element (the computation otherwise follows the same steps as before). A combined check of both pooling variants appears at the end of this section.
  • With average pooling instead:
self.pool = torch.nn.AvgPool2d((1, 2))
tensor([[[0.1000, 0.2000]]])
('layer.weight', Parameter containing:
tensor([[[[0.5153]]]], requires_grad=True))
tensor([[[0.0773]]], grad_fn=<AvgPool2DBackward0>)
('layer.weight', Parameter containing:
tensor([[[[0.4921]]]], requires_grad=True))
--------------------------------------------------
tensor([[[[0.0232]]]])

$$
x = [0.1, 0.2] \\
w = [0.5153] \\
y = (w_1 x_1 + w_1 x_2)/2 = 0.0773 \\
\frac{\partial (y - 0)^2}{\partial w_1} = \frac{(x_1 + x_2)^2}{2} w_1 = 0.0232
$$

  • Also checks out!
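  • To wrap up, a small side-by-side sketch (reproducing the setup above) of the weight gradient under the two pooling variants:

import torch

def pool_grad(pool):
    # Rebuild the same conv (seed 1 -> weight ≈ 0.5153) and back-propagate
    # an MSE-to-zero loss through the given pooling layer.
    torch.manual_seed(1)
    conv = torch.nn.Conv2d(1, 1, (1, 1), bias=False)
    x = torch.tensor([[[0.1, 0.2]]])
    y = pool(conv(x))
    torch.nn.functional.mse_loss(y, torch.zeros_like(y)).backward()
    return conv.weight.grad

print(pool_grad(torch.nn.MaxPool2d((1, 2))))  # ≈ 0.0412: only the max position (x2) contributes
print(pool_grad(torch.nn.AvgPool2d((1, 2))))  # ≈ 0.0232: the mean routes gradient to both positions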
