Inception Block
import torch
from torch import nn
from torch.nn import functional as F
from d2l import torch as d2l
# The Inception block from GoogLeNet v1
class Inception(nn.Module):
    def __init__(self, in_channels, c1, c2, c3, c4, **kwargs):
        '''
        The four branches of the Inception block, from left to right.
        :param in_channels: number of input channels for branches p1, p2, p3, p4
        :param c1, c2, c3, c4: output channels of each branch (c2 and c3 are
            pairs: the 1x1 reduction channels and the 3x3/5x5 output channels)
        '''
        super(Inception, self).__init__(**kwargs)
        # Branch 1: a 1x1 convolution
        self.p1_1 = nn.Conv2d(in_channels, c1, kernel_size=1)
        # Branch 2, layer 1: a 1x1 convolution
        # Placed before the 3x3 convolution to reduce the channel count,
        # since the 3x3 convolution is the slower, more expensive operation
        self.p2_1 = nn.Conv2d(in_channels, c2[0], kernel_size=1)
        # Branch 2, layer 2: a 3x3 convolution
        self.p2_2 = nn.Conv2d(c2[0], c2[1], kernel_size=3, padding=1)
        # Branch 3, layer 1: a 1x1 convolution
        self.p3_1 = nn.Conv2d(in_channels, c3[0], kernel_size=1)
        # Branch 3, layer 2: a 5x5 convolution
        self.p3_2 = nn.Conv2d(c3[0], c3[1], kernel_size=5, padding=2)
        # Branch 4, layer 1: a 3x3 max-pooling layer (not a convolution)
        self.p4_1 = nn.MaxPool2d(kernel_size=3, stride=1, padding=1)
        # Branch 4, layer 2: a 1x1 convolution
        self.p4_2 = nn.Conv2d(in_channels, c4, kernel_size=1)
    def forward(self, x):
        '''
        Run the four branches and combine them into the Inception block output.
        '''
        p1 = F.relu(self.p1_1(x))
        p2 = F.relu(self.p2_2(F.relu(self.p2_1(x))))
        p3 = F.relu(self.p3_2(F.relu(self.p3_1(x))))
        p4 = F.relu(self.p4_2(self.p4_1(x)))
        # cat concatenates along dim=1, the channel dimension, without adding
        # a new dimension; stack, in contrast, would insert a new dimension
        return torch.cat((p1, p2, p3, p4), dim=1)
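As a quick sanity check (an illustrative snippet, not part of the original notes), feeding a dummy input through the first stage-3 block defined below shows that the spatial size is preserved and the output channels are the sum of the four branches, 64+128+32+32=256:
blk = Inception(192, 64, (96, 128), (16, 32), 32)
Y = blk(torch.rand(1, 192, 12, 12))
print(Y.shape)  # torch.Size([1, 256, 12, 12]): channels summed, height/width unchanged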
GoogLeNet Model
# The GoogLeNet model consists of 5 stages
# stage1
b1 = nn.Sequential(
    # 1 input channel, 64 output channels, (96+3+3-7+2)/2=48
    # A 3x3 convolution extracts plenty of information at low cost,
    # a good balance between information extraction and computation
    # stride=2 halves the output height and width
    nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3),
    # Nonlinear unit: stacking simple linear functions can only approximate
    # complex nonlinear functions if a nonlinearity is inserted between them
    # ReLU is numerically stable and helps avoid vanishing or exploding gradients
    nn.ReLU(),
    # (48+1+1-3+2)/2=24
    # Max pooling reduces the sensitivity of the convolution to position
    # Max pooling does not change the number of channels
    # stride=2 halves the output height and width
    nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
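The size arithmetic in the comments follows the usual convolution output formula, floor((n + 2p - k) / s) + 1. A small helper (illustrative only, not part of the original notes) makes the numbers easy to verify:
# Output size of a convolution/pooling layer along one spatial dimension
def conv_out(n, k, s=1, p=0):
    return (n + 2 * p - k) // s + 1
print(conv_out(96, k=7, s=2, p=3))  # 48, the stage-1 convolution
print(conv_out(48, k=3, s=2, p=1))  # 24, the stage-1 max pooling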
# stage2
b2 = nn.Sequential(
    # 64 input channels, 64 output channels, (24+0+0-1+1)/1=24
    # A 1x1 convolution sees no spatial information; it only looks at and
    # extracts information across channels
    nn.Conv2d(64, 64, kernel_size=1),
    nn.ReLU(),
    # 64 input channels, 192 output channels, (24+1+1-3+1)/1=24
    nn.Conv2d(64, 192, kernel_size=3, padding=1),
    nn.ReLU(),
    # (24+1+1-3+2)/2=12
    nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
# stage3
b3 = nn.Sequential(
    # p1_1, 1x1 conv, 192 in, 64 out, (12+0+0-1+1)/1=12
    # p2_1, 1x1 conv, 192 in, 96 out, (12+0+0-1+1)/1=12
    # p2_2, 3x3 conv, 96 in, 128 out, (12+1+1-3+1)/1=12
    # p3_1, 1x1 conv, 192 in, 16 out, (12+0+0-1+1)/1=12
    # p3_2, 5x5 conv, 16 in, 32 out, (12+2+2-5+1)/1=12
    # p4_1, 3x3 max pooling, (12+1+1-3+1)/1=12
    # p4_2, 1x1 conv, 192 in, 32 out, (12+0+0-1+1)/1=12
    # forward() concatenates p1, p2, p3, p4 along the channel dimension,
    # 64+128+32+32=256
    Inception(192, 64, (96, 128), (16, 32), 32),
    # p1_1, 1x1 conv, 256 in, 128 out, (12+0+0-1+1)/1=12
    # p2_1, 1x1 conv, 256 in, 128 out, (12+0+0-1+1)/1=12
    # p2_2, 3x3 conv, 128 in, 192 out, (12+1+1-3+1)/1=12
    # p3_1, 1x1 conv, 256 in, 32 out, (12+0+0-1+1)/1=12
    # p3_2, 5x5 conv, 32 in, 96 out, (12+2+2-5+1)/1=12
    # p4_1, 3x3 max pooling, (12+1+1-3+1)/1=12
    # p4_2, 1x1 conv, 256 in, 64 out, (12+0+0-1+1)/1=12
    # forward() concatenates p1, p2, p3, p4 along the channel dimension,
    # 128+192+96+64=480
    Inception(256, 128, (128, 192), (32, 96), 64),
    # (12+1+1-3+2)/2=6
    nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
# stage4
b4 = nn.Sequential(
    # Inception blocks keep the height and width and change the channel count
    # 480 input channels, output channels = 192+208+48+64 = 512, 6x6
    Inception(480, 192, (96, 208), (16, 48), 64),
    # 512 input channels, output channels = 160+224+64+64 = 512, 6x6
    Inception(512, 160, (112, 224), (24, 64), 64),
    # 512 input channels, output channels = 128+256+64+64 = 512, 6x6
    Inception(512, 128, (128, 256), (24, 64), 64),
    # 512 input channels, output channels = 112+288+64+64 = 528, 6x6
    Inception(512, 112, (144, 288), (32, 64), 64),
    # 528 input channels, output channels = 256+320+128+128 = 832, 6x6
    Inception(528, 256, (160, 320), (32, 128), 128),
    # (6+1+1-3+2)/2=3
    nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
# stage5
b5 = nn.Sequential(
    # Inception blocks keep the height and width and change the channel count
    # 832 input channels, output channels = 256+320+128+128 = 832, 3x3
    Inception(832, 256, (160, 320), (32, 128), 128),
    # 832 input channels, output channels = 384+384+128+128 = 1024, 3x3
    Inception(832, 384, (192, 384), (48, 128), 128),
    # Global average pooling: cheap to compute, converges more slowly,
    # but improves generalization
    # Here it averages the 3x3 feature map: (3+0+0-3+1)/1=1
    # AdaptiveAvgPool2d((1, 1)) is global average pooling, giving 1x1 feature maps
    nn.AdaptiveAvgPool2d((1, 1)),
    # Flatten layer
    nn.Flatten())
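For illustration (not part of the original notes), nn.AdaptiveAvgPool2d((1, 1)) averages each channel over whatever spatial size it receives, so the 3x3 maps above collapse to 1x1:
gap = nn.AdaptiveAvgPool2d((1, 1))
print(gap(torch.rand(1, 1024, 3, 3)).shape)  # torch.Size([1, 1024, 1, 1])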
net = nn.Sequential(
    b1,
    b2,
    b3,
    b4,
    # output shape 1x1024
    b5,
    # fully connected layer, output shape 1x10
    nn.Linear(1024, 10))
# GoogLeNet is fairly slow, so for training on Fashion-MNIST the input
# height and width are reduced to 96
X = torch.rand(size=(1, 1, 96, 96))
for layer in net:
    X = layer(X)
    print(layer.__class__.__name__, 'output shape:\t', X.shape)
Sequential output shape: torch.Size([1, 64, 24, 24])
Sequential output shape: torch.Size([1, 192, 12, 12])
Sequential output shape: torch.Size([1, 480, 6, 6])
Sequential output shape: torch.Size([1, 832, 3, 3])
Sequential output shape: torch.Size([1, 1024])
Linear output shape: torch.Size([1, 10])
# Train the model
lr, num_epochs, batch_size = 0.1, 10, 128
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, resize=96)
# The softmax (via the cross-entropy loss) and the weight initialization
# are both handled inside this helper
d2l.train_ch6(net, train_iter, test_iter, num_epochs, lr, d2l.try_gpu())
loss 0.246, train acc 0.905, test acc 0.895
3063.8 examples/sec on cuda:0
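A minimal sketch of the setup that d2l.train_ch6 performs before its training loop, assuming the standard d2l behaviour (Xavier initialization, cross-entropy loss, SGD); illustrative, not the exact library code:
# Assumed equivalent of the setup inside d2l.train_ch6
def init_weights(m):
    if type(m) in (nn.Linear, nn.Conv2d):
        nn.init.xavier_uniform_(m.weight)
net.apply(init_weights)
loss = nn.CrossEntropyLoss()  # softmax is folded into the loss
optimizer = torch.optim.SGD(net.parameters(), lr=lr)
# the loop then runs num_epochs passes over train_iter on the GPU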
GoogLeNet Summary
Role of 1x1 convolutional layers: mix information across channels and shrink the channel count before the expensive 3x3/5x5 convolutions (see the sketch at the end of this section)
Role of pooling layers: reduce sensitivity to position; with stride 2 they halve the spatial size without changing the channel count
Role of global average pooling: collapse each channel to 1x1, replacing large fully connected layers and improving generalization
Channels
Network depth
Q&A
Start by using someone else's published network
The strongest family is the ResNeSt series
Hyperparameter tuning
Number of channels
The parallel branches inside an Inception block learn in different ways
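To illustrate the first summary point, a 1x1 convolution acts like a fully connected layer applied independently at every pixel across the channel dimension; the sketch below (illustrative sizes) mixes 192 channels down to 64 without touching the spatial dimensions:
conv1x1 = nn.Conv2d(192, 64, kernel_size=1)
print(conv1x1(torch.rand(1, 192, 12, 12)).shape)  # torch.Size([1, 64, 12, 12])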