1、CNN中,卷积和池化是不会在乎输入图像有多大的,但是全连接层在训练的过程中的矩阵参数已经确定了,所以,全连接层限制了输入图像的大小。
2、如何解决上面的问题呢,引入了空间金字塔池化SPP(Spatial Pyramid Pooling),其实就是将原图像进行不同尺度的池化,然后将池化后得到的信息整合在一起,然后再输出到全连接层。
不管你输入的图像的大小是多大,空间金字塔池化会将图片进行个数确定的池化,然后得到的结果的个数是不变的,这样到全连接层就不变,就保证了输入图像分辨率可以随意大小。
代码理解如下:
from math import floor, ceil
import torch
import torch.nn as nn
import torch.nn.functional as F
class SpatialPyramidPooling2d(nn.Module):
    """Spatial pyramid pooling over a 2-D feature map.

    Pools the input at ``num_level`` pyramid scales (a 1x1 grid, a 2x2
    grid, ..., an LxL grid), flattens each result per sample, and
    concatenates them along the feature dimension. The output length is
    therefore fixed by ``num_level`` and the channel count alone — it
    does not depend on the input's spatial size, which is the whole
    point of SPP: it lets a fixed-size fully-connected head accept
    variable-resolution inputs.
    """

    def __init__(self, num_level, pool_type='max_pool'):
        super(SpatialPyramidPooling2d, self).__init__()
        # Number of pyramid levels; level l splits the map into l x l bins.
        self.num_level = num_level
        # 'max_pool' selects max pooling; any other value falls back to
        # average pooling.
        self.pool_type = pool_type

    def forward(self, x):
        N, C, H, W = x.size()
        print('多尺度提取信息,并进行特征融合...')
        print()
        pooled = None
        for lvl in range(1, self.num_level + 1):
            print('第', lvl, '次计算池化核:')
            # Kernel (and stride) sized so that lvl strides tile the map.
            kern = (ceil(H / lvl), ceil(W / lvl))
            print('kernel_size: ', kern)
            step = kern
            print('stride: ', step)
            # Symmetric padding so kern * lvl covers H/W even when the map
            # does not divide evenly into lvl bins.
            pad = (floor((kern[0] * lvl - H + 1) / 2),
                   floor((kern[1] * lvl - W + 1) / 2))
            print('padding: ', pad)
            print()
            print('进行最大池化并将提取特征展开:')
            pool_fn = F.max_pool2d if self.pool_type == 'max_pool' else F.avg_pool2d
            flat = pool_fn(x, kernel_size=kern, stride=step, padding=pad).view(N, -1)
            if pooled is None:
                pooled = flat
                print('展开大小为: ', pooled.size())
            else:
                # Concatenate this level's features onto the running result.
                pooled = torch.cat((pooled, flat), 1)
                print('合并为: ', pooled.size())
            print()
        return pooled
class SPPNet(nn.Module):
    """Small CNN demo with an SPP layer between the conv stack and the head.

    Bug fix: the original stored ``pool_type`` but constructed the SPP
    layer as ``SpatialPyramidPooling2d(num_level)``, so a caller passing
    ``pool_type='avg_pool'`` silently got max pooling. The argument is
    now forwarded to the SPP layer.
    """

    def __init__(self, num_level=3, pool_type='max_pool'):
        super(SPPNet, self).__init__()
        self.num_level = num_level
        self.pool_type = pool_type
        self.feature = nn.Sequential(nn.Conv2d(3, 64, 3),
                                     nn.ReLU(),
                                     nn.MaxPool2d(2),
                                     nn.Conv2d(64, 64, 3),
                                     nn.ReLU())
        # Total bins across all pyramid levels, e.g. for num_level=3:
        # num_grid = 1 + 4 + 9 = 14
        self.num_grid = self._cal_num_grids(num_level)
        # Forward pool_type so avg pooling is actually honoured (was dropped).
        self.spp_layer = SpatialPyramidPooling2d(num_level, pool_type)
        # SPP output has num_grid bins per channel, 64 channels from the
        # conv stack; head maps to 10 classes.
        self.linear = nn.Sequential(nn.Linear(self.num_grid * 64, 512),
                                    nn.Linear(512, 10))

    def _cal_num_grids(self, level):
        """Return 1^2 + 2^2 + ... + level^2, the total number of SPP bins."""
        count = 0
        for i in range(level):
            count += (i + 1) * (i + 1)
        return count

    def forward(self, x):
        print('x初始大小为:')
        N, C, H, W = x.size()
        print('N:', N, ' C:', C, ' H', H, ' W:', W)
        print()
        x = self.feature(x)
        print('x经过卷积、激活、最大池化、卷积、激活变成:')
        N, C, H, W = x.size()
        # NOTE(review): this trace is only accurate for 64x64 inputs — the
        # network itself accepts any spatial size thanks to the SPP layer.
        print('64(conv)->62(maxpool)->31(conv)->29')
        print('N:', N, ' C:', C, ' H', H, ' W:', W)
        print()
        print('x进行空间金字塔池化:')
        x = self.spp_layer(x)
        print('空间金字塔池化后,x进入全连接层:')
        x = self.linear(x)
        return x
if __name__ == '__main__':
    # Smoke test: run one random 64x64 RGB image through the network
    # and print the resulting 10-class logits.
    dummy = torch.rand((1, 3, 64, 64))
    model = SPPNet()
    print(model(dummy))
输出:
x初始大小为:
N: 1 C: 3 H 64 W: 64
x经过卷积、激活、最大池化、卷积、激活变成:
64(conv)->62(maxpool)->31(conv)->29
N: 1 C: 64 H 29 W: 29
x进行空间金字塔池化:
多尺度提取信息,并进行特征融合...
第 1 次计算池化核:
kernel_size: (29, 29)
stride: (29, 29)
padding: (0, 0)
进行最大池化并将提取特征展开:
展开大小为: torch.Size([1, 64])
第 2 次计算池化核:
kernel_size: (15, 15)
stride: (15, 15)
padding: (1, 1)
进行最大池化并将提取特征展开:
合并为: torch.Size([1, 320])
第 3 次计算池化核:
kernel_size: (10, 10)
stride: (10, 10)
padding: (1, 1)
进行最大池化并将提取特征展开:
合并为: torch.Size([1, 896])
空间金字塔池化后,x进入全连接层:
tensor([[-0.0894, -0.1091, -0.1104, 0.0846, -0.0732, 0.0539, 0.0072, -0.0244,
-0.0082, 0.0929]], grad_fn=&lt;AddmmBackward0&gt;)
Process finished with exit code 0