SPPNet(Spatial Pyramid Pooling,空间金字塔池化)的原理网上已经有非常多的讲解,这里不再赘述。
感觉我看了很多博客,对我帮助较大的两个是:
https://blog.csdn.net/qq_42052229/article/details/90446073
https://zhuanlan.zhihu.com/p/42732128
1、网络目的确实是解决input的图像size大小不同的问题,
但是:
训练的时候,我们需要一个batch一个batch的训练,这每一个batch的维度是相同的,意味着,这一个batch内的图像input size是相同的,才能训练。除非我们一张一张的训练,这样就会带来其他的很多问题。
所以在训练的时候,往往采用几种不同尺度的size的input,对同一个网络分别进行训练。
而测试时候,因为一般是一张一张的,所以可以任意size的图像input。
2、使用自适应池化(adaptive pooling,例如 PyTorch 的 AdaptiveMaxPool2d / AdaptiveAvgPool2d)同样可以实现。
3、代码实现不一定要照搬网上别人的写法,最后全连接层的数量和维度可以自己任意调整。
4、例子pytorch:
多尺度训练:不同尺度size分别跑一个epoch
if __name__ == '__main__':
    # Multi-scale training: build one (train, test) loader pair per input
    # size, then cycle through every size inside each epoch.
    # `load` is defined elsewhere; presumably it returns loaders whose
    # images are all resized to the given side length -- TODO confirm.
    train_loaders, test_loaders = [], []
    for side in (350, 400, 450, 500):
        train_part, test_part = load(side)
        train_loaders.append(train_part)
        test_loaders.append(test_part)

    model = SPPNet().to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.0001, betas=(0.9, 0.99))

    for epoch in range(1, EPOCH + 1):
        # The same model/optimizer is reused for every scale, so each epoch
        # sees all four resolutions in turn.
        for train_loader, test_loader in zip(train_loaders, test_loaders):
            train(model, device, train_loader, criterion, optimizer, epoch)
            test(model, device, test_loader, criterion, epoch)

    # NOTE(review): the original text lost its indentation, so whether the
    # save ran once after training or inside the loops cannot be recovered;
    # the final file on disk is the same either way.
    torch.save(model, save_path)
resnet+sppnet,或者使用自适应池化(AdaptiveMaxPool2d):
import torch
from torch import nn
from torchvision import models
import torch.nn.functional as F
import os, math
from torch.nn.modules.pooling import AdaptiveAvgPool2d, AdaptiveMaxPool2d
class ResNet(nn.Module):
    """Torchvision ResNet wrapper that exposes the four residual stages.

    The wrapped torchvision model supplies the convolutional trunk; this
    class adds its own ``fc`` layer sized for ``num_class`` outputs, which
    is what ``forward`` uses for classification.

    Args:
        layers: ResNet depth; one of 18, 34, 50, 101 or 152.
        num_class: Number of output classes of the ``fc`` layer.
        pretrained: Forwarded unchanged to the torchvision constructor.

    Raises:
        ValueError: If ``layers`` is not a supported depth.
    """

    def __init__(self, layers=18, num_class=2, pretrained=True):
        super(ResNet, self).__init__()
        # Validate first so an unsupported depth fails before any model
        # (and possibly its weights) is constructed.
        if layers not in (18, 34, 50, 101, 152):
            raise ValueError('layers should be 18, 34, 50, 101 or 152.')
        builder = getattr(models, 'resnet{:d}'.format(int(layers)))
        self.resnet = builder(pretrained=pretrained)
        self.num_class = num_class
        # 18/34 end with 512 channels; 50/101/152 end with 512 * 4.
        out_channels = 512 if layers in (18, 34) else 512 * 4
        self.fc = nn.Linear(out_channels, num_class)

    def conv_base(self, x):
        """Run the stem and all four stages; return every stage output.

        Returns:
            Tuple ``(layer1, layer2, layer3, layer4)`` of feature maps, in
            the order the stages are applied.
        """
        x = self.resnet.conv1(x)
        x = self.resnet.bn1(x)
        x = self.resnet.relu(x)
        x = self.resnet.maxpool(x)
        layer1 = self.resnet.layer1(x)
        layer2 = self.resnet.layer2(layer1)
        layer3 = self.resnet.layer3(layer2)
        layer4 = self.resnet.layer4(layer3)
        return layer1, layer2, layer3, layer4

    def forward(self, x):
        """Classify ``x`` from the last stage: avgpool, flatten, then ``fc``."""
        layer4 = self.conv_base(x)[-1]
        x = self.resnet.avgpool(layer4)
        # Flatten everything except the batch dimension for the linear layer.
        x = x.view(x.size(0), -1)
        return self.fc(x)
class SPPNet(nn.Module):
    """ResNet trunk followed by spatial pyramid pooling and a linear classifier.

    Only the convolutional stages of the wrapped ``ResNet`` are used in
    ``forward``; the last-stage feature map is pooled at every pyramid level
    and the concatenated result feeds ``classifier``.

    Args:
        backbone: ResNet depth; one of 18, 34, 50, 101 or 152.
        num_class: Number of output classes.
        pool_size: Side length of the pooled grid at each pyramid level.
            Any non-empty sequence of levels is accepted.
        pretrained: Forwarded to ``ResNet``.

    Raises:
        ValueError: If ``backbone`` is unsupported or ``pool_size`` is empty.
    """

    def __init__(self, backbone=101, num_class=2, pool_size=(1, 2, 6), pretrained=True):
        # Only resnet is supported in this version
        super(SPPNet, self).__init__()
        # Check arguments before building the backbone so bad input fails fast.
        if backbone not in (18, 34, 50, 101, 152):
            raise ValueError('Resnet{} is not supported yet.'.format(backbone))
        pool_size = tuple(pool_size)
        if not pool_size:
            raise ValueError('pool_size must contain at least one pyramid level.')

        self.resnet = ResNet(backbone, num_class, pretrained)
        # Channel count of the last stage: 512 for 18/34, 2048 for deeper nets.
        self.c = 512 if backbone in (18, 34) else 2048
        self.spp = SpatialPyramidPool2D(out_side=pool_size)
        # Each level contributes side * side values per channel, so the
        # classifier width follows from however many levels were requested.
        num_features = self.c * sum(side ** 2 for side in pool_size)
        self.classifier = nn.Linear(num_features, num_class)

    def forward(self, x):
        """Return class scores computed from the pooled last-stage features."""
        features = self.resnet.conv_base(x)[-1]
        pooled = self.spp(features)
        return self.classifier(pooled)
class SpatialPyramidPool2D(nn.Module):
    """Max-pool a feature map onto several fixed grids and concatenate them.

    Each pyramid level pools the input to an ``n x n`` grid with adaptive
    max pooling, so the flattened output length does not depend on the
    spatial size of the input.

    Args:
        out_side (tuple): Grid side length ``n`` for each pyramid level.

    Inputs:
        - `x`: feature map, expected as (batch, channel, height, width).

    Returns:
        Tensor of shape (batch, channel * sum(n * n for n in out_side)),
        or ``None`` when ``out_side`` is empty.
    """

    def __init__(self, out_side):
        super(SpatialPyramidPool2D, self).__init__()
        self.out_side = out_side

    def forward(self, x):
        levels = []
        for side in self.out_side:
            pooled = AdaptiveMaxPool2d(output_size=(side, side))(x)
            # Keep the batch dimension, flatten channels and grid cells.
            levels.append(pooled.view(pooled.size(0), -1))
        if not levels:
            return None
        # Levels are joined along the feature dimension in out_side order.
        return torch.cat(levels, 1)
code源地址:
https://github.com/mmmmmmiracle/SPPNet/blob/master/multi.py
https://github.com/stanleykao72/Deepfake-Detector/blob/934749da71ae31f6e689f2ca2e015cd3e2d5c50a/py_utils/DL/sppnet/models/classifier.py