Use a ResNet50 network to extract image features, then feed the extracted features to the next network as its input.
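A minimal sketch of this pipeline using timm (shapes match the test at the end of this post; pretrained=True downloads ImageNet weights):

import timm
import torch

# num_classes=0 removes the classification head, so the model returns pooled features
backbone = timm.create_model("resnet50", pretrained=True, num_classes=0)
images = torch.randn(16, 3, 384, 384)
features = backbone(images)  # [16, 2048], ready to feed into the next network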
def __init__(self, class_num, drop_rate, share_weight=False, pretrained=True):
    super(SGAM, self).__init__()
    self.model_1 = timm.create_model("resnet50", pretrained=pretrained, num_classes=0)
    # self.space_net = timm.create_model("LPN", pretrained=pretrained, num_classes=0)
    self.space_net = nn.Sequential(
        nn.Conv2d(32, 64, 1),  # arguments: input channels, output channels, kernel size
        nn.Conv2d(64, 64, 3, padding=1, groups=32),
        nn.BatchNorm2d(64),
        nn.ReLU(),
        # nn.Conv2d(64, 32, 1),
        # nn.Conv2d(32, 32, 3, padding=1, groups=32),
        # nn.BatchNorm2d(32),
        # nn.ReLU()
    )

def forward(self, x):
    y = self.model_1(x)
    y = y[:, :, None, None]
    y = torch.reshape(y, [16, 32, 8, 8])
    y = self.space_net(y)
    return y
This is my network model. Below are the (dimension-related) problems I ran into while testing it.
1. RuntimeError: Expected 3D (unbatched) or 4D (batched) input to conv2d, but got input of size: [16, 2048]
Solved by expanding y's dimensions in the forward function, i.e., adding the y = y[:, :, None, None] and torch.reshape lines shown above.
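A minimal sketch of that fix: the 2048-dim pooled feature is reinterpreted as a 32-channel 8×8 map (2048 = 32 × 8 × 8); using -1 for the batch dimension avoids hard-coding it:

import torch

feat = torch.randn(16, 2048)      # pooled ResNet50 output: [batch, 2048]
feat4d = feat.view(-1, 32, 8, 8)  # [16, 32, 8, 8], a 4D tensor conv2d accepts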
2. RuntimeError: Given groups=1, weight of size [64, 16, 1, 1], expected input[16, 32, 8, 8] to have 16 channels, but got 32 channels instead
Solved by changing the first argument of the first Conv2d in space_net, i.e., its input channel count, to 32.
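A minimal reproduction of that mismatch: Conv2d's first argument must equal the channel dimension of the incoming tensor:

import torch
import torch.nn as nn

x = torch.randn(16, 32, 8, 8)  # 32-channel input
bad = nn.Conv2d(16, 64, 1)     # in_channels=16 -> raises the RuntimeError above on bad(x)
good = nn.Conv2d(32, 64, 1)    # in_channels=32 matches the input
y = good(x)                    # [16, 64, 8, 8]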
3. RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu! (when checking argument for argument mat2 in method wrapper_mm)
???
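A likely cause, judging from the final code below: SEAttention is constructed inside forward, so its Linear weights stay on the CPU while the input sits on cuda:0, and the matrix multiply inside the Linear layer then mixes devices. A minimal sketch of the usual fix, assuming the SEAttention module defined later in this post (SGAMFixed is a hypothetical name for illustration): register the submodule in __init__ so that model.cuda() moves its weights along with everything else:

import torch.nn as nn

class SGAMFixed(nn.Module):  # hypothetical name, for illustration only
    def __init__(self):
        super().__init__()
        # registered here, so .cuda()/.to(device) moves these weights too
        self.se = SEAttention(channel=512, reduction=8)

    def forward(self, x):
        # (an ad-hoc alternative is SEAttention(...).to(x.device) inside forward)
        return self.se(x)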
4. RuntimeError: CUDA error: device-side assert triggered
Searching online, I found that this is usually debugged in two steps (at first I could not make sense of any of it):
1. Drop CUDA and run the training on the CPU instead.
2. The CPU run then fails with a different, more informative error: out of bounds.
That second error names the concrete cause: the label values are inconsistent with the number of network outputs (this was exactly my mistake). At first I had no idea what "inconsistent" meant; it took me two days of digging to work out that the number in question is your own class count, i.e., the number of categories the network has to classify (see the sketch below).
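A minimal reproduction of the class-count mistake (names are illustrative): with class_num output units, every label must lie in [0, class_num - 1]; on the CPU an out-of-range label raises a clear IndexError, while on CUDA it surfaces as the opaque device-side assert:

import torch
import torch.nn as nn

class_num = 10
logits = torch.randn(4, class_num)        # network output: [batch, class_num]
labels_ok = torch.tensor([0, 3, 9, 2])    # all within [0, class_num - 1]
labels_bad = torch.tensor([0, 3, 10, 2])  # 10 is out of range for 10 classes

loss_fn = nn.CrossEntropyLoss()
loss = loss_fn(logits, labels_ok)         # fine
# loss_fn(logits, labels_bad)             # IndexError on CPU, device-side assert on CUDA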
# Spatial Global Attention Module (SGAM) model test
import numpy as np
import pandas as pd
import timm
import math
import torch
import torch.nn as nn
from torch.nn import init, functional
from senet.cbam_resnet import resnet50_cbam
class ClassBlock(nn.Module):
    def __init__(self, input_dim, class_num, drop_rate, num_bottleneck=512):
        super(ClassBlock, self).__init__()
        add_block = []
        add_block += [nn.Linear(input_dim, num_bottleneck)]
        if drop_rate > 0:
            add_block += [nn.Dropout(p=drop_rate)]
        add_block = nn.Sequential(*add_block)
        add_block.apply(weights_init_kaiming)

        classifier = []
        classifier += [nn.Linear(num_bottleneck, class_num)]
        classifier = nn.Sequential(*classifier)
        classifier.apply(weights_init_classifier)

        self.add_block = add_block
        self.classifier = classifier

    def forward(self, x):
        x = self.add_block(x)
        x = self.classifier(x)
        return x
class ResNeSt_50(nn.Module):
    def __init__(self, classes, drop_rate, share_weight=False, pretrained=True):
        super(ResNeSt_50, self).__init__()
        self.model_1 = timm.create_model("resnest50d", pretrained=pretrained, num_classes=0)
        if share_weight:
            self.model_2 = self.model_1
        else:
            self.model_2 = timm.create_model("resnest50d", pretrained=pretrained, num_classes=0)
        self.classifier = ClassBlock(2048, classes, drop_rate)

    def forward(self, x1, x2):
        # forward_ is a helper defined elsewhere in this project (not shown here)
        return forward_(self.model_1, self.model_2, self.classifier, x1, x2)
def weights_init_kaiming(m):
    classname = m.__class__.__name__
    # print(classname)
    if classname.find('Conv') != -1:
        init.kaiming_normal_(m.weight.data, a=0, mode='fan_in')  # For old pytorch, you may use kaiming_normal.
    elif classname.find('Linear') != -1:
        init.kaiming_normal_(m.weight.data, a=0, mode='fan_out')
        init.constant_(m.bias.data, 0.0)
    elif classname.find('BatchNorm1d') != -1:
        init.normal_(m.weight.data, 1.0, 0.02)
        init.constant_(m.bias.data, 0.0)

def weights_init_classifier(m):
    classname = m.__class__.__name__
    if classname.find('Linear') != -1:
        init.normal_(m.weight.data, std=0.001)
        init.constant_(m.bias.data, 0.0)
class SEAttention(nn.Module):
    def __init__(self, channel=512, reduction=16):
        super().__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)  # global average pooling, outputs c×1×1
        self.fc = nn.Sequential(
            nn.Linear(channel, channel // reduction, bias=False),  # channel // reduction squeezes the channel dim
            nn.ReLU(inplace=True),
            nn.Linear(channel // reduction, channel, bias=False),  # restore the original channel count
            nn.Sigmoid()
        )

    def forward(self, x):
        b, c, _, _ = x.size()            # e.g. 50×512×7×7
        y = self.avg_pool(x).view(b, c)  # ① pooling gives 50×512×1×1; ② view reshapes it to 50×512
        y = self.fc(y).view(b, c, 1, 1)  # 50×512×1×1
        return x * y.expand_as(x)        # expand y to match x's size
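# Quick shape check for SEAttention (a sketch; sizes follow the comments above):
#   se = SEAttention(channel=512, reduction=16)
#   out = se(torch.randn(50, 512, 7, 7))  # output keeps the input shape [50, 512, 7, 7]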
class SGAM(nn.Module):
    def __init__(self, class_num, drop_rate, share_weight=False, pretrained=True):
        super(SGAM, self).__init__()
        self.model_1 = timm.create_model("resnet50", pretrained=pretrained, num_classes=0)
        # self.space_net = timm.create_model("LPN", pretrained=pretrained, num_classes=0)
        self.space_net = nn.Sequential(
            nn.Conv2d(32, 64, 1),  # arguments: input channels, output channels, kernel size
            nn.Conv2d(64, 64, 3, padding=1, groups=32),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.Conv2d(64, 32, 1),
            nn.Conv2d(32, 32, 3, padding=1, groups=32),
            nn.BatchNorm2d(32),
            nn.ReLU()
        )

    def forward(self, x):
        y = self.model_1(x)
        y = y[:, :, None, None]
        y = torch.reshape(y, [16, 32, 8, 8])  # hard-codes the batch size; y.view(-1, 32, 8, 8) is safer
        y1 = self.space_net(y)
        # NOTE: constructing SEAttention here leaves its weights on the CPU while x is on
        # cuda:0 (the cause of error 3 above); it should be registered in __init__ instead.
        # Also, x has 3 channels at this point, not the 512 that channel=512 expects.
        se = SEAttention(channel=512, reduction=8)
        y2 = se(x)
        y = torch.cat([y1, y2], dim=1)  # torch.cat takes a sequence of tensors; y1 and y2 must also share spatial sizes
        return y
# Instantiate the model
model = SGAM(100, 0.1).cuda()
input = torch.randn(16, 3, 384, 384).cuda()
# print(input.size())   -> torch.Size([16, 3, 384, 384])
output = model(input)
# print(output.size())  -> torch.Size([16, 32, 8, 8])
1. The first problem comes down to a data dimension mismatch.
2. As for the second problem, I still don't know why it happens; if you do, please explain it in the comments.