bboxes_pred = model(images) #bboxes_pred为列表,长度为3,三个尺度的预测
'''例如sub batch_size为4时 类别为5时
bboxes_pred[0].shape -> torch.Size([4, 30, 76, 76])
bboxes_pred[1].shape -> torch.Size([4, 30, 38, 38])
bboxes_pred[2].shape -> torch.Size([4, 30, 19, 19])'''
loss, loss_xy, loss_wh, loss_obj, loss_cls, loss_l2 = criterion(bboxes_pred, bboxes)
'''
bboxes.shape -> torch.Size([4, 60, 5])
'''
关于模型的输出:
三个尺度预测,每个尺度的stride分别为[8, 16, 32],输入图片大小为608,因此特征维度分别为[76, 38, 19],每个尺度的格子预测三个anchor
例如尺度1,预测76*76个格子,每个格子对3个anchor进行回归
以下均为yolov5s的有关记录
1.train.txt test.txt
训练测试文件 列举出所有参与训练的图片及其路径 每一行一个(修改的为txt + jpg)
2.每一张图片对应一个txt标签文件,每一行代表一个目标
格式为目标序号 x y w h
目标序号从零开始,不代表背景; xy为中心点, xywh均为相对值(0-1)。例如:
0 0.9250814332247557 0.6224489795918368 0.013029315960912053 0.013605442176870748
2 0.3127035830618892 0.4736394557823129 0.013029315960912053 0.015306122448979591
2 0.08224755700325732 0.641156462585034 0.014657980456026058 0.013605442176870748
from n params module arguments
0 -1 1 3520 models.common.Focus [3, 32, 3]
1 -1 1 18560 models.common.Conv [32, 64, 3, 2]
2 -1 1 19904 models.common.BottleneckCSP [64, 64, 1]
3 -1 1 73984 models.common.Conv [64, 128, 3, 2]
4 -1 1 161152 models.common.BottleneckCSP [128, 128, 3]
5 -1 1 295424 models.common.Conv [128, 256, 3, 2]
6 -1 1 641792 models.common.BottleneckCSP [256, 256, 3]
7 -1 1 1180672 models.common.Conv [256, 512, 3, 2]
8 -1 1 656896 models.common.SPP [512, 512, [5, 9, 13]]
9 -1 1 1248768 models.common.BottleneckCSP [512, 512, 1, False]
10 -1 1 131584 models.common.Conv [512, 256, 1, 1]
11 -1 1 0 torch.nn.modules.upsampling.Upsample [None, 2, 'nearest']
12 [-1, 6] 1 0 models.common.Concat [1]
13 -1 1 378624 models.common.BottleneckCSP [512, 256, 1, False]
14 -1 1 33024 models.common.Conv [256, 128, 1, 1]
15 -1 1 0 torch.nn.modules.upsampling.Upsample [None, 2, 'nearest']
16 [-1, 4] 1 0 models.common.Concat [1]
17 -1 1 95104 models.common.BottleneckCSP [256, 128, 1, False]
18 -1 1 147712 models.common.Conv [128, 128, 3, 2]
19 [-1, 14] 1 0 models.common.Concat [1]
20 -1 1 313088 models.common.BottleneckCSP [256, 256, 1, False]
21 -1 1 590336 models.common.Conv [256, 256, 3, 2]
22 [-1, 10] 1 0 models.common.Concat [1]
23 -1 1 1248768 models.common.BottleneckCSP [512, 512, 1, False]
24 [17, 20, 23] 1 229245 Detect [80, [[10, 13, 16, 30, 33, 23], [30, 61, 62, 45, 59, 119], [116, 90, 156, 198, 373, 326]], [128, 256, 512]]
model.train()
# Dummy input: batch of 8 on GPU (1 on CPU), 3-channel 640x640 random image.
img = torch.rand(8 if torch.cuda.is_available() else 1, 3, 640, 640).to(device)
y = model(img, profile=False)
print(type(y))      # presumably <class 'list'> in train mode -- confirm
print(len(y))       # 3 -- one raw output per detection scale
print(type(y[0]))   # presumably <class 'torch.Tensor'> -- confirm
print(y[0].size())  # torch.Size([8, 3, 80, 80, 85])  (batch, anchors, grid_h, grid_w, 85)
print(y[1].size())  # torch.Size([8, 3, 40, 40, 85])
print(y[2].size())  # torch.Size([8, 3, 20, 20, 85])
评估时输出变为二元组(tuple),在0位多加了一个拼接后的推理输出,原训练时的三尺度输出放在了1位
# In eval mode the output gains an extra element at index 0 (the concatenated
# inference prediction); the raw per-scale train-style outputs move to index 1.
# NOTE(review): the values printed below presumably come from re-running
# y = model(img) AFTER model.eval() -- as written, y is stale; confirm.
model.eval()
print(type(y))        # outer container (tuple per the dump below)
print(len(y))         # 2
print(type(y[0]))     # concatenated prediction tensor
print(type(y[1]))     # list of the 3 raw per-scale outputs
print(y[0].size())    # torch.Size([8, 25200, 85])  (25200 = 3*(80*80 + 40*40 + 20*20))
print(len(y[1]))      # 3
print(y[1][0].size()) # torch.Size([8, 3, 80, 80, 85])
print(y[1][1].size()) # torch.Size([8, 3, 40, 40, 85])
print(y[1][2].size()) # torch.Size([8, 3, 20, 20, 85])
# 输入1×3×2560×2560
<class 'tuple'>
2
<class 'torch.Tensor'>
<class 'list'>
torch.Size([1, 403200, 85])
3
torch.Size([1, 3, 320, 320, 85])
torch.Size([1, 3, 160, 160, 85])
torch.Size([1, 3, 80, 80, 85])
s = 128  # 2x min stride: small dummy input used only to measure the strides
# Forward a zero image of size s x s; each scale's stride is s divided by the
# height (shape[-2]) of that scale's output feature map.
m.stride = torch.tensor([s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s))])  # forward
print('m.stride', m.stride)  # tensor([ 8., 16., 32.])
print('for', [x.shape for x in self.forward(torch.zeros(1, ch, s, s))])  # [torch.Size([1, 3, 16, 16, 85]), torch.Size([1, 3, 8, 8, 85]), torch.Size([1, 3, 4, 4, 85])]
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# At model construction the anchors are processed once more: dividing by the
# per-scale stride converts them from input-image pixels to grid units
# (e.g. 10/8 = 1.25 on the stride-8 scale, matching the tensor printed below).
m.anchors /= m.stride.view(-1, 1, 1)
print('m.anchors', m.anchors)
tensor([[[ 1.25000, 1.62500],
[ 2.00000, 3.75000],
[ 4.12500, 2.87500]],
[[ 1.87500, 3.81250],
[ 3.87500, 2.81250],
[ 3.68750, 7.43750]],
[[ 3.62500, 2.81250],
[ 4.87500, 6.18750],
[11.65625, 10.18750]]])