接上篇yolov5之anchor匹配策略(build_targets)分析(1)
gxy = t[:, 2:4] # grid xy
gxi = gain[[2, 3]] - gxy # inverse
#这两个条件可以用来选择靠近的两个邻居网格
j, k = ((gxy % 1. < g) & (gxy > 1.)).T
l, m = ((gxi % 1. < g) & (gxi > 1.)).T
j = torch.stack((torch.ones_like(j), j, k, l, m))
t = t.repeat((5, 1, 1))[j] #过滤box
offsets = (torch.zeros_like(gxy)[None] + off[:, None])[j] #过滤偏置
t已经是过滤后的gt box信息,gxy存储的是以特征图左上角为原点的gt box中心(x,y)坐标。gxi通过gain[[2,3]]-gxy对坐标取反,表示的是以特征图右下角为原点的gt box中心(x,y)坐标。yolov5在计算正样本anchor时做了一个较大的调整:
1). 不同于yolov3,yolov4,其gt box可以跨层预测,即有些gt box在多个预测层都算正样本;
2).不同于yolov3,yolov4,其gt box可匹配的anchor数可为3~9个,显著增加了正样本的数量。不再是gt box落在哪个网格就只由该网格内的anchor来预测,而是根据中心点的位置增加两个邻近的网格的anchor来共同预测。
(Pdb) print(j)
tensor([[ True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, ...],
[False, False, False, True, True, False, False, False, True, False, False, False, False, True, True, False, False, False, False, True, False, ...],
[ True, True, True, True, True, True, False, False, False, False, True, True, True, True, True, True, False, False, False, False, True,...],
[ True, True, True, False, False, True, True, True, False, True, True, True, True, False, False, True, True, True, True, False, True,...],
[False, False, False, False, False, False, True, True, True, True, False, False, False, False, False, False, True, True, True, True, False,...]])
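你看,在这个例子中每个gt box(即每一列)都恰好有3个True,也就是有3个网格与之匹配:第一行恒为True,表示gt box中心所在的网格;其余四行中再选中两个邻居网格。需要注意,由于条件中还要求gxy > 1.和gxi > 1.,当gt box中心贴近特征图边界时,对应的邻居网格不会被选中,此时匹配的网格数会少于3个。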
(Pdb) torch.zeros_like(gxy).shape
torch.Size([29, 2])
(Pdb) torch.zeros_like(gxy)[None].shape
torch.Size([1, 29, 2])
(Pdb) off.shape
torch.Size([5, 2])
(Pdb) off[:,None].shape
torch.Size([5, 1, 2])
(Pdb) (torch.zeros_like(gxy)[None]+off[:,None]).shape
torch.Size([5, 29, 2])
(Pdb) (torch.zeros_like(gxy)[None]+off[:,None])[j].shape
torch.Size([87, 2])
b, c = t[:, :2].long().T # image, class
gxy = t[:, 2:4] # grid xy
gwh = t[:, 4:6] # grid wh
gij = (gxy - offsets).long() #取整
gi, gj = gij.T # grid xy indices
# Append
a = t[:, 6].long() # anchor indices
indices.append((b, a, gj.clamp_(0, gain[3] - 1), gi.clamp_(0, gain[2] - 1))) # image, anchor, grid indices
b表示当前bbox属于该batch内第几张图片,我这里当然全是0,因为batch等于1嘛。a表示当前gt box和当前层的第几个anchor匹配上了。
(gi,gj)是我们计算出来的负责预测该gt box的网格的坐标。
(Pdb) indices
[
(tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]),
tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 1, 1, 1, 2, 2, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2]),
tensor([45, 45, 45, 43, 43, 45, 73, 73, 72, 74, 45, 45, 45, 43, 43, 45, 47, 73, 73, 72, 45, 45, 45, 43, 45, 47, 73, 73, 72, 43, 43, 72, 43, 43, 72, 43, 72, 44, 44, 44, 42, 42, 44, 44, 44, 44, 42, 42, 44, 44, 44, 44, 42, 44, 45, 45, 45, 45, 73, 73, 74, 45, 45, 45, 45, 47, 73, 73, 45, 45, 45, 45, 47, 73, 73, 74, 74, 73,75, 48, 74, 74, 73, 48, 74, 74, 73]),
tensor([20, 7, 2, 3, 10, 19, 10, 7, 21, 13, 20, 7, 2, 3, 10, 19, 22, 10, 7, 21, 20, 7, 2, 3, 19, 22, 10, 7, 21, 2, 9, 20, 2, 9, 20, 2, 20, 20, 7, 2, 3, 10, 19, 20, 7, 2, 3, 10, 19, 20, 7, 2, 3, 19, 21, 8, 3, 20, 11, 8, 14, 21, 8, 3, 20, 23, 11, 8, 21, 8, 3, 20, 23, 11, 8, 10, 7, 21,
13, 22, 10, 7, 21, 22, 10, 7, 21]))
]
tbox.append(torch.cat((gxy - gij, gwh), 1)) #
anch.append(anchors[a]) # anchors
tcls.append(c) # class
由于采用了跨网格预测,故xy预测输出不再是0~1,而是-1~1,加上offset偏移,则为-0.5~1.5。
(Pdb) tbox
[tensor([[ 7.94214e-01, 3.07404e-01, 2.06718e+00, 1.88433e+00],
[ 7.05980e-01, 6.42929e-02, 3.34444e+00, 2.60274e+00],
[ 9.11881e-01, 4.95827e-01, 4.72962e+00, 2.13167e+00],
[ 3.40125e-01, 3.43548e-01, 1.65410e+00, 4.67637e+00],
[ 2.58821e-01, 3.75946e-01, 8.37189e-01, 2.39581e+00],
[ 7.20589e-01, 1.59073e-03, 1.34638e+00, 2.37982e+00],
[ 9.99854e-01, 5.47440e-01, 1.49643e+00, 2.53927e+00],
[ 7.29174e-01, 5.61737e-01, 1.18704e+00, 2.64536e+00],
[ 7.24678e-02, 9.07990e-01, 2.33363e+00, 5.96677e+00],
[ 7.57526e-01, 7.26974e-01, 5.69084e-01, 8.14987e-01],
[ 7.94214e-01, 3.07404e-01, 2.06718e+00, 1.88433e+00],
[ 7.05980e-01, 6.42929e-02, 3.34444e+00, 2.60274e+00],
[ 9.11881e-01, 4.95827e-01, 4.72962e+00, 2.13167e+00],
[ 3.40125e-01, 3.43548e-01, 1.65410e+00, 4.67637e+00],
[ 2.58821e-01, 3.75946e-01, 8.37189e-01, 2.39581e+00],
[ 7.20589e-01, 1.59073e-03, 1.34638e+00, 2.37982e+00],
[ 5.97118e-01, 9.40834e-01, 5.71178e+00, 1.92723e+00],
[ 9.99854e-01, 5.47440e-01, 1.49643e+00, 2.53927e+00],
[ 7.29174e-01, 5.61737e-01, 1.18704e+00, 2.64536e+00],
[ 7.24678e-02, 9.07990e-01, 2.33363e+00, 5.96677e+00],
......
[-1.45912e-04, 5.47440e-01, 1.49643e+00, 2.53927e+00],
[-2.70826e-01, 5.61737e-01, 1.18704e+00, 2.64536e+00],
[ 9.99854e-01, -4.52560e-01, 1.49643e+00, 2.53927e+00],
[ 7.29174e-01, -4.38263e-01, 1.18704e+00, 2.64536e+00],
[ 7.24678e-02, -9.20105e-02, 2.33363e+00, 5.96677e+00],
[ 7.57526e-01, -2.73026e-01, 5.69084e-01, 8.14987e-01],
[ 5.97118e-01, -5.91660e-02, 5.71178e+00, 1.92723e+00],
[ 9.99854e-01, -4.52560e-01, 1.49643e+00, 2.53927e+00],
[ 7.29174e-01, -4.38263e-01, 1.18704e+00, 2.64536e+00],
[ 7.24678e-02, -9.20105e-02, 2.33363e+00, 5.96677e+00],
[ 5.97118e-01, -5.91660e-02, 5.71178e+00, 1.92723e+00],
[ 9.99854e-01, -4.52560e-01, 1.49643e+00, 2.53927e+00],
[ 7.29174e-01, -4.38263e-01, 1.18704e+00, 2.64536e+00],
[ 7.24678e-02, -9.20105e-02, 2.33363e+00, 5.96677e+00]])]
(Pdb) anch
[tensor([[1.25000, 1.62500],
[1.25000, 1.62500],
[1.25000, 1.62500],
[1.25000, 1.62500],
[1.25000, 1.62500],
[1.25000, 1.62500],
[1.25000, 1.62500],
......
[1.25000, 1.62500],
[1.25000, 1.62500],
[1.25000, 1.62500],
[1.25000, 1.62500],
[2.00000, 3.75000],
[2.00000, 3.75000],
[2.00000, 3.75000],
[2.00000, 3.75000],
[4.12500, 2.87500],
[4.12500, 2.87500],
[4.12500, 2.87500],
[4.12500, 2.87500]])]
(Pdb) tcls
[tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])]