def __init__(self, cfg='yolov5s.yaml', ch=3, nc=None, anchors=None):  # model, input channels, number of classes
    """Build the model from a YAML config.

    Args:
        cfg: model config — either a dict, or a path to a *.yaml file.
        ch: number of input channels (3 for RGB images).
        nc: number of classes; when given, overrides the YAML value
            (the value from the train.py command line takes precedence).
        anchors: when given, overrides the YAML anchors.
    """
    super().__init__()
    if isinstance(cfg, dict):
        self.yaml = cfg  # model dict
    else:  # is *.yaml
        import yaml  # for torch hub
        self.yaml_file = Path(cfg).name
        with open(cfg, encoding='ascii', errors='ignore') as f:
            self.yaml = yaml.safe_load(f)  # model dict

    # Define model
    ch = self.yaml['ch'] = self.yaml.get('ch', ch)  # input channels
    if nc and nc != self.yaml['nc']:
        LOGGER.info(f"Overriding model.yaml nc={self.yaml['nc']} with nc={nc}")
        self.yaml['nc'] = nc  # override yaml value
    if anchors:
        LOGGER.info(f'Overriding model.yaml anchors with anchors={anchors}')
        self.yaml['anchors'] = round(anchors)  # override yaml value
    self.model, self.save = parse_model(deepcopy(self.yaml), ch=[ch])  # model, savelist
    self.names = [str(i) for i in range(self.yaml['nc'])]  # default names
    self.inplace = self.yaml.get('inplace', True)

    # Build strides, anchors
    m = self.model[-1]  # Detect()
    if isinstance(m, Detect):
        s = 256  # 2x min stride
        m.inplace = self.inplace
        # One forward pass on a zero (1, ch, 256, 256) image; each detection layer's
        # output height fixes that layer's stride (e.g. tensor([8., 16., 32.])).
        # NOTE: a second, redundant self.forward(...) call was removed here — its
        # result was unused (shadowed by the comprehension variable below).
        m.stride = torch.tensor([s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s))])  # forward
        check_anchor_order(m)  # must be in pixel-space (not grid-space)
        m.anchors /= m.stride.view(-1, 1, 1)
        self.stride = m.stride
        self._initialize_biases()  # only run once
参数:
m
即整个模型的最后一层,即Detect层
s
自定义的一个数据的w、h(用于后面输入网络,看金字塔结构的缩放关系)
m.stride
特征图缩放比例,即tensor([ 8., 16., 32.])
函数:
self.forward(torch.zeros(1, ch, s, s))
自动输入了一个大小为(1,3,256,256)的全零矩阵,输出分别为各检测层的特征图
check_anchor_order
# (from notes) Checks that anchors are listed smallest-to-largest so they stay
# consistent with the network's downsampling factors.
def check_anchor_order(m):
    """Check anchor order against stride order for a YOLOv5 Detect() module m, reversing if needed."""
    mean_area = m.anchors.prod(-1).mean(-1).view(-1)  # mean anchor area per output layer
    area_delta = mean_area[-1] - mean_area[0]  # first-to-last change in anchor area
    stride_delta = m.stride[-1] - m.stride[0]  # first-to-last change in stride
    # orders disagree when the two deltas have opposite signs (zero area delta is left alone)
    if area_delta and area_delta.sign() != stride_delta.sign():
        LOGGER.info(f'{PREFIX}Reversing anchor order')
        m.anchors[:] = m.anchors.flip(0)
m.anchors =
tensor([[[ 10., 13.],
[ 16., 30.],
[ 33., 23.]],
[[ 30., 61.],
[ 62., 45.],
[ 59., 119.]],
[[116., 90.],
[156., 198.],
[373., 326.]]])
.prod(-1)
即是连乘,-1即在最后一个维度上连乘,.prod(-1)后即为每组锚框的面积
.mean(-1)
在最后一个维度上求均值
.view(-1)
展成一列,其实数据对的话,mean后已经是一列了 def _initialize_biases(self, cf=None): # initialize biases into Detect(), cf is class frequency
# https://arxiv.org/abs/1708.02002 section 3.3
# cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1.
m = self.model[-1] # Detect() module
for mi, s in zip(m.m, m.stride): # from
b = mi.bias.view(m.na, -1).detach() # conv.bias(255) to (3,85)
b[:, 4] += math.log(8 / (640 / s) ** 2) # obj (8 objects per 640 image)
b[:, 5:] += math.log(0.6 / (m.nc - 0.999999)) if cf is None else torch.log(cf / cf.sum()) # cls
mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
初始化detect的bias,detect的layer有三层,所以遍历三次
s
即stride def _forward_once(self, x, profile=False, visualize=False):
y, dt = [], [] # outputs
for m in self.model:
if m.f != -1: # if not from previous layer
x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f] # from earlier layers
if profile:
self._profile_one_layer(m, x, dt)
x = m(x) # run 找好输入,在计算该层的输出
y.append(x if m.i in self.save else None) # save output 需要则记载
if visualize:
feature_visualization(x, m.type, m.i, save_dir=visualize)
return x
解读:
对于模型每一层:
m.f
该层输入的来源索引;如果不是-1,则要到前面保存的输出y里取输入(前面层的输出会保存在y里,以方便后面层调用),如[-1, 20, 23]:-1则取当前x,20则找y[20]。
参数:
y
保存需要保存的输出,比如需要cat的或者是最后detect的结果。
self.save
记录需要保存的层序号,如[4, 6, 10, 14, 17, 20, 23]
函数:
x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f]
如果m.f是int,则直接在y里找;如果是其他的,如list,则遍历去找。
savelist创建:
save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1)
如果f是-1就不记录,否则记录f % i。f是需要的数据层索引,i是本层索引;f一般是前面层的绝对索引,此时f % i == f;取模的作用是把负的相对索引(如-2)换算成绝对层号(-2 % i == i - 2)。
——————————————————————————————
forward_once中的前向推理结果:
①3:3个检测层;②每层3组锚框;③32,32:检测层w,h;④7:4+2+1