yolov5 自动anchor计算

 早在yolov2时就了解到,不同于faster-rcnn中手动设置的anchor,yolov2中的anchor是通过k-means聚类算法得到的,这样更贴合实际的训练数据。这次借学习yolov5的机会把其中关于自动anchor计算的逻辑再梳理一遍,重点就是分析一下utils/autoanchor.py文件的相关函数。除非显式地设置noautoanchor参数为True,否则训练过程中默认会使用自动anchor计算,即调用check_anchors函数。
 

....
# Run the anchor check unless it was disabled through the noautoanchor option.
if not opt.noautoanchor:
    check_anchors(dataset, model=model, thr=hyp['anchor_t'], imgsz=imgsz)
...

参数dataset代表的是训练集,hyp['anchor_t']这个超参数是一个界定anchor与label匹配程度的阈值,imgsz自然就是网络输入尺寸,后面的讲解中按默认的640来推演。


def check_anchors(dataset, model, thr=4.0, imgsz=640):
    """Check how well the model's anchors fit the dataset labels; recompute them if the fit is poor.

    Label boxes are rescaled to the training resolution and compared against the
    anchors stored on the last layer of the model. If the best possible recall
    (BPR) is below 0.98, new anchors are requested from kmean_anchors() and are
    written into the model only when they achieve a higher BPR.

    Arguments:
        dataset: object exposing `shapes` (per-image sizes) and `labels`
            (per-image arrays whose columns 3:5 are used as box width/height)
        model: model whose last layer holds `anchor_grid`, `anchors` and `stride`;
            a wrapped model (with a `.module` attribute) is unwrapped first
        thr: anchor-label wh ratio threshold, hyp['anchor_t']
        imgsz: image size used for training
    """
    print('\nAnalyzing anchors... ', end='')
    net = model.module if hasattr(model, 'module') else model
    m = net.model[-1]  # Detect()

    # Image sizes after scaling the longest side to imgsz
    shapes = imgsz * dataset.shapes / dataset.shapes.max(1, keepdims=True)
    # Random per-image scale jitter drawn from 0.9-1.1
    scale = np.random.uniform(0.9, 1.1, size=(shapes.shape[0], 1))  # augment scale
    boxes = [labels[:, 3:5] * shape for shape, labels in zip(shapes * scale, dataset.labels)]
    wh = torch.tensor(np.concatenate(boxes)).float()  # wh of every label box, one row per box

    def metric(k):  # compute metric
        r = wh[:, None] / k[None]  # box/anchor ratio for every (box, anchor, side)
        x = torch.min(r, 1. / r).min(2)[0]  # ratio metric: worse of the two sides, folded to <= 1
        best = x.max(1)[0]  # best_x: best-matching anchor per box
        limit = 1. / thr
        # (best possible recall, anchors above threshold)
        return (best > limit).float().mean(), (x > limit).float().sum(1).mean()

    bpr, aat = metric(m.anchor_grid.clone().cpu().view(-1, 2))
    print('anchors/target = %.2f, Best Possible Recall (BPR) = %.4f' % (aat, bpr), end='')

    # Anchors are re-estimated only when the BPR of the current anchors is below the threshold
    if not bpr < 0.98:
        print('')  # newline
        return

    print('. Attempting to improve anchors, please wait...')
    na = m.anchor_grid.numel() // 2  # number of anchors
    new_anchors = kmean_anchors(dataset, n=na, img_size=imgsz, thr=thr, gen=1000, verbose=False)
    new_bpr, _ = metric(new_anchors.reshape(-1, 2))
    if new_bpr > bpr:  # replace anchors
        new_anchors = torch.tensor(new_anchors, device=m.anchors.device).type_as(m.anchors)
        m.anchor_grid[:] = new_anchors.clone().view_as(m.anchor_grid)  # for inference
        strides = m.stride.to(m.anchors.device).view(-1, 1, 1)
        m.anchors[:] = new_anchors.clone().view_as(m.anchors) / strides  # loss
        check_anchor_order(m)
        print('New anchors saved to model. Update model *.yaml to use these anchors in the future.')
    else:
        print('Original anchors better than new anchors. Proceeding with original anchors.')
    print('')  # newline

yolov5的网络结构配置文件(如:models/yolov5s.yaml)已经给出了默认的一组anchor,只有当bpr小于0.98时才会重新计算anchor,所以这里首先需要搞清楚bpr是什么,它又是怎么计算的?我们知道在训练过程中任何一个gt框它归根结底是要落到特征图中的某个网格的。在yolov5中默认设置了9种anchor,在具体计算bpr(best possible recall)的时候,会考虑这9类anchor的宽高和gt框的宽高之间的差距。上述代码中变量wh用来存储训练数据中所有gt框的宽高,是一个shape为(N,2)的tensor,这里的2自然就是表示的宽和高,N为gt框的总的个数。metric根据默认anchor和wh来具体计算bpr,aat(anchors above threshold)两个指标。
 

def metric(k):  # compute metric
    """Score anchors `k` against the label boxes `wh`.

    Excerpt of the helper nested inside check_anchors(); `wh` (label box
    width/height, one row per box) and `thr` come from that enclosing scope.

    Arguments:
        k: anchors, one (w, h) row per anchor
    Return:
        bpr: fraction of boxes whose best anchor scores above 1/thr
        aat: mean number of anchors per box scoring above 1/thr
    """
    r = wh[:, None] / k[None]  # box/anchor ratio for every (box, anchor, side)
    x = torch.min(r, 1. / r).min(2)[0]  # ratio metric: fold ratios to <= 1, keep the worse side
    best = x.max(1)[0]  # best_x: score of the best-matching anchor per box

    aat = (x > 1. / thr).float().sum(1).mean()  # anchors above threshold
    bpr = (best > 1. / thr).float().mean()  # best possible recall
    return bpr, aat

metric这个函数初看一脸懵逼,再看拍案叫绝。输入参数k存储anchors,调用时被reshape后的尺寸为(9,2)。接下来要计算每个gt框的宽高和所有这9个anchor的宽高的比例值,得到的r其shape为(N,9,2)。x=torch.min(r,1./r).min(2)[0],怎么理解这句代码呢?w_gt/w_anchor或者h_gt/h_anchor这个比例值可能大于1也可能小于1,通过torch.min(r,1./r)的方式统一到<=1的情形,然后再通过.min(2)在宽、高两个比例中选取较小的那个值(即匹配较差的那一边)。得到的x其shape为(N,9),x.max(1)[0]为每个gt框选择匹配宽高比例值最好的那一个值。这样就可以计算aat和bpr了:aat是平均每个gt框有多少个anchor的x值大于1/thr,bpr是best值大于1/thr的gt框所占的比例。计算出来的bpr小于0.98就会重新聚类,否则就沿用默认的anchor设定。
 

def kmean_anchors(path='./data/coco128.yaml', n=9, img_size=640, thr=4.0, gen=1000, verbose=True):
    """ Creates kmeans-evolved anchors from training dataset

        The label boxes are clustered with k-means into n anchors, which are then
        refined by a simple genetic algorithm: random multiplicative mutations are
        kept whenever they raise the fitness.

        Arguments:
            path: path to dataset *.yaml, or a loaded dataset
            n: number of anchors
            img_size: image size used for training
            thr: anchor-label wh ratio threshold hyperparameter hyp['anchor_t'] used for training, default=4.0
            gen: generations to evolve anchors using genetic algorithm
            verbose: print all results
        Return:
            k: kmeans evolved anchors, sorted by area (w * h) from small to large
        Usage:
            from utils.autoanchor import *; _ = kmean_anchors()
    """
    thr = 1. / thr  # from here on thr is the minimum acceptable ratio metric

    def metric(k, wh):  # compute metrics
        # Ratio between every label box and every anchor: gt_w / k_w, gt_h / k_h
        r = wh[:, None] / k[None]
        x = torch.min(r, 1. / r).min(2)[0]  # ratio metric
        # x = wh_iou(wh, torch.tensor(k))  # iou metric
        return x, x.max(1)[0]  # x, best_x

    def anchor_fitness(k):  # mutation fitness
        _, best = metric(torch.tensor(k, dtype=torch.float32), wh)
        return (best * (best > thr).float()).mean()  # fitness

    def print_results(k):
        k = k[np.argsort(k.prod(1))]  # sort anchors by area (w * h), small to large
        x, best = metric(k, wh0)
        bpr, aat = (best > thr).float().mean(), (x > thr).float().mean() * n  # best possible recall, anch > thr
        print('thr=%.2f: %.4f best possible recall, %.2f anchors past thr' % (thr, bpr, aat))
        print('n=%g, img_size=%s, metric_all=%.3f/%.3f-mean/best, past_thr=%.3f-mean: ' %
              (n, img_size, x.mean(), best.mean(), x[x > thr].mean()), end='')
        for i, anchor in enumerate(k):
            print('%i,%i' % (round(anchor[0]), round(anchor[1])), end=',  ' if i < len(k) - 1 else '\n')  # use in *.cfg
        return k

    if isinstance(path, str):  # *.yaml file
        with open(path) as f:
            # NOTE(review): FullLoader can construct more than plain data; consider
            # yaml.safe_load if the dataset yaml may come from an untrusted source.
            data_dict = yaml.load(f, Loader=yaml.FullLoader)  # model dict
        from utils.datasets import LoadImagesAndLabels
        dataset = LoadImagesAndLabels(data_dict['train'], augment=True, rect=True)
    else:
        dataset = path  # dataset

    # Get label wh
    shapes = img_size * dataset.shapes / dataset.shapes.max(1, keepdims=True)
    wh0 = np.concatenate([l[:, 3:5] * s for s, l in zip(shapes, dataset.labels)])  # wh of all label boxes

    # Warn about boxes whose width or height is below 3 pixels (very small objects)
    i = (wh0 < 3.0).any(1).sum()
    if i:
        print('WARNING: Extremely small objects found. '
              '%g of %g labels are < 3 pixels in width or height.' % (i, len(wh0)))
    # Boolean-mask filter: keep boxes with at least one side >= 2 pixels for clustering
    wh = wh0[(wh0 >= 2.0).any(1)]
    # wh = wh * (np.random.rand(wh.shape[0], 1) * 0.9 + 0.1)  # multiply by random scale 0-1

    # Kmeans calculation
    print('Running kmeans for %g anchors on %g points...' % (n, len(wh)))
    s = wh.std(0)  # sigmas for whitening: [w_std, h_std]
    k, _ = kmeans(wh / s, n, iter=30)  # cluster the whitened points; second value (mean distance) is unused
    k *= s  # undo whitening
    wh = torch.tensor(wh, dtype=torch.float32)  # filtered
    wh0 = torch.tensor(wh0, dtype=torch.float32)  # unfiltered
    k = print_results(k)

    # Evolve
    npr = np.random
    f, sh, mp, s = anchor_fitness(k), k.shape, 0.9, 0.1  # fitness, anchor array shape, mutation prob, sigma
    pbar = tqdm(range(gen), desc='Evolving anchors with Genetic Algorithm')  # progress bar
    for _ in pbar:
        v = np.ones(sh)
        while (v == 1).all():  # mutate until a change occurs (prevent duplicates)
            v = ((npr.random(sh) < mp) * npr.random() * npr.randn(*sh) * s + 1).clip(0.3, 3.0)
        kg = (k.copy() * v).clip(min=2.0)
        fg = anchor_fitness(kg)
        if fg > f:  # keep the mutation only when it improves fitness
            f, k = fg, kg.copy()
            pbar.desc = 'Evolving anchors with Genetic Algorithm: fitness = %.4f' % f
            if verbose:
                print_results(k)

    # Return the evolved anchors (sorted by area); callers such as check_anchors() reshape this array
    return print_results(k)
 

 

你可能感兴趣的:(yolo,深度学习,yolov5)