Yolov2 聚类生成先验框代码详解(Yolov3同理)----自用笔记

代码来自https://blog.csdn.net/weixin_44791964/article/details/102687531

完整代码如下:

import numpy as np
import xml.etree.ElementTree as ET
import glob
import random

def cas_iou(box,cluster):
    x = np.minimum(cluster[:,0],box[0])
    y = np.minimum(cluster[:,1],box[1])

    intersection = x * y
    area1 = box[0] * box[1]

    area2 = cluster[:,0] * cluster[:,1]
    iou = intersection / (area1 + area2 -intersection)

    return iou

def avg_iou(box,cluster):
    return np.mean([np.max(cas_iou(box[i],cluster)) for i in range(box.shape[0])])


def kmeans(box,k):
    # 取出一共有多少框
    row = box.shape[0]
    
    # 每个框各个点的位置
    distance = np.empty((row,k))
    
    # 最后的聚类位置
    last_clu = np.zeros((row,))

    np.random.seed()

    # 随机选5个当聚类中心
    cluster = box[np.random.choice(row,k,replace = False)]
    # cluster = random.sample(row, k)
    while True:
        # 计算每一行距离五个点的iou情况。
        for i in range(row):
            distance[i] = 1 - cas_iou(box[i],cluster)
        
        # 取出最小点
        near = np.argmin(distance,axis=1)

        if (last_clu == near).all():
            break
        
        # 求每一个类的中位点
        for j in range(k):
            cluster[j] = np.median(
                box[near == j],axis=0)

        last_clu = near

    return cluster



def load_data(path):
    data = []
    # 对于每一个xml都寻找box
    for xml_file in glob.glob('{}/*xml'.format(path)):
        tree = ET.parse(xml_file)
        height = int(tree.findtext('./size/height'))
        width = int(tree.findtext('./size/width'))
        # 对于每一个目标都获得它的宽高
        for obj in tree.iter('object'):
            xmin = int(float(obj.findtext('bndbox/xmin'))) / width
            ymin = int(float(obj.findtext('bndbox/ymin'))) / height
            xmax = int(float(obj.findtext('bndbox/xmax'))) / width
            ymax = int(float(obj.findtext('bndbox/ymax'))) / height

            xmin = np.float64(xmin)
            ymin = np.float64(ymin)
            xmax = np.float64(xmax)
            ymax = np.float64(ymax)
            # 得到宽高
            data.append([xmax-xmin,ymax-ymin])
    return np.array(data)


if __name__ == '__main__':
    anchors_num = 5
    # 载入数据集,可以使用VOC的xml
    path = '../SSD-Tensorflow-master/VOC2012/Annotations'
    
    # 载入所有的xml
    # 存储格式为转化为比例后的width,height
    data = load_data(path)
    
    # 使用k聚类算法
    out = kmeans(data,anchors_num)
    print('acc:{:.2f}%'.format(avg_iou(data,out) * 100))
    print(out)
    print('box',out[:,0] * 13,out[:,1] * 13)
    ratios = np.around(out[:,0]/out[:,1],decimals=2).tolist()
    print('ratios:',sorted(ratios))

Yolov2 聚类生成先验框代码详解(Yolov3同理)----自用笔记_第1张图片
Yolov2 聚类生成先验框代码详解(Yolov3同理)----自用笔记_第2张图片
结果如下:

acc:61.32%
[[0.044      0.07733333]
 [0.106      0.17866667]
 [0.408      0.616     ]
 [0.816      0.83      ]
 [0.2        0.38933333]]
box [ 0.572  1.378  5.304 10.608  2.6  ] [ 1.00533333  2.32266667  8.008      10.79        5.06133333]
ratios: [0.51, 0.57, 0.59, 0.66, 0.98]

你可能感兴趣的:(目标检测,聚类)