YOLO v3 中关于 anchor 的 k-means 聚类代码


1. k-means 聚类代码

其他的k-means 代码(没用过)是:

  1. https://github.com/qqwweee/keras-yolo3/blob/master/kmeans.py
  2. https://github.com/AlexeyAB/darknet/blob/master/scripts/gen_anchors.py
    输入是存放 xml标签文件的文件夹:
    只需要更改 example.py 中的一行代码:
ANNOTATIONS_PATH = "xmlLabel/train"  # 更改自己的路径(存放训练标签 xml 的文件路径)
  1. 运行 example.py,计算当前数据集需要设置的 anchor 大小(相对于 416 输入而言)


rows =  8607  #  我的 label 目标的数量
[[0.01416016 0.015625  ]  # 每一行是一个 anchor:anchor 的宽/图像的宽, anchor 的高/图像的高
 [0.00830078 0.00927734]
 [0.06542969 0.06982422]
 [0.03417969 0.03662109]
 [0.01123047 0.01220703]
 [0.02685547 0.02832031]
 [0.01757812 0.01953125]
 [0.04443359 0.04833984]
 [0.02148438 0.0234375 ]]
Accuracy: 83.41%
 [ 5.890625  3.453125 27.21875  14.21875   4.671875 11.171875  7.3125   18.484375  8.9375  ]  # 每个 anchor 的宽
 [ 6.5       3.859375 29.046875 15.234375  5.078125 11.78125   8.125    20.109375  9.75    ]  # 每个 anchor 的高
 [0.89, 0.9, 0.91, 0.92, 0.92, 0.92, 0.93, 0.94, 0.95]  # 每个 anchor 的 宽/高
  1. 将 anchor 按面积从小到大排序后的结果(第一行为宽,第二行为高;取整只是为了好看):
[3, 4, 5, 7, 8, 11, 14, 18, 27]
[3, 5, 6, 8, 9, 11, 15, 20, 29]
anchor_416 = 3, 3, 4, 5, 5, 6,   7, 8, 8, 9, 11, 11,   14, 15, 18, 20, 27, 29 
anchor_416_2 = 6, 7, 9, 10, 11, 13,   14, 16, 17, 19, 22, 23,   28, 30, 36, 40, 54, 58
anchor_416_3 = 10, 11, 14, 15, 17, 19,   21, 24, 26, 29, 33, 35,  42, 45, 55, 60, 81, 87
anchor_416_4 = 13, 15, 18, 20, 23, 26,   29,32, 35,39, 44,47,  56,60, 73,80, 108,116
anchor_416_5 = 17, 19, 23, 25, 29, 32,   36, 40, 44, 48, 55, 58,   71, 76, 92, 100, 136, 145

将 anchor 排序的代码如下(自己写的):

import numpy as np 

# anchors = [10,13,  16,30,  33,23,  30,61,  62,45,  59,119,  116,90,  156,198,  373,326]
# for i in range(0, len(anchors), 2):
#   print(anchors[i] * anchors[i + 1])

# Anchor widths (x) and heights (y) as printed by the k-means run; index i in
# both lists describes the same anchor.
x = [5.890625,  3.453125, 27.21875,  14.21875,   4.671875, 11.171875,  7.3125,   18.484375,  8.9375]
y = [6.5,       3.859375, 29.046875, 15.234375,  5.078125, 11.78125,   8.125,    20.109375,  9.75]

# Area of every anchor, used as the sort key.
area = [w * h for w, h in zip(x, y)]

# Indices that order the anchors from smallest to largest area. Computed once
# instead of re-sorting on every loop iteration.
order = np.argsort(area)

# Widths/heights in ascending-area order, truncated to integers for display.
new_x = [int(x[i]) for i in order]
new_y = [int(y[i]) for i in order]

# Flatten to w0, h0, w1, h1, ... which is the layout of the "anchors = ..."
# line in the YOLO configuration.
anchors = []
for w, h in zip(new_x, new_y):
    anchors.extend((w, h))

# Print the (integer) area of each sorted anchor as a sanity check.
for w, h in zip(new_x, new_y):
    print(w * h)

2. YOLOv3 中默认的 anchor

anchors = 10,13,  16,30,  33,23,  30,61,  62,45,  59,119,  116,90,  156,198,  373,326

一共有 18 个数字,即 9 个 anchor(每个 anchor 由宽、高两个数字组成);各 anchor 的面积(宽 × 高)依次是:

130, 480,759,     1830, 2790, 7021,     10440, 30888, 121598

3. github 上的代码复制如下:


import glob
import xml.etree.ElementTree as ET

import numpy as np

from kmeans import kmeans, avg_iou

ANNOTATIONS_PATH = "Annotations"

def load_dataset(path):
    """Collect normalized box sizes from the annotation files under *path*.

    Every file matched by ``<path>/*xml`` is parsed as XML. For each
    ``object`` element the ``bndbox`` corner coordinates are divided by the
    image ``size`` (x by width, y by height), and the resulting box width and
    height are recorded.

    :param path: directory that holds the annotation files
    :return: numpy array with one ``[width, height]`` row per object
    """
    sizes = []
    pattern = "{}/*xml".format(path)

    for annotation in glob.glob(pattern):
        root = ET.parse(annotation)

        img_h = int(root.findtext("./size/height"))
        img_w = int(root.findtext("./size/width"))

        for item in root.iter("object"):
            # Normalize each corner first, then take the difference.
            left = int(item.findtext("bndbox/xmin")) / img_w
            top = int(item.findtext("bndbox/ymin")) / img_h
            right = int(item.findtext("bndbox/xmax")) / img_w
            bottom = int(item.findtext("bndbox/ymax")) / img_h

            sizes.append([right - left, bottom - top])

    return np.array(sizes)

# Number of anchor boxes to cluster. It must be defined before the call to
# kmeans below; 9 matches the number of anchors listed in the default YOLOv3
# configuration.
CLUSTERS = 9

# Cluster the normalized (width, height) pairs of every labelled box.
data = load_dataset(ANNOTATIONS_PATH)
out = kmeans(data, k=CLUSTERS)

# Mean best-match IoU between the boxes and the resulting anchors.
print("Accuracy: {:.2f}%".format(avg_iou(data, out) * 100))
print("Boxes:\n {}".format(out))

# Width/height aspect ratio of each anchor, rounded to two decimals.
ratios = np.around(out[:, 0] / out[:, 1], decimals=2).tolist()
print("Ratios:\n {}".format(sorted(ratios)))


import numpy as np

def iou(box, clusters):
    """
    Calculates the Intersection over Union (IoU) between a box and k clusters.

    Both the box and the clusters are treated as rectangles anchored at the
    origin, so only their widths and heights are compared.

    :param box: tuple or array, shifted to the origin (i. e. width and height)
    :param clusters: numpy array of shape (k, 2) where k is the number of clusters
    :return: numpy array of shape (k,) where k is the number of clusters
    :raises ValueError: if the box or any cluster has a zero width or height
    """
    # Overlap of two origin-anchored rectangles is min(width) x min(height).
    x = np.minimum(clusters[:, 0], box[0])
    y = np.minimum(clusters[:, 1], box[1])
    if np.any(x == 0) or np.any(y == 0):
        raise ValueError("Box has no area")

    intersection = x * y
    box_area = box[0] * box[1]
    cluster_area = clusters[:, 0] * clusters[:, 1]

    # Union = sum of both areas minus the part counted twice.
    iou_ = intersection / (box_area + cluster_area - intersection)

    return iou_

def avg_iou(boxes, clusters):
    """
    Calculates the average Intersection over Union (IoU) between a numpy array of boxes and k clusters.

    For every box the best (maximum) IoU over all clusters is taken, and those
    per-box maxima are averaged.

    :param boxes: numpy array of shape (r, 2), where r is the number of rows
    :param clusters: numpy array of shape (k, 2) where k is the number of clusters
    :return: average IoU as a single float
    """
    return np.mean([np.max(iou(box, clusters)) for box in boxes])

def translate_boxes(boxes):
    """
    Translates all the boxes to the origin.

    Columns 2 and 3 are replaced by their absolute difference to columns 0 and
    1 respectively, then columns 0 and 1 are dropped. The input array is not
    modified.

    :param boxes: numpy array of shape (r, 4)
    :return: numpy array of shape (r, 2)
    """
    new_boxes = boxes.copy()
    # Whole-column operations replace the per-row Python loop.
    new_boxes[:, 2] = np.abs(new_boxes[:, 2] - new_boxes[:, 0])
    new_boxes[:, 3] = np.abs(new_boxes[:, 3] - new_boxes[:, 1])
    return np.delete(new_boxes, [0, 1], axis=1)

def kmeans(boxes, k, dist=np.median):
    """
    Calculates k-means clustering with the Intersection over Union (IoU) metric.

    Boxes are assigned to the cluster with the smallest ``1 - IoU`` distance;
    each cluster is then recomputed by applying ``dist`` to its members. The
    loop stops when the assignment no longer changes.

    :param boxes: numpy array of shape (r, 2), where r is the number of rows
    :param k: number of clusters
    :param dist: function called as ``dist(members, axis=0)`` to recompute a
        cluster from the boxes assigned to it (``np.median`` by default)
    :return: numpy array of shape (k, 2)
    """
    rows = boxes.shape[0]

    distances = np.empty((rows, k))
    last_clusters = np.zeros((rows,))

    # the Forgy method will fail if the whole array contains the same rows
    clusters = boxes[np.random.choice(rows, k, replace=False)]

    while True:
        for row in range(rows):
            distances[row] = 1 - iou(boxes[row], clusters)

        nearest_clusters = np.argmin(distances, axis=1)

        # Converged: no box changed its cluster since the previous pass.
        if (last_clusters == nearest_clusters).all():
            break

        for cluster in range(k):
            members = boxes[nearest_clusters == cluster]
            # Keep the previous centre for an empty cluster; reducing an empty
            # selection would yield NaN, and a NaN distance wins every argmin.
            if len(members) > 0:
                clusters[cluster] = dist(members, axis=0)

        last_clusters = nearest_clusters

    return clusters

