代码下载:https://github.com/pakaqiu/yolov3_simple
视频链接:https://www.bilibili.com/video/BV1MK4y1X74Q?p=1
yolo已经出到了第5代。在CV目标检测领域,yolo一直在速度和精度两方面保持着很不错的性能,而且一代比一代优秀,不得不佩服作者的良心大作。这段时间在研究yolo,发现网上关于anchors生成的文章有一堆,但真正能直接拿来实践的并不多。原理就不多说了,这里以yolov3为例说明:采用kmeans对目标物体的宽高进行聚类,产生的anchors用于后续的yolo训练;注意这里的宽高是目标物体相对原图的宽高。下面给出实现代码,代码来源于github,实现如下:
import numpy as np
class YOLO_Kmeans:
    """Generate YOLO anchor sizes by k-means clustering of box widths/heights.

    Boxes are compared by IoU as if they shared the same top-left corner,
    so only their (width, height) matters. The clustering distance is
    ``1 - IoU``.
    """

    def __init__(self, cluster_number, filename):
        """Store the clustering configuration.

        Args:
            cluster_number: Number of anchors (clusters) to produce.
            filename: Path of the annotation text file to read boxes from.
        """
        self.cluster_number = cluster_number
        self.filename = filename

    def iou(self, boxes, clusters):
        """Return the pairwise IoU between every box and every cluster.

        Args:
            boxes: Array of shape (n, 2) holding (width, height) rows.
            clusters: Array of shape (k, 2) holding (width, height) rows.

        Returns:
            Array of shape (n, k); entry [i, j] is the IoU of box i and
            cluster j when both are anchored at the same corner.
        """
        boxes = np.asarray(boxes)
        clusters = np.asarray(clusters)

        # Column vectors for boxes and row vectors for clusters broadcast
        # to an (n, k) grid. k is taken from `clusters` itself, not from
        # any global or instance setting.
        box_w = boxes[:, 0][:, np.newaxis]
        box_h = boxes[:, 1][:, np.newaxis]
        cluster_w = clusters[:, 0][np.newaxis, :]
        cluster_h = clusters[:, 1][np.newaxis, :]

        # With a shared corner the overlap is min(width) * min(height).
        inter_area = np.minimum(box_w, cluster_w) * np.minimum(box_h, cluster_h)
        union_area = box_w * box_h + cluster_w * cluster_h - inter_area
        return inter_area / union_area

    def avg_iou(self, boxes, clusters):
        """Return the mean, over all boxes, of the best IoU with any cluster."""
        best_per_box = np.max(self.iou(boxes, clusters), axis=1)
        return np.mean(best_per_box)

    def kmeans(self, boxes, k, dist=np.median):
        """Cluster box sizes with k-means using ``1 - IoU`` as the distance.

        Args:
            boxes: Array of shape (n, 2) holding (width, height) rows.
            k: Number of clusters; must not exceed n.
            dist: Reducer called as ``dist(members, axis=0)`` to compute a
                cluster's new centre from its member boxes.

        Returns:
            Array of shape (k, 2) with the final cluster centres. It has the
            same dtype as ``boxes``, so centres are truncated when the boxes
            are integers.

        Raises:
            ValueError: Raised by ``np.random.choice`` when k > n.
        """
        boxes = np.asarray(boxes)
        box_number = boxes.shape[0]

        # None means "no previous assignment yet". An all-zeros sentinel
        # would be mistaken for the valid assignment "everything in
        # cluster 0" and stop the loop before the first update.
        last_nearest = None

        # Reseed from OS entropy: each run starts from different initial
        # clusters, so results are not reproducible across runs.
        np.random.seed()
        clusters = boxes[np.random.choice(
            box_number, k, replace=False)]  # init k clusters (a copy)

        while True:
            distances = 1 - self.iou(boxes, clusters)
            current_nearest = np.argmin(distances, axis=1)
            if last_nearest is not None and np.array_equal(
                    last_nearest, current_nearest):
                break  # assignments are stable, clusters won't change

            for cluster in range(k):
                members = boxes[current_nearest == cluster]
                # An empty cluster keeps its previous centre; reducing an
                # empty set would yield NaN.
                if len(members):
                    clusters[cluster] = dist(members, axis=0)

            last_nearest = current_nearest

        return clusters

    def result2txt(self, data, path="yolo_anchors.txt"):
        """Write anchors as ``w,h, w,h, ...`` on a single line.

        Args:
            data: Sequence of (width, height) rows; values are written
                with ``%d`` formatting.
            path: Output file path; defaults to ``yolo_anchors.txt``.
        """
        anchors = ", ".join("%d,%d" % (row[0], row[1]) for row in data)
        with open(path, 'w') as f:
            f.write(anchors)

    def txt2boxes(self):
        """Read box sizes from ``self.filename`` (comma-separated labels).

        Each line is split on whitespace and the first token is skipped.
        Every remaining token is split on commas, and its first four
        fields are read as integers x_min, y_min, x_max, y_max.

        Returns:
            Array of [width, height] rows, one per box.
        """
        dataSet = []
        with open(self.filename, 'r') as f:
            for line in f:
                # split() with no argument tolerates repeated spaces and
                # the trailing newline.
                for info in line.split()[1:]:
                    fields = info.split(",")
                    width = int(fields[2]) - int(fields[0])
                    height = int(fields[3]) - int(fields[1])
                    dataSet.append([width, height])
        return np.array(dataSet)

    def get_boxes(self):
        """Read box sizes from ``self.filename`` (space-separated labels).

        Adapt this reader to your own label format. As written, each line
        is split on whitespace and the first token is skipped. The
        remaining tokens are parsed as floats and grouped in fours as
        x_min, y_min, x_max, y_max.

        Returns:
            Array of [width, height] rows, one per box, truncated to
            integers.
        """
        dataSet = []
        with open(self.filename, 'r') as f:
            for line in f:
                coords = [float(token) for token in line.split()[1:]]
                boxes = np.array(coords, dtype=np.float32).reshape(-1, 4)
                for b in boxes:
                    width = int(b[2] - b[0])
                    height = int(b[3] - b[1])
                    dataSet.append([width, height])
        return np.array(dataSet)

    def txt2clusters(self):
        """Run the full pipeline: read boxes, cluster, save and report.

        Reads boxes with ``get_boxes`` and clusters them into
        ``self.cluster_number`` anchors. The anchors are sorted by width,
        written with ``result2txt``, and printed along with their average
        IoU.
        """
        # Use self.txt2boxes() instead for comma-separated label files.
        all_boxes = self.get_boxes()
        result = self.kmeans(all_boxes, k=self.cluster_number)
        # lexsort with a single key: order rows by ascending width.
        result = result[np.lexsort(result.T[0, None])]
        self.result2txt(result)
        print("K anchors:\n {}".format(result))
        print("Accuracy: {:.2f}%".format(
            self.avg_iou(all_boxes, result) * 100))
if __name__ == "__main__":
    # Script entry point: cluster the boxes listed in boxes.txt into six
    # anchors, then write and print the result.
    cluster_number = 6
    YOLO_Kmeans(cluster_number, "boxes.txt").txt2clusters()
[[ 20 19]
[ 35 32]
[ 48 46]
[ 73 65]
[117 110]
[216 206]]
Accuracy: 74.46%
不同的数据标注格式需要根据自己的需求修改读取方式,希望可以帮助到大家,有不当之处请指教。
最后附上一个B站的视频讲解(链接见文首),初次做视频,不妥之处望谅解和指正,谢谢!