主要步骤:
- 初始化聚类中心点个数,即k
- 获得所有标注文件中标注框的宽度与高度,宽与高一一对应,并保存起来
- 对保存的所有框的宽与高做kmeans聚类,获得k个聚类结果,每个结果为一对(宽, 高),分别表示这k个聚类的中心(即anchor框的尺寸)
初始化聚类中心点个数,即k
获得所有标注文件中标注框的宽度与高度,宽与高一一对应,并保存起来
原文链接:https://github.com/qqwweee/keras-yolo3 kmeans.py
# Initialise the k-means helper; cluster_number is the number of cluster
# centres to compute.
kmeans = YOLO_Kmeans(cluster_number, filename)
kmeans.txt2clusters("*.txt")


def txt2clusters(self, anchors_path):
    """Cluster every annotated box size and write the anchors to a file.

    Args:
        anchors_path: Path of the text file that receives the anchors.
    """
    # Collect the (width, height) of every annotated box.
    all_boxes = self.txt2boxes()
    # Run k-means on the collected sizes.
    result = self.kmeans(all_boxes, k=self.cluster_number)
    # Sort the anchors in ascending order of their first column (width).
    result = result[np.lexsort(result.T[0, None])]
    self.result2txt(result, anchors_path)
    print("K anchors:\n {}".format(result))
    print("Accuracy: {:.2f}%".format(
        self.avg_iou(all_boxes, result) * 100))


def txt2boxes(self):
    """Read ``self.filename`` and return the size of every annotated box.

    Each line is split on whitespace; the first token is skipped and every
    remaining token is a comma-separated record whose first four fields are
    used as ``x_min, y_min, x_max, y_max``.

    Returns:
        An integer array of shape ``(n_boxes, 2)`` holding ``[width, height]``
        rows (shape ``(0, 2)`` when the file contains no boxes).

    Raises:
        IndexError: If a box record has fewer than four fields.
        ValueError: If a coordinate is not an integer.
    """
    sizes = []
    # The context manager closes the file even when a record fails to parse.
    with open(self.filename, 'r') as f:
        for line in f:
            # split() without arguments tolerates trailing/double spaces,
            # tabs and the final newline, which split(" ") does not.
            for info in line.split()[1:]:
                coords = info.split(",")
                width = int(coords[2]) - int(coords[0])
                height = int(coords[3]) - int(coords[1])
                sizes.append([width, height])
    return np.array(sizes, dtype=int).reshape(-1, 2)
- 对保存的所有框的宽与高做kmeans聚类,获得k个聚类结果,每个结果为一对(宽, 高),分别表示这k个聚类的中心(即anchor框的尺寸)
# The cluster centres produced here are the candidate (anchor) box sizes.
def kmeans(self, boxes, k, dist=np.median):
    """Cluster box sizes with k-means, using ``1 - IoU`` as the distance.

    Args:
        boxes: Array of shape ``(n, 2)`` holding ``[width, height]`` rows.
        k: Number of clusters to compute.
        dist: Reducer called as ``dist(members, axis=0)`` to recompute a
            centre from its members. The default is the median, which is
            less sensitive to unusually small or large boxes than the mean.

    Returns:
        An array with ``k`` rows, one centre per cluster. It has the same
        dtype as ``boxes``, so reducer results are cast on assignment.

    Raises:
        ValueError: From ``np.random.choice`` when ``k`` exceeds the number
            of boxes.
    """
    box_number = boxes.shape[0]
    # -1 is never a valid cluster index, so the first pass always performs
    # an update (an all-zero start could match the first assignment and
    # return the random initial centres untouched).
    last_nearest = np.full(box_number, -1)
    np.random.seed()
    # Pick k distinct boxes as the initial centres.
    clusters = boxes[np.random.choice(box_number, k, replace=False)]
    while True:
        # A larger IoU means a closer match, hence distance = 1 - IoU.
        distances = 1 - self.iou(boxes, clusters)
        current_nearest = np.argmin(distances, axis=1)
        if (last_nearest == current_nearest).all():
            break  # assignments are stable, so the centres will not change
        for cluster in range(k):
            members = boxes[current_nearest == cluster]
            # Keep the previous centre when a cluster lost all its members;
            # reducing an empty slice would yield NaN.
            if len(members):
                clusters[cluster] = dist(members, axis=0)
        last_nearest = current_nearest
    return clusters


def iou(self, boxes, clusters):
    """Return the IoU of every box against every cluster centre.

    Only sizes are compared, so the intersection is
    ``min(widths) * min(heights)``.

    Args:
        boxes: Array of shape ``(n, 2)`` holding ``[width, height]`` rows.
        clusters: Array of shape ``(k, 2)`` holding ``[width, height]`` rows.

    Returns:
        A float array of shape ``(n, k)``; entry ``[i, j]`` is the IoU of
        box ``i`` with cluster ``j``.
    """
    # Column vectors broadcast against the cluster row vectors to (n, k).
    box_w = boxes[:, 0][:, None]
    box_h = boxes[:, 1][:, None]
    cluster_w = clusters[:, 0]
    cluster_h = clusters[:, 1]
    inter_area = np.minimum(box_w, cluster_w) * np.minimum(box_h, cluster_h)
    box_area = box_w * box_h
    cluster_area = cluster_w * cluster_h
    # IoU = intersection / (box area + cluster area - intersection)
    return inter_area / (box_area + cluster_area - inter_area)
kmeans完整代码
import numpy as np


class YOLO_Kmeans:
    """Compute YOLO anchor sizes by k-means clustering of annotated boxes."""

    def __init__(self, cluster_number, filename):
        """Store the number of clusters and the annotation file path."""
        self.cluster_number = cluster_number
        self.filename = filename

    def iou(self, boxes, clusters):
        """Return the IoU of every box against every cluster centre.

        Only sizes are compared, so the intersection is
        ``min(widths) * min(heights)``.

        Args:
            boxes: Array of shape ``(n, 2)`` holding ``[width, height]``.
            clusters: Array of shape ``(k, 2)`` holding ``[width, height]``.

        Returns:
            A float array of shape ``(n, k)``; entry ``[i, j]`` is the IoU
            of box ``i`` with cluster ``j``.
        """
        # Column vectors broadcast against the cluster rows to (n, k).
        box_w = boxes[:, 0][:, None]
        box_h = boxes[:, 1][:, None]
        cluster_w = clusters[:, 0]
        cluster_h = clusters[:, 1]
        inter_area = (np.minimum(box_w, cluster_w)
                      * np.minimum(box_h, cluster_h))
        box_area = box_w * box_h
        cluster_area = cluster_w * cluster_h
        # IoU = intersection / (box area + cluster area - intersection)
        return inter_area / (box_area + cluster_area - inter_area)

    def avg_iou(self, boxes, clusters):
        """Return the mean, over all boxes, of the best IoU with any cluster."""
        return np.mean(np.max(self.iou(boxes, clusters), axis=1))

    def kmeans(self, boxes, k, dist=np.median):
        """Cluster box sizes with k-means, using ``1 - IoU`` as the distance.

        Args:
            boxes: Array of shape ``(n, 2)`` holding ``[width, height]``.
            k: Number of clusters to compute.
            dist: Reducer called as ``dist(members, axis=0)`` to recompute a
                centre from its members. The default is the median, which is
                less sensitive to unusually small or large boxes than the
                mean.

        Returns:
            An array with ``k`` rows, one centre per cluster. It has the
            same dtype as ``boxes``, so reducer results are cast on
            assignment.

        Raises:
            ValueError: From ``np.random.choice`` when ``k`` exceeds the
                number of boxes.
        """
        box_number = boxes.shape[0]
        # -1 is never a valid cluster index, so the first pass always
        # performs an update (an all-zero start could match the first
        # assignment and return the random initial centres untouched).
        last_nearest = np.full(box_number, -1)
        np.random.seed()
        # Pick k distinct boxes as the initial centres.
        clusters = boxes[np.random.choice(box_number, k, replace=False)]
        while True:
            # A larger IoU means a closer match, hence distance = 1 - IoU.
            distances = 1 - self.iou(boxes, clusters)
            current_nearest = np.argmin(distances, axis=1)
            if (last_nearest == current_nearest).all():
                break  # assignments are stable; centres will not change
            for cluster in range(k):
                members = boxes[current_nearest == cluster]
                # Keep the previous centre when a cluster lost all its
                # members; reducing an empty slice would yield NaN.
                if len(members):
                    clusters[cluster] = dist(members, axis=0)
            last_nearest = current_nearest
        return clusters

    def result2txt(self, data, anchors_path):
        """Write the anchors to *anchors_path* as ``w,h, w,h, ...``.

        Args:
            data: Sequence of rows whose first two items are width, height.
            anchors_path: Path of the text file to (over)write.
        """
        anchors = ", ".join("%d,%d" % (row[0], row[1]) for row in data)
        with open(anchors_path, 'w') as f:
            f.write(anchors)

    def txt2boxes(self):
        """Read ``self.filename`` and return the size of every annotated box.

        Each line is split on whitespace; the first token is skipped and
        every remaining token is a comma-separated record whose first four
        fields are used as ``x_min, y_min, x_max, y_max``.

        Returns:
            An integer array of shape ``(n_boxes, 2)`` holding
            ``[width, height]`` rows (shape ``(0, 2)`` when the file
            contains no boxes).

        Raises:
            IndexError: If a box record has fewer than four fields.
            ValueError: If a coordinate is not an integer.
        """
        sizes = []
        # The context manager closes the file even when a record fails to
        # parse.
        with open(self.filename, 'r') as f:
            for line in f:
                # split() without arguments tolerates trailing/double
                # spaces, tabs and the final newline; split(" ") does not.
                for info in line.split()[1:]:
                    coords = info.split(",")
                    width = int(coords[2]) - int(coords[0])
                    height = int(coords[3]) - int(coords[1])
                    sizes.append([width, height])
        return np.array(sizes, dtype=int).reshape(-1, 2)

    def txt2clusters(self, anchors_path):
        """Cluster every annotated box size and write the anchors to a file.

        Args:
            anchors_path: Path of the text file that receives the anchors.
        """
        # Collect the (width, height) of every annotated box.
        all_boxes = self.txt2boxes()
        # Run k-means on the collected sizes.
        result = self.kmeans(all_boxes, k=self.cluster_number)
        # Sort the anchors in ascending order of their first column (width).
        result = result[np.lexsort(result.T[0, None])]
        self.result2txt(result, anchors_path)
        print("K anchors:\n {}".format(result))
        print("Accuracy: {:.2f}%".format(
            self.avg_iou(all_boxes, result) * 100))


if __name__ == "__main__":
    cluster_number = 9
    # Text file that stores all annotations, one image per line:
    #   image_path x_min,y_min,x_max,y_max,class ...
    # (top-left corner, bottom-right corner, class), for example:
    #   */aircraft_985_0.jpg 363,103,405,144,0 365,137,407,177,0
    # NOTE(review): both "*.txt" values look like placeholders for real
    # paths -- confirm before running.
    filename = "*.txt"
    kmeans = YOLO_Kmeans(cluster_number, filename)
    # Output file that receives the k anchor (width, height) pairs.
    kmeans.txt2clusters("*.txt")
参考链接:https://www.cnblogs.com/monologuesmw/p/12761653.html