import numpy as np


class YOLO_Kmeans:
    """Derive YOLO anchor box sizes by k-means clustering of box (w, h) pairs.

    The distance between a box and a cluster centre is ``1 - IoU``, where the
    IoU is computed from widths and heights only (both rectangles are treated
    as sharing one corner).
    """

    def __init__(self, cluster_number, filename):
        """Store the number of anchors to produce and the annotation file path.

        Args:
            cluster_number: Number of clusters (anchors) to compute.
            filename: Path of the annotation file read by ``txt2boxes``.
        """
        self.cluster_number = cluster_number
        # Use the path supplied by the caller instead of a hard-coded name.
        self.filename = filename

    def iou(self, boxes, clusters):
        """Return the IoU of every box against every cluster centre.

        Args:
            boxes: Array of shape (n, 2) holding (width, height) rows.
            clusters: Array of shape (k, 2) holding (width, height) rows.

        Returns:
            Array of shape (n, k); entry [i, j] is the IoU of box i with
            cluster j.
        """
        # Column vectors for boxes and row vectors for clusters broadcast to
        # (n, k). Taking k from ``clusters`` itself keeps this correct even
        # when it differs from ``self.cluster_number``.
        box_w = boxes[:, 0][:, np.newaxis]
        box_h = boxes[:, 1][:, np.newaxis]
        cluster_w = clusters[:, 0][np.newaxis, :]
        cluster_h = clusters[:, 1][np.newaxis, :]

        inter_area = np.minimum(cluster_w, box_w) * np.minimum(cluster_h, box_h)
        box_area = box_w * box_h
        cluster_area = cluster_w * cluster_h
        return inter_area / (box_area + cluster_area - inter_area)

    def avg_iou(self, boxes, clusters):
        """Return the mean, over all boxes, of each box's best IoU."""
        return np.mean(np.max(self.iou(boxes, clusters), axis=1))

    def kmeans(self, boxes, k, dist=np.median):
        """Cluster ``boxes`` into ``k`` groups using ``1 - IoU`` as distance.

        Args:
            boxes: Array of shape (n, 2) holding (width, height) rows.
            k: Number of clusters; must not exceed the number of boxes.
            dist: Reducer called as ``dist(members, axis=0)`` to compute a
                new cluster centre from the boxes assigned to it.

        Returns:
            Array of shape (k, 2) with the final cluster centres. It has the
            dtype of ``boxes``, so integer boxes give integer centres.
        """
        box_number = boxes.shape[0]
        # -1 is never a valid cluster index, so the first pass always runs an
        # update step (zeros would stop immediately if every box initially
        # fell into cluster 0, e.g. whenever k == 1).
        last_nearest = np.full(box_number, -1)

        np.random.seed()
        # Fancy indexing copies, so updating ``clusters`` never alters ``boxes``.
        clusters = boxes[np.random.choice(box_number, k, replace=False)]

        while True:
            distances = 1 - self.iou(boxes, clusters)
            current_nearest = np.argmin(distances, axis=1)
            if (last_nearest == current_nearest).all():
                break  # assignments are stable, so clusters won't change

            for cluster in range(k):
                members = boxes[current_nearest == cluster]
                # An empty cluster keeps its previous centre; reducing an
                # empty array would yield NaN.
                if members.shape[0] > 0:
                    clusters[cluster] = dist(members, axis=0)

            last_nearest = current_nearest

        return clusters

    def result2txt(self, data):
        """Write the anchors to ``yolo_anchors.txt`` as ``w,h, w,h, ...``."""
        anchors = ", ".join("%d,%d" % (row[0], row[1]) for row in data)
        with open("yolo_anchors.txt", 'w') as f:
            f.write(anchors)

    def txt2boxes(self):
        """Read ``self.filename`` and return the (width, height) of every box.

        The first whitespace-separated token of each line is skipped; each
        remaining token is read as comma-separated fields whose first four
        values are ``x_min,y_min,x_max,y_max``.

        Returns:
            Array with one ``[width, height]`` row per box.
        """
        dataSet = []
        with open(self.filename, 'r') as f:
            for line in f:
                # split() with no argument tolerates repeated or trailing
                # whitespace, which split(" ") turned into bogus tokens.
                infos = line.split()
                for info in infos[1:]:
                    coords = info.split(",")
                    width = int(coords[2]) - int(coords[0])
                    height = int(coords[3]) - int(coords[1])
                    dataSet.append([width, height])
        return np.array(dataSet)

    def txt2clusters(self):
        """Run the pipeline: load boxes, cluster, save and print the anchors."""
        all_boxes = self.txt2boxes()
        result = self.kmeans(all_boxes, k=self.cluster_number)
        # Order the anchors by ascending width (first column).
        result = result[np.lexsort(result.T[0, None])]
        self.result2txt(result)
        print("K anchors:\n {}".format(result.tolist()))
        print(str(result.tolist()).replace("[", "").replace("]", ""))
        print("Accuracy: {:.2f}%".format(
            self.avg_iou(all_boxes, result) * 100))


if __name__ == "__main__":
    cluster_number = 9
    filename = "train.txt"
    kmeans = YOLO_Kmeans(cluster_number, filename)
    kmeans.txt2clusters()
在这段代码中,定义了一个名为 `YOLO_Kmeans` 的类,用于实现基于 K-means 聚类算法的 YOLO 目标检测模型中的锚点(anchor)生成。以下是对代码的详细解释:
- 类初始化:`YOLO_Kmeans` 类在初始化时接收聚类的数量 `cluster_number` 和包含边界框信息的文件名 `filename`。`self.cluster_number` 存储聚类数量,`self.filename` 存储要读取的标注文件名(示例中为 "train.txt")。
- `iou` 函数:计算每个边界框与所有聚类中心的交并比(IOU),结果是一个 n×k 的矩阵(n 为边界框数量,k 为聚类数量)。首先计算边界框的面积,然后计算每个聚类中心的面积。接着取两者宽度和高度的最小值,得到交集区域的面积。最后,计算 IOU 值,即交集面积除以并集面积。
- `avg_iou` 函数:计算所有边界框与聚类中心的平均 IOU 值。通过 `iou` 函数计算每个边界框与各聚类中心的 IOU 值,对每个边界框取最大值,再对所有边界框求平均值。
- `kmeans` 函数:实现 K-means 聚类算法。首先随机选取 k 个边界框作为初始聚类中心,然后不断迭代,直到各边界框所属的聚类不再变化。每次迭代使用 `iou` 函数计算距离(距离 = 1 − IOU),把每个边界框分配给最近的聚类中心,再用该聚类内边界框的中位数(默认 `np.median`)更新聚类中心。
- `result2txt` 函数:将聚类结果写入到 "yolo_anchors.txt" 文件中。
- `txt2boxes` 函数:从 `self.filename` 指定的文件(示例中为 "train.txt")中读取边界框信息,计算每个边界框的宽和高,并转换为 NumPy 数组。
- `txt2clusters` 函数:调用 `txt2boxes` 函数获取边界框信息,然后调用 `kmeans` 函数进行聚类,将聚类中心按宽度排序后调用 `result2txt` 函数写入文件,并打印聚类中心和平均 IOU 值。
在主函数中,创建 `YOLO_Kmeans` 类的实例,并调用 `txt2clusters` 函数执行聚类操作。
需要注意的是,K-means 算法对初始种子点的选择比较敏感,选择好的种子点可以加快聚类速度,否则聚类速度会比较慢[1]。此外,聚类 anchor box 只需要 bbox 中左上角与右下角的 x、y 数据[2]。由于算法的初始化具有随机性,每次运行 K-means 聚类的结果可能会有所不同[2]。