def _labels_inertia_precompute_dense(X, x_squared_norms, centers, distances):
    """Compute labels and inertia using a full distance matrix.

    This will overwrite the 'distances' array in-place.

    Parameters
    ----------
    X : numpy array, shape (n_samples, n_features)
        Input data.

    x_squared_norms : numpy array, shape (n_samples,)
        Precomputed squared norms of X.

    centers : numpy array, shape (n_clusters, n_features)
        Cluster centers which data is assigned to.

    distances : numpy array, shape (n_samples,)
        Pre-allocated array in which distances are stored. It is only
        written when its first dimension equals the number of samples;
        otherwise it is left untouched.

    Returns
    -------
    labels : numpy array, dtype=np.int32, shape (n_samples,)
        Indices of clusters that samples are assigned to. A sample keeps
        the label -1 if no center ever compared strictly closer than
        infinity (e.g. when there are no centers).

    inertia : float
        Sum of the squared distances of samples to their closest cluster
        center.
    """
    n_samples = X.shape[0]

    # Squared distance from every center to every sample; one row per
    # center, i.e. expected shape (n_clusters, n_samples).
    # The norms are passed by keyword because recent scikit-learn releases
    # make ``Y_norm_squared`` keyword-only.
    all_distances = euclidean_distances(
        centers, X, Y_norm_squared=x_squared_norms, squared=True
    )

    # -1 marks "not assigned yet".
    labels = np.full(n_samples, -1, dtype=np.int32)
    # Running minimum of the squared distance per sample. ``np.inf`` is used
    # because the ``np.infty`` alias was removed in NumPy 2.0.
    mindist = np.full(n_samples, np.inf)

    for center_id, dist in enumerate(all_distances):
        # Strict comparison: on ties the earlier center keeps the sample.
        labels[dist < mindist] = center_id
        mindist = np.minimum(dist, mindist)

    if n_samples == distances.shape[0]:
        # distances will be changed in-place
        distances[:] = mindist

    inertia = mindist.sum()
    return labels, inertia
函数:euclidean_distances()