def _labels_inertia(X, x_squared_norms, centers,
                    precompute_distances=True, distances=None):
    """E step of the K-means EM algorithm.

    Assign every sample to a cluster center and return the labels together
    with the inertia. The ``distances`` buffer is handed to the assignment
    routines, which are expected to modify it in place.

    Parameters
    ----------
    X : float64 array-like or CSR sparse matrix, shape (n_samples, n_features)
        The input samples to assign to the labels.

    x_squared_norms : array, shape (n_samples,)
        Precomputed squared euclidean norm of each data point, to speed up
        computations.

    centers : float64 array, shape (k, n_features)
        The cluster centers.

    precompute_distances : boolean, default: True
        Precompute distances (faster but takes more memory). Only consulted
        when ``X`` is dense; sparse input always goes through the CSR
        routine.

    distances : float64 array, shape (n_samples,)
        Pre-allocated array to be filled in with each sample's distance
        to the closest center. When omitted, an empty float64 array is
        passed on instead.
        NOTE(review): presumably the empty array tells the helpers not to
        store per-sample distances -- confirm against their implementation.

    Returns
    -------
    labels : int array of shape(n)
        The resulting assignment.

    inertia : float
        Sum of distances of samples to their closest cluster center.

    Notes
    -----
    On the dense precompute path the result of
    ``_labels_inertia_precompute_dense`` is returned unchanged, and the
    label array allocated here is not used.
    """
    sample_count = X.shape[0]

    # Start every label at -1 so that any sample left unassigned is easy
    # to detect.
    labels = np.full(sample_count, -1, dtype=np.int32)

    # Fall back to an empty buffer when the caller supplied none.
    if distances is None:
        distances = np.zeros(shape=(0,), dtype=np.float64)

    # Select the assignment routine. `distances` will be changed in-place.
    if sp.issparse(X):
        # Sparse input: CSR-specific routine.
        assign = _k_means._assign_labels_csr
    elif precompute_distances:
        # Dense input with precomputed distances: delegate entirely.
        return _labels_inertia_precompute_dense(
            X, x_squared_norms, centers, distances)
    else:
        # Dense input without precomputing the distances.
        assign = _k_means._assign_labels_array

    # The routine updates `labels` in place and returns the inertia.
    inertia = assign(X, x_squared_norms, centers, labels,
                     distances=distances)
    return labels, inertia
函数 : _labels_inertia_precompute_dense()