参数:
——————————————————————
流程:
——————————————————————
def _kmeans_single(X, n_clusters, x_squared_norms, max_iter=300, init='k-means++', verbose=False, random_state=None, tol=1e-4, precompute_distances=True): """A single run of k-means, assumes preparation completed prior. Parameters ---------- X: array-like of floats, shape (n_samples, n_features) The observations to cluster. n_clusters: int The number of clusters to form as well as the number of centroids to generate. max_iter: int, optional, default 300 Maximum number of iterations of the k-means algorithm to run. init: {'k-means++', 'random', or ndarray, or a callable}, optional Method for initialization, default to 'k-means++': 'k-means++' : selects initial cluster centers for k-mean clustering in a smart way to speed up convergence. See section Notes in k_init for more details. 'random': generate k centroids from a Gaussian with mean and variance estimated from the data. If an ndarray is passed, it should be of shape (k, p) and gives the initial centers. If a callable is passed, it should take arguments X, k and and a random state and return an initialization. tol: float, optional The relative increment in the results before declaring convergence. verbose: boolean, optional Verbosity mode x_squared_norms: array Precomputed x_squared_norms. precompute_distances : boolean, default: True Precompute distances (faster but takes more memory). random_state: integer or numpy.RandomState, optional The generator used to initialize the centers. If an integer is given, it fixes the seed. Defaults to the global numpy random number generator. Returns ------- centroid: float ndarray with shape (k, n_features) Centroids found at the last iteration of k-means. label: integer ndarray with shape (n_samples,) label[i] is the code or index of the centroid the i'th observation is closest to. inertia: float The final value of the inertia criterion (sum of squared distances to the closest centroid for all observations in the training set). n_iter : int Number of iterations run. """ #加速度 random_state = check_random_state(random_state) best_labels, best_inertia, best_centers = None, None, None # init centers = _init_centroids(X, n_clusters, init, random_state=random_state, x_squared_norms=x_squared_norms) if verbose: print("Initialization complete") # Allocate memory to store the distances for each sample to its # closer center for reallocation in case of ties distances = np.zeros(shape=(X.shape[0],), dtype=np.float64) # iterations for i in range(max_iter): centers_old = centers.copy() # labels assignment is also called the E-step of EM labels, inertia = \ _labels_inertia(X, x_squared_norms, centers, precompute_distances=precompute_distances, distances=distances) # computation of the means is also called the M-step of EM if sp.issparse(X): centers = _k_means._centers_sparse(X, labels, n_clusters, distances) else: centers = _k_means._centers_dense(X, labels, n_clusters, distances) if verbose: print("Iteration %2d, inertia %.3f" % (i, inertia)) if best_inertia is None or inertia < best_inertia: best_labels = labels.copy() best_centers = centers.copy() best_inertia = inertia if squared_norm(centers_old - centers) <= tol: if verbose: print("Converged at iteration %d" % i) break return best_labels, best_inertia, best_centers, i + 1