# 第3小问 (Part 3): Elbow method
from scipy.spatial.distance import cdist
from sklearn.cluster import KMeans

# Elbow method: fit KMeans for a range of cluster counts and plot, for each
# count, the average Euclidean distance from every sample to its nearest
# centroid. The "elbow" of that curve suggests a reasonable cluster count.

# Not used by the sweep below; kept because code outside this excerpt may
# read it.
k = 5

# product_df is defined outside this excerpt. Converting once to an ndarray
# lets the positional slicing below work whether it is already an array or a
# pandas DataFrame (assumes at least two numeric columns -- TODO confirm).
product_arr = np.asarray(product_df)

# Scatter plot of the first two columns of the data.
plt.figure(figsize=(10, 5))
plt.scatter(product_arr[:, 0], product_arr[:, 1], c='k')
plt.show()

# Fit clusters for various numbers of clusters (1 through 9).
K = range(1, 10)
KM = [
    KMeans(n_clusters=n_clusters, random_state=0).fit(product_df)
    for n_clusters in K
]
centroids = [km.cluster_centers_ for km in KM]  # cluster centroids

# Distance from every sample to every centroid, one matrix per cluster count.
D_k = [cdist(product_arr, cent, 'euclidean') for cent in centroids]
# Index of the nearest centroid for each sample.
cIdx = [np.argmin(D, axis=1) for D in D_k]
# Distance from each sample to its nearest centroid.
dist = [np.min(D, axis=1) for D in D_k]
# Average (unsquared) Euclidean distance to the nearest centroid. The name is
# kept for compatibility even though the values are not sums of squares.
avgWithinSS = [d.mean() for d in dist]

# Plot the elbow curve.
plt.plot(K, avgWithinSS, 'b.-', ms=10)
plt.xlabel('Number of clusters')
# Label matches what is plotted: mean distance, not squared error.
plt.ylabel('Average distance to nearest centroid')
plt.show()
# SpectralClustering 也是这样做，把参数改改就好。
# (SpectralClustering can be handled the same way; just adjust the parameters.)