Python 实现 K-means 聚类的代码

import pandas as pd
from datetime import *
from sklearn.cluster import KMeans
from scipy.spatial.distance import cdist
import matplotlib.pyplot as plt
from pylab import *
# Matplotlib text setup: use the SimHei font for sans-serif text and turn off
# the Unicode minus sign (axes.unicode_minus) for negative tick labels.
mpl.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})

## Profiling
# 1) Prepare the store longitude/latitude data used as clustering features.
# `data_md` is not defined in this snippet; presumably it is a DataFrame that
# already holds the columns '高德经度' and '高德维度' -- verify against the caller.
# NOTE(review): '维度' looks like a typo for '纬度' (latitude); both the source
# column name and the new column name are kept exactly as written.
data_temp = pd.DataFrame({
    '经度': data_md['高德经度'],
    '维度': data_md['高德维度'],
})


# (1) Elbow plot: fit K-means for k = 1..10 on data_temp and record the
# inertia of every fit, then plot inertia against the number of clusters.
inertia = []
for k in range(1, 11):
    model = KMeans(
        n_clusters=k,
        init='k-means++',
        n_init=10,
        max_iter=300,
        tol=0.0001,
        random_state=111,
        algorithm='elkan',
    )
    # fit() returns the fitted estimator, so inertia_ can be read directly.
    inertia.append(model.fit(data_temp).inertia_)

# Same x values for both the marker layer and the faint connecting line.
cluster_counts = np.arange(1, 11)
plt.figure(1, figsize=(15, 6))
plt.plot(cluster_counts, inertia, 'o')
plt.plot(cluster_counts, inertia, '-', alpha=0.5)
plt.xlabel('Number of Clusters')
plt.ylabel('Inertia')
plt.title('航线聚类个数的确定')
plt.show()

图1:聚类个数与 Inertia 的关系图(python实现K-means的代码_第1张图片)

# Final clustering of data_temp into 4 clusters (presumably the k read off the
# elbow plot -- confirm).
# random_state and n_init are pinned to the same values as the elbow loop:
# without a seed the labels differ between runs, and the library default for
# n_init is not the same in every scikit-learn release.
estimator = KMeans(n_clusters=4, init='k-means++', n_init=10, random_state=111)
estimator.fit(data_temp)
label_pred = estimator.labels_             # cluster index of each row of data_temp
centroids = estimator.cluster_centers_     # one row of coordinates per cluster centre
# Labels are assigned by position; data_temp was built from data_md's own
# columns, so row order matches.
data_md["类别"] = list(label_pred)

你可能感兴趣的:(统计学,机器学习,python,kmeans)