【Python】k-means算法实现

# -*- coding: utf-8 -*-
import  math
import  random
import matplotlib.pyplot as plt
from matplotlib import colors as m_colors
#生成样本点
def gen_random_sample(n_feat,lower,upper):
    sample=[random.uniform(lower,upper) for _ in range(n_feat)]
    return sample
#样本个数
n_samples=1000
#特征个数(特征维度)
n_feat=2
#特征数值范围
lower=0
upper=200
#聚类个数
n_cluster=3
#聚类样本点
samples=[gen_random_sample(n_feat,lower,upper) for _ in range(n_samples)]

#收敛阈值
cutoff=0.1
#聚类中心
cluster=[samples[random.randint(0,n_samples)] for _ in range(n_cluster)]

#重新计算聚类中心:
new_cluster=[]
#计算两点间距离
def cal_distance(a,b):
    s_sum=0
    for i in range(n_feat):
        s_sum+=pow(a[i]-b[i],2)
    dis=math.sqrt(s_sum)
    return dis
cutoff_dis=[1 for _ in cluster]
print(max(cutoff_dis))
#迭代次数
n_loop=1
while(max(cutoff_dis)> cutoff ):

    # 样本点图
    #for i_sample in samples:
        #x_sample.append(i_sample[0])
       # y_sample.append(i_sample[1])
    #plt.scatter(x_sample, y_sample)
    # 画出聚类中心
    #for cluster_point in cluster:
        #plt.scatter(cluster_point[0], cluster_point[1], c='red')
    #plt.title('samples&clusters(iteration:{})'.format(n_loop))
    #plt.show()
    #分类lists
    lists=[[]for _ in range(n_cluster)]
    #对样本遍历进行分类
    for sample_point in samples:
        dis=[cal_distance(sample_point,cluster_point) for cluster_point in cluster]
        min_dis=min(dis)
        index_cluster=dis.index(min_dis)
        lists[index_cluster].append(sample_point)
    #print(dis)

    print(lists)
    #设置不同聚类点的颜色
    color_names=list(m_colors.cnames)

    for i in lists:
        color_=[color_names[lists.index(i)]]*len(i)
        x=[]
        y=[]
        for classify_point in i:
            x.append(classify_point[0])
            y.append(classify_point[1])
        plt.scatter(x,y,c=color_)
        #重新计算聚集点中心
        new_cluster.append([sum(x)/len(x),sum(y)/len(y)])
        #对新产生的聚类中心作图
    for cluster_point in new_cluster:
        plt.scatter(cluster_point[0], cluster_point[1], c='red')
    plt.title('output of k_means(iteration:{})'.format(n_loop))
    plt.show()
    print(len(cluster),len(new_cluster))
    #计算新旧聚类中心的偏移距离
    for i in range(len(cluster)):
        print(i)
        cutoff_dis[i]=cal_distance(cluster[i],new_cluster[i])

    print('cluster',cluster)
    print('new_cluster:',new_cluster)
    print('cutoff_dis:',cutoff_dis)
    #对聚类中心重新赋值
    for i in range(len(cluster)):
        cluster[i]=new_cluster[i]

    new_cluster.clear()
    n_loop+=1

迭代16次,输出结果图:

你可能感兴趣的:(【Python】k-means算法实现)