K-means聚类算法 伪代码 python3代码

K-means 算法及其代码

  • K-means算法介绍
  • K-means 伪代码
  • K-means python 代码

K-means算法介绍

链接: 模式识别—聚类分析

K-means 伪代码

  1. 计算两个点之间的欧氏距离
	def calcluate_distance(core: tuple, dot: tuple):
    """
    计算两个点之间的欧氏距离
    :param core: 质心坐标 (x,y) 类型为tuple
    :param dot:  要计算距离的点(m,n) 类型为tuple
    :return: 距离 dist 类型为float
    """
    #dist用欧式距离公式获得
    return dist
  2. 计算给定点应分配到哪一个质心
def calculate_cluster(dot: tuple, cores: list):
    """
    计算给定点应该指派到哪一个质心
    :param dot: 待处理的点
    :param cores: 质心列表
    :return: 应该指派到的质心的序号
    """
    distance_list = []
    for core in cores:
        #获取到对应的dist
        #添加到distance列表中
    min_dist = min(distance_list)
    #获取最短距离
    put_to_index = distance_list.index(min_dist)
    #获取最短距离对应的index
    return put_to_index
  3. 将点分配到最近的质心的簇
def put_dot_into_clusters(row_data: list, k: int, cores: list):
    """
    将点指派至最近质心的簇
    :param cores:
    :param row_data:
    :param k:
    :return: 已分配点的簇
    """
    clusters = []
    for each in range(k):
		#创建对应个数的簇
    for every_data in row_data:
		#获取every_data的index
		#添加对应的every_data到对应的簇
    return clusters
  4. 计算当前簇的下一个质心
def re_calculate_core(cluster: set):
    """
    计算当前簇的下一个质心
    :param cluster:
    :return: new_core
    """
    all_x = []	
	all_y = []
    for each_dot in cluster:
        #获取所有数据的X和y
	#获取平均的X和Y
    new_core = (round(avg_x, 2), round(avg_y, 2))
	#生成新的质心,且保留两位小数
    return new_core
  5. 初始化数据点
	for num in range(10):
		#adot用random生成,用round保留两位小数
    data_list.append(adot)
	#添加adot到data_list中

K-means python 代码

IDE: PyCharm
Version:Python 3.7.3

from random import random, sample
from math import pow
#Made by 柯少又来秀了
#Made by 柯少又来秀了
#Made by 柯少又来秀了

def calcluate_distance(core: tuple, dot: tuple):
    """
    Return the Euclidean distance between a centroid and a point.

    :param core: centroid coordinates (x, y) as a tuple
    :param dot: coordinates (m, n) of the point to measure, as a tuple
    :return: the distance as a float
    """
    delta_x = dot[0] - core[0]
    delta_y = dot[1] - core[1]
    squared_sum = delta_x ** 2 + delta_y ** 2
    # Square root expressed as a power of 0.5.
    return pow(squared_sum, 0.5)

def calculate_cluster(dot: tuple, cores: list):
    """
    Find which centroid the given point should be assigned to.

    :param dot: the point to assign
    :param cores: list of centroids
    :return: index (into cores) of the nearest centroid
    """
    distances = [calcluate_distance(center, dot) for center in cores]
    # list.index returns the first match, so ties go to the lowest index.
    return distances.index(min(distances))

def initiation_cores(row_data: list, k: int):
    """
    Pick k initial centroids at random from the raw data.

    Duplicate points are collapsed first (keeping first-seen order) so that
    two centroids can never start at the same coordinates. Identical
    centroids would leave one cluster empty, because ties in the nearest-
    centroid search always go to the lower index.

    :param row_data: the raw data points (must be hashable, e.g. tuples)
    :param k: number of centroids to pick
    :return: list of k distinct points drawn from row_data
    :raises ValueError: if row_data holds fewer than k distinct points
    """
    # dict.fromkeys de-duplicates while preserving order, so data without
    # duplicates is sampled from exactly the same population as before.
    distinct_points = list(dict.fromkeys(row_data))
    # random.sample draws k elements without replacement.
    return sample(distinct_points, k)

def put_dot_into_clusters(row_data: list, k: int, cores: list):
    """
    Assign every point to the cluster of its nearest centroid.

    :param row_data: the data points to distribute
    :param k: number of clusters to create
    :param cores: current centroids; the centroid at position i owns cluster i
    :return: list of k sets, each holding the points assigned to that centroid
    """
    # Sets are unordered and hold unique elements, so a point that appears
    # more than once in row_data is stored only once in its cluster.
    buckets = [set() for _ in range(k)]
    for point in row_data:
        nearest = calculate_cluster(point, cores)
        buckets[nearest].add(point)
    return buckets

def re_calculate_core(cluster: set):
    """
    Compute the next centroid of a cluster as the mean of its points.

    :param cluster: the points (x, y) currently assigned to one cluster
    :return: new centroid (mean_x, mean_y), each coordinate rounded to
             two decimal places
    """
    points = list(cluster)
    count = len(points)
    mean_x = sum(point[0] for point in points) / count
    mean_y = sum(point[1] for point in points) / count
    return round(mean_x, 2), round(mean_y, 2)

if __name__ == '__main__':
    # Code under this guard runs only when the file is executed directly as a
    # script; it is skipped when the module is imported from another script.

    # Each spec is (count, x_scale, x_offset, y_scale, y_offset): `count`
    # points are generated with x = random() * x_scale + x_offset (and the
    # same for y), rounded to two decimals. random() returns a float in [0, 1).
    point_specs = [
        (10, 20, -100, 20, -100),
        (100, 100, 100, 50, 150),
        (50, 20, 0, 20, 0),
        (50, 100, 100, 20, 0),
        (100, 200, 0, 200, 0),
    ]
    data_list = []
    for count, x_scale, x_offset, y_scale, y_offset in point_specs:
        for _ in range(count):
            adot = (round(random() * x_scale + x_offset, 2),
                    round(random() * y_scale + y_offset, 2))
            data_list.append(adot)

    # Number of clusters.
    k = 4
    # Initial centroids.
    my_cores = initiation_cores(data_list, k)

    # Convergence is detected by exact equality of the rounded centroids, so
    # cap the number of rounds to guarantee the loop terminates.
    max_rounds = 1000
    for roundx in range(1, max_rounds + 1):
        # Assignment step.
        cl = put_dot_into_clusters(data_list, k, my_cores)
        # Update step. An empty cluster has no mean (re_calculate_core would
        # divide by zero), so its centroid is left where it was.
        new_cores = [
            re_calculate_core(cluster) if cluster else old_core
            for cluster, old_core in zip(cl, my_cores)
        ]
        if new_cores == my_cores:
            break
        my_cores = new_cores

    import matplotlib.pyplot as plt
    colors = ['#0000FF', '#FF0000', '#00FF00', '#666666', '#FFFF00']
    for index, cluster in enumerate(cl):
        color = colors[index % len(colors)]
        if cluster:
            # Draw the whole cluster in one call instead of one call per point.
            xs, ys = zip(*cluster)
            plt.scatter(xs, ys, c=color, alpha=0.53)
        # Mark the centroid of this cluster with a black '+'.
        plt.scatter(my_cores[index][0], my_cores[index][1], marker='+', c='#000000', s=180)
    plt.show()

#Made by 柯少又来秀了

你可能感兴趣的:(模式识别)