链接: 模式识别—聚类分析
def calcluate_distance(core: tuple, dot: tuple):
"""
计算两个点之间的欧氏距离
:param core: 质心坐标 (x,y) 类型为tuple
:param dot: 要计算距离的点(m,n) 类型为tuple
:return: 距离 dist 类型为float
"""
#dist用欧式距离公式获得
return dist
def calculate_cluster(dot: tuple, cores: list):
"""
计算给定点应该指派到哪一个质心
:param dot: 待处理的点
:param cores: 质心列表
:return: 应该指派到的质心的序号
"""
distance_list = []
for core in cores:
#获取到对应的dist
#添加到distance列表中
min_dist = min(distance_list)
#获取最短距离
put_to_index = distance_list.index(min_dist)
#获取最短距离对应的index
return put_to_index
def put_dot_into_clusters(row_data: list, k: int, cores: list):
"""
将点指派至最近质心的簇
:param cores:
:param row_data:
:param k:
:return: 已分配点的簇
"""
clusters = []
for each in range(k):
#创建对应个数的簇
for every_data in row_data:
#获取every_data的index
#添加对应的every_data到对应的簇
return clusters
def re_calculate_core(cluster: set):
"""
计算当前簇的下一个质心
:param cluster:
:return: new_core
"""
all_x = []
all_y = []
for each_dot in cluster:
#获取所有数据的X和y
#获取平均的X和Y
new_core = (round(avg_x, 2), round(avg_y, 2))
#生成新的质心,且保留两位小数
return new_core
for num in range(10):
#adot用random生成,用round保留两位小数
data_list.append(adot)
#添加adot到data_list中
IDE: PyCharm
Version:Python 3.7.3
from math import hypot
from math import pow
from random import random, sample
#Made by 柯少又来秀了
#Made by 柯少又来秀了
#Made by 柯少又来秀了
def calcluate_distance(core: tuple, dot: tuple) -> float:
    """
    Compute the Euclidean distance between two 2-D points.

    Only the first two coordinates of each argument are read.

    :param core: centroid coordinates (x, y)
    :param dot: point (m, n) whose distance to ``core`` is measured
    :return: the distance between ``core`` and ``dot``
    """
    # hypot() scales internally, so very large coordinate differences do not
    # overflow the way squaring them by hand does.
    return hypot(dot[0] - core[0], dot[1] - core[1])
def calculate_cluster(dot: tuple, cores: list) -> int:
    """
    Find which centroid a point should be assigned to.

    :param dot: the point to assign
    :param cores: list of centroid coordinates
    :return: index into ``cores`` of the centroid closest to ``dot``;
             on a tie the lowest index wins
    :raises ValueError: if ``cores`` is empty
    """
    if not cores:
        raise ValueError("cores must contain at least one centroid")
    # min() returns the first minimal candidate, which matches the
    # lowest-index tie-breaking of list.index(min(distances)).
    return min(
        range(len(cores)),
        key=lambda position: calcluate_distance(cores[position], dot),
    )
def initiation_cores(row_data: list, k: int) -> list:
    """
    Pick the initial centroids at random from the data.

    Centroids are drawn from the distinct points of ``row_data``, so two
    centroids never share the same coordinates even when the data contains
    duplicates.

    :param row_data: the raw data points (hashable, e.g. tuples)
    :param k: number of centroids to pick
    :return: list of ``k`` distinct points taken from ``row_data``
    :raises ValueError: if ``k`` is negative or larger than the number of
                        distinct points
    """
    # dict.fromkeys() removes duplicates while keeping first-seen order.
    # sample() alone only guarantees distinct positions, not distinct values.
    distinct_points = list(dict.fromkeys(row_data))
    if k > len(distinct_points):
        raise ValueError(
            f"k={k} exceeds the number of distinct points ({len(distinct_points)})"
        )
    return sample(distinct_points, k)
def put_dot_into_clusters(row_data: list, k: int, cores: list) -> list:
    """
    Assign every point to the cluster of its nearest centroid.

    :param row_data: the data points to assign (hashable, e.g. tuples)
    :param k: number of clusters to create
    :param cores: list of centroid coordinates; the index of a centroid in
                  this list is the index of its cluster in the result
    :return: list of ``k`` sets, where set ``i`` holds the points assigned
             to ``cores[i]``
    """
    # One independent set per cluster; a set drops duplicate points.
    clusters = [set() for _ in range(k)]
    for point in row_data:
        clusters[calculate_cluster(point, cores)].add(point)
    return clusters
def re_calculate_core(cluster: set) -> tuple:
    """
    Compute the next centroid of a cluster.

    The centroid is the mean of the x coordinates and the mean of the y
    coordinates of the cluster's points, each rounded to two decimals.

    :param cluster: the points (x, y) currently assigned to the cluster
    :return: the new centroid as a tuple (x, y)
    :raises ValueError: if ``cluster`` is empty
    """
    points = list(cluster)
    if not points:
        # Fail with an explicit message instead of a bare ZeroDivisionError.
        raise ValueError("cannot compute the centroid of an empty cluster")
    avg_x = sum(point[0] for point in points) / len(points)
    avg_y = sum(point[1] for point in points) / len(points)
    return (round(avg_x, 2), round(avg_y, 2))
if __name__ == '__main__':
    # Runs only when this file is executed directly as a script
    # (__name__ == '__main__'), not when it is imported by another module.

    # Synthetic data set. Each row is
    # (count, x_scale, x_offset, y_scale, y_offset): ``count`` points with
    # x = random() * x_scale + x_offset and y = random() * y_scale + y_offset,
    # where random() is in [0, 1). Coordinates are rounded to two decimals.
    regions = [
        (10, 20, -100, 20, -100),
        (100, 100, 100, 50, 150),
        (50, 20, 0, 20, 0),
        (50, 100, 100, 20, 0),
        (100, 200, 0, 200, 0),
    ]
    data_list = []
    for count, x_scale, x_offset, y_scale, y_offset in regions:
        for _ in range(count):
            adot = (
                round(random() * x_scale + x_offset, 2),
                round(random() * y_scale + y_offset, 2),
            )
            data_list.append(adot)

    # Number of clusters.
    k = 4
    # Initial centroids.
    my_cores = initiation_cores(data_list, k)

    # Safety cap: centroids are rounded to two decimals, so stop after a
    # fixed number of rounds in case they never settle on a fixed point.
    max_rounds = 1000
    roundx = 0
    while True:
        roundx += 1
        # Assignment step.
        cl = put_dot_into_clusters(data_list, k, my_cores)
        # Update step.
        new_cores = []
        for index in range(k):
            if cl[index]:
                new_cores.append(re_calculate_core(cl[index]))
            else:
                # A cluster that lost all its points has no mean; keep its
                # previous centroid instead of dividing by zero.
                new_cores.append(my_cores[index])
        # Converged when no centroid moved; ``cl`` then matches ``my_cores``.
        if new_cores == my_cores or roundx >= max_rounds:
            break
        my_cores = new_cores

    # Imported here so matplotlib is only required when the script plots.
    import matplotlib.pyplot as plt

    colors = ['#0000FF', '#FF0000', '#00FF00', '#666666', '#FFFF00']
    for index in range(k):
        # Cycle through the palette when there are more clusters than colors.
        color = colors[index % len(colors)]
        if cl[index]:
            # One scatter call per cluster instead of one per point.
            xs = [every_dot[0] for every_dot in cl[index]]
            ys = [every_dot[1] for every_dot in cl[index]]
            plt.scatter(xs, ys, c=color, alpha=0.53)
        # Mark the cluster's centroid with a black '+'.
        plt.scatter(my_cores[index][0], my_cores[index][1], marker='+', c='#000000', s=180)
    plt.show()
#Made by 柯少又来秀了