《模式识别》(孙即祥)第 67 页 C 均值算法聚类练习,最后用三维散点图展示聚类效果。
原始数据集加代码:
https://download.csdn.net/download/qq_37656107/10674888#0-qzone-1-2325-d020d2d2a4e8d1a374a433f596ad1440
Python 用得不多,很多地方写得不够简洁;一开始没有考虑聚成不同数量的类,后面聚成三类时只是简单修改代码、增加了一类。这里仅作练习,就不再重构了。
聚类成两类
import numpy as np
import operator #包含列表比较函数
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
def parse_matrix(lines, rows=20, cols=3):
    """Build a ``rows`` x ``cols`` float matrix from one-number-per-line text.

    Blank (whitespace-only) lines are ignored wherever they occur, so
    separator lines between samples and trailing blank lines at the end of
    the file are both harmless.  Values fill the matrix row by row; rows for
    which no data is supplied stay zero.

    Args:
        lines: Iterable of strings (e.g. an open text file), one number each.
        rows: Number of rows in the resulting matrix.
        cols: Number of values per row.

    Returns:
        A ``numpy.ndarray`` of shape ``(rows, cols)`` and dtype float.

    Raises:
        ValueError: If a non-blank line is not a number, if the last row is
            incomplete, or if more than ``rows * cols`` values are supplied.
    """
    stripped = (raw.strip() for raw in lines)
    values = [float(text) for text in stripped if text]
    if len(values) % cols:
        raise ValueError(
            "expected a multiple of %d values, got %d" % (cols, len(values))
        )
    if len(values) > rows * cols:
        raise ValueError(
            "expected at most %d values, got %d" % (rows * cols, len(values))
        )
    matrix = np.zeros((rows, cols))
    filled = len(values) // cols
    matrix[:filled] = np.asarray(values, dtype=float).reshape(filled, cols)
    return matrix


# Read the data set only when run as a script, so that importing
# parse_matrix does not touch the filesystem.
if __name__ == "__main__":
    with open(r"C:\Users\Administrator\Desktop\模式识别\P67 数据集.txt") as f:
        date_matrix = parse_matrix(f)
# Cluster the samples with the C-means algorithm.
# Step 1: choose C feature vectors as the initial cluster centers.
# (The two commented-out lines below are an alternative pair of starting centers.)
#A = np.array([0,0,0])
#B = np.array([1,1,-1])
A = np.array([1,1,1])
B = np.array([-1,1,-1])
# Members of classes A and B from the most recent pass.
A_list = []
B_list = []
# Members of classes A and B from the previous pass. Seeded with [0] so they
# differ from the empty lists above and the comparison loop below is entered.
A_list_last = [0]
B_list_last = [0]
number=0 # iteration counter
#聚类
while operator.eq(A_list,A_list_last) == False:
number+=1
A_list_last = A_list[:]
B_list_last = B_list[:]
A_list=[]
B_list=[]
for i in range(0,20):
dist_A = np.sqrt(np.sum(np.square(date_matrix[i] - A))) #计算欧氏距离
dist_B = np.sqrt(np.sum(np.square(date_matrix[i] - B)))
if dist_A
聚类效果图
聚类成三类
import numpy as np
import operator #包含列表比较函数
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
# C-means clustering: load the sample matrix.
def parse_matrix(lines, rows=20, cols=3):
    """Build a ``rows`` x ``cols`` float matrix from one-number-per-line text.

    Blank (whitespace-only) lines are ignored wherever they occur, so
    separator lines between samples and trailing blank lines at the end of
    the file are both harmless.  Values fill the matrix row by row; rows for
    which no data is supplied stay zero.

    Args:
        lines: Iterable of strings (e.g. an open text file), one number each.
        rows: Number of rows in the resulting matrix.
        cols: Number of values per row.

    Returns:
        A ``numpy.ndarray`` of shape ``(rows, cols)`` and dtype float.

    Raises:
        ValueError: If a non-blank line is not a number, if the last row is
            incomplete, or if more than ``rows * cols`` values are supplied.
    """
    stripped = (raw.strip() for raw in lines)
    values = [float(text) for text in stripped if text]
    if len(values) % cols:
        raise ValueError(
            "expected a multiple of %d values, got %d" % (cols, len(values))
        )
    if len(values) > rows * cols:
        raise ValueError(
            "expected at most %d values, got %d" % (rows * cols, len(values))
        )
    matrix = np.zeros((rows, cols))
    filled = len(values) // cols
    matrix[:filled] = np.asarray(values, dtype=float).reshape(filled, cols)
    return matrix


# Read the data set only when run as a script, so that importing
# parse_matrix does not touch the filesystem.
if __name__ == "__main__":
    with open(r"C:\Users\Administrator\Desktop\模式识别\P67 数据集.txt") as f:
        date_matrix = parse_matrix(f)
def c_means(data, centers):
    """Partition the rows of ``data`` into three clusters (C-means / k-means).

    Each pass assigns every row to its nearest center (Euclidean distance)
    and then moves each center to the mean of its members.  Passes repeat
    until a pass leaves all three membership lists unchanged.

    Args:
        data: Array-like of shape ``(n_samples, n_features)``.
        centers: Sequence of exactly three initial centers, each of length
            ``n_features``.

    Returns:
        A tuple ``(groups, centers, iterations)`` where ``groups`` is a tuple
        of three lists of row indices, ``centers`` is a list of the three
        final center vectors, and ``iterations`` counts the passes made
        (including the final pass that confirmed nothing changed).

    Raises:
        ValueError: If ``centers`` does not contain exactly three vectors.
    """
    data = np.asarray(data, dtype=float)
    centers = [np.asarray(center, dtype=float) for center in centers]
    if len(centers) != 3:
        raise ValueError("c_means expects exactly three initial centers")

    groups = ([], [], [])
    previous = None  # differs from any real assignment, so the loop is entered
    iterations = 0
    # Compare all three groups explicitly.  Mixing `==` with `|` is a trap:
    # `|` binds tighter, turning the test into a chained comparison that can
    # stop the loop while points are still moving between clusters.
    while groups != previous:
        iterations += 1
        previous = groups
        groups = ([], [], [])
        for index, point in enumerate(data):
            dist_a, dist_b, dist_c = (
                np.sqrt(np.sum(np.square(point - center))) for center in centers
            )
            # Tie-breaking: A/B ties go to the B-or-C branch, A/C ties go to
            # A, and B/C ties go to C.
            if dist_a < dist_b:
                target = 2 if dist_c < dist_a else 0
            else:
                target = 1 if dist_b < dist_c else 2
            groups[target].append(index)
        # Move each center to the mean of its members.  An empty cluster keeps
        # its previous center instead of becoming NaN through a 0/0 division.
        centers = [
            data[members].sum(axis=0) / len(members) if members else center
            for members, center in zip(groups, centers)
        ]
    return groups, centers, iterations


# Run the clustering only when executed as a script; it relies on the
# `date_matrix` array loaded earlier in the script.
if __name__ == "__main__":
    # Step 1: choose C feature vectors as the initial cluster centers.
    A = np.array([-0.1,0,0.1])
    B = np.array([0,-0.1,0.1])
    C = np.array([-0.1,-0.1,0.1])
    (A_list, B_list, C_list), (A, B, C), number = c_means(date_matrix, (A, B, C))
    print("本次聚类共迭代%d次,分组为:"%number)
    print(A_list,B_list,C_list)
# Plot the three clusters as a 3-D scatter chart, one colour per cluster.
ax = plt.subplot(projection='3d')  # 3-D axes to draw on
for members, colour in ((A_list, 'r'), (B_list, 'g'), (C_list, 'b')):
    # Collect the coordinates of every sample assigned to this cluster.
    xs = [date_matrix[idx][0] for idx in members]
    ys = [date_matrix[idx][1] for idx in members]
    zs = [date_matrix[idx][2] for idx in members]
    ax.scatter(xs, ys, zs, c=colour, marker="v")
# Axis labels.
ax.set_zlabel('Z')
ax.set_ylabel('Y')
ax.set_xlabel('X')
plt.show()
效果图