The goal of this post is to practice implementing the algorithm by hand.
The K-Means Algorithm
K-means is a commonly used unsupervised clustering algorithm. It can be viewed as jointly optimizing the centroids and the label of each sample so that a loss function is minimized, and its execution embodies the idea of the EM algorithm.
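For reference, the loss being minimized is the within-cluster sum of squared distances (the notation below is mine, not from the original code):

$$
J(c,\mu)=\sum_{i=1}^{n}\lVert x_i-\mu_{c_i}\rVert^2
$$

The two alternating steps, which give the procedure its EM-like flavor, are the assignment step and the centroid-update step:

$$
c_i \leftarrow \arg\min_{j}\lVert x_i-\mu_j\rVert^2,\qquad
\mu_j \leftarrow \frac{1}{\left|\{i: c_i=j\}\right|}\sum_{i:\,c_i=j} x_i
$$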
import random

import numpy as np


class KMeans():
    def __init__(self, K=2):
        self.k = K

    def __dist(self, a, b):
        # Euclidean distance between two vectors
        return np.sqrt(np.sum((a - b) ** 2))

    # Input: an ndarray X; output: the cluster label of every sample
    def fit(self, X):
        # 1. Initialize the centroids by picking k random samples.
        #    A cluster is stored as [sum of member vectors, member count],
        #    so its centroid is sum / count.
        clusters = [[X[random.randint(0, len(X) - 1)], 1] for _ in range(self.k)]
        labels = [0 for _ in range(len(X))]
        changed = True
        while changed:
            changed = False
            old_labels = labels.copy()
            # 2. Assign every sample to its nearest centroid.
            for i in range(len(X)):
                labels[i] = 0
                cur_dist = self.__dist(clusters[0][0] / clusters[0][1], X[i])
                for j in range(1, len(clusters)):
                    cj = clusters[j][0] / clusters[j][1]
                    d = self.__dist(cj, X[i])
                    if d < cur_dist:
                        labels[i] = j
                        cur_dist = d
            if old_labels != labels:
                changed = True
            # 3. Recompute the centroids from the new assignment;
            #    keep the old centroid if a cluster loses all of its members.
            new_clusters = [[np.zeros(X.shape[1]), 0] for _ in range(self.k)]
            for i in range(len(X)):
                new_clusters[labels[i]][0] += X[i]
                new_clusters[labels[i]][1] += 1
            for k in range(self.k):
                if new_clusters[k][1] > 0:
                    clusters[k] = new_clusters[k]
        return labels
if __name__ == '__main__':
    # 30 two-dimensional sample points used for the demo
    dataSet = [
        [0.697, 0.460], [0.774, 0.376], [0.634, 0.264], [0.608, 0.318], [0.556, 0.215], [0.403, 0.237], [0.481, 0.149],
        [0.437, 0.211], [0.666, 0.091], [0.243, 0.267], [0.245, 0.057], [0.343, 0.099], [0.639, 0.161], [0.657, 0.198],
        [0.360, 0.370], [0.593, 0.042], [0.719, 0.103], [0.359, 0.188], [0.339, 0.241], [0.282, 0.257], [0.748, 0.232],
        [0.714, 0.346], [0.483, 0.312], [0.478, 0.437], [0.525, 0.369], [0.751, 0.489], [0.532, 0.472], [0.473, 0.376],
        [0.725, 0.445], [0.446, 0.459]
    ]
    k = 3
    X = np.array(dataSet)
    result = KMeans(K=k).fit(X)

    # Plot each cluster in its own color
    import matplotlib.pyplot as plt
    colors = ['b', 'g', 'r']
    for i in range(k):
        ys = [j for j in range(len(result)) if result[j] == i]
        plt.scatter(X[ys, 0], X[ys, 1], c=colors[i], alpha=0.5)
    plt.show()
    print(result)
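As a quick sanity check, the labels can be compared against scikit-learn's built-in implementation (a minimal sketch, assuming scikit-learn is installed; the `n_init` and `random_state` values are arbitrary, and the cluster indices may be a permutation of those in `result`):

from sklearn.cluster import KMeans as SKKMeans  # assumption: scikit-learn is available

sk_labels = SKKMeans(n_clusters=3, n_init=10, random_state=0).fit_predict(X)
print(sk_labels)  # compare cluster memberships with `result`, up to relabeling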