K-Means算法

import numpy as np
import matplotlib.pyplot as plt


def L2(vecXi, vecYi):
    """Return the Euclidean (L2) distance between two vectors.

    The inputs are subtracted element-wise, so their shapes must be
    compatible for NumPy subtraction.
    """
    delta = vecXi - vecYi
    squaredSum = np.sum(np.power(delta, 2))
    return np.sqrt(squaredSum)


def K_means(S, k, disMeas=L2, *, verbose=True):
    """Cluster the rows of ``S`` into ``k`` groups with the K-means algorithm.

    Centroids are initialised uniformly at random inside the per-feature
    bounding box of the data.  The function then alternates between assigning
    every sample to its nearest centroid and moving every centroid to the
    mean of the samples assigned to it, until an assignment pass leaves all
    tags unchanged.

    Args:
        S: 2-D NumPy array of samples, one sample per row and one feature
            per column.
        k: Number of clusters; must be at least 1.
        disMeas: Callable ``disMeas(centroid, sample)`` returning the
            distance between a centroid row and a sample.  Defaults to
            ``L2``.
        verbose: When true (the default), print the centroids and the SSE
            and show a scatter plot of the first two feature columns after
            every assignment pass.  Pass ``False`` to run without any
            printing or plotting.

    Returns:
        A tuple ``(clusterCents, sampleTag, SSE)`` where ``clusterCents`` is
        a ``k x n`` ``np.matrix`` of centroids, ``sampleTag`` is a float
        array holding the cluster index assigned to each sample, and ``SSE``
        is the sum of squared distances from every sample to its assigned
        centroid.

    Raises:
        ValueError: If ``k`` is smaller than 1.
    """
    if k < 1:
        raise ValueError("k must be at least 1")

    m, n = np.shape(S)[:2]

    # -1 is not a valid cluster index, so the first assignment pass always
    # registers a change and the centroids are refined at least once.
    sampleTag = np.full(m, -1.0)

    # np.asmatrix rather than np.mat: the np.mat alias was removed in
    # NumPy 2.0, while np.asmatrix keeps the np.matrix return type.
    clusterCents = np.asmatrix(np.zeros((k, n)))
    for j in range(n):
        minJ = S[:, j].min()
        rangJ = float(S[:, j].max() - minJ)
        clusterCents[:, j] = minJ + rangJ * np.random.rand(k, 1)

    while True:
        sampleTagChanged = False
        SSE = 0

        # Assignment step: tag every sample with its nearest centroid.
        for i in range(m):
            minD = np.inf
            minIndex = -1
            for j in range(k):
                d = disMeas(clusterCents[j, :], S[i, :])
                if d < minD:
                    minD = d
                    minIndex = j
            if sampleTag[i] != minIndex:
                sampleTagChanged = True
                sampleTag[i] = minIndex
            SSE += minD ** 2

        if verbose:
            print(clusterCents)
            plt.scatter(S[:, 0], S[:, 1], c=sampleTag, linewidths=np.power(sampleTag + 0.5, 2))
            plt.show()
            print(SSE)

        # No tag changed, so the centroids computed after the previous pass
        # are already the means of this assignment and SSE refers to them.
        if not sampleTagChanged:
            break

        # Update step: move every centroid to the mean of its samples.
        for j in range(k):
            members = S[sampleTag == j]
            # An empty cluster keeps its previous centroid; the mean of an
            # empty selection would turn the centroid into NaN.
            if len(members) > 0:
                clusterCents[j, :] = np.mean(members, axis=0)

    return clusterCents, sampleTag, SSE


if __name__ == '__main__':
    # Cluster the bundled sample file into three groups.
    samples = np.loadtxt('kmeansSamples.txt')
    clusterCents, sampleTag, SSE = K_means(samples, 3)

    # Plot the first two feature columns, coloured by cluster tag; the marker
    # edge width is (tag + 0.5) squared.
    xs = samples[:, 0]
    ys = samples[:, 1]
    edgeWidths = np.power(sampleTag + 0.5, 2)
    plt.scatter(xs, ys, c=sampleTag, linewidths=edgeWidths)
    plt.show()

    # Report the final centroids and the sum of squared errors.
    print(clusterCents)
    print(SSE)

代码中读取的 kmeansSamples.txt 是机器学习资料中提供的样本文件;也可以换成自己的数据文件(需为 np.loadtxt 能读取的数值文本,每行一个样本,且至少包含两列特征,以便绘制散点图)。

你可能感兴趣的:(算法,kmeans,numpy)