kmeans算法python实现三类_python实现简单的kmeans聚类算法

问题描述:给定一组二维数据点,用 k-means 算法对其进行聚类。下面的例子以 k=3(即分成三类)为例。

原数据(保存为 data.txt,每行一个点,坐标之间用逗号分隔):

1.5,3.1

2.2,2.9

3,4

2,1

15,25

43,13

32,42

0,0

8,9

12,5

9,12

11,8

22,33

24,25

实现代码:

#coding:utf-8

from numpy import *

import string

import math

def loadDataSet(filename):
    """Load comma-separated numeric rows from a text file.

    Args:
        filename: Path of the text file to read. Every non-blank line holds
            one data point whose coordinates are separated by commas.

    Returns:
        A list of points, each point being a list of floats.

    Raises:
        ValueError: If a field cannot be parsed as a float.
    """
    dataMat = []
    # The context manager closes the file even when parsing fails.
    with open(filename) as fr:
        for line in fr:
            line = line.strip()
            if not line:
                # Blank lines carry no point; skipping them avoids float('').
                continue
            # float() replaces string.atof, which no longer exists in Python 3.
            dataMat.append([float(field) for field in line.split(',')])
    return dataMat

def distEclud(vecA, vecB):
    """Return the Euclidean distance between two coordinate sequences.

    The number of coordinates compared is len(vecA); vecB is indexed at the
    same positions.
    """
    squared_diffs = [power(vecA[pos] - vecB[pos], 2)
                     for pos in range(len(vecA))]
    # Accumulate by hand, starting from a float, so the running total is
    # always floating point.
    total = 0.0
    for term in squared_diffs:
        total = total + term
    return sqrt(total)

def clusterOfElement(means, element):
    """Return the index of the centroid in means closest to element.

    Distances are measured with distEclud. When several centroids are
    equally close, the one with the smallest index wins.
    """
    best_label = 0
    best_dist = distEclud(means[0], element)
    # Start counting at 1 because centroid 0 is already the current best.
    for label, center in enumerate(means[1:], 1):
        candidate = distEclud(center, element)
        if candidate < best_dist:
            best_label, best_dist = label, candidate
    return best_label

def getMean(cluster):
    """Return the coordinate-wise mean of the points in one cluster.

    Args:
        cluster: A sequence of points, e.g. [[1, 2], [3, 4], [5, 6]]. Every
            point must have at least as many coordinates as the first one.

    Returns:
        A list of floats with one mean per coordinate, or an empty list when
        the cluster contains no points.
    """
    num = len(cluster)  # number of points in this cluster
    if num == 0:
        # No points means no dimensions to average.
        return []
    res = []
    for i in range(len(cluster[0])):
        # Restart the accumulator for every dimension; carrying the previous
        # dimension's mean over would corrupt all later coordinates.
        total = 0.0  # float start also avoids Python 2 integer division
        for point in cluster:
            total = total + point[i]
        res.append(total / num)
    return res

def kMeans(k=3, filename='data.txt', init_means=None, max_iter=1000):
    """Cluster the points stored in a file with the k-means algorithm.

    Args:
        k: Number of clusters.
        filename: File handed to loadDataSet to obtain the points.
        init_means: Initial centroids, one coordinate list per cluster.
            Defaults to the three seeds [[1.1, 1], [1, 1], [1, 2]].
        max_iter: Upper bound on the number of assign/update rounds.

    Returns:
        A (clusters, means) tuple. clusters[i] holds the points assigned to
        cluster i in the final round, and means[i] is the centroid computed
        from them (or the previous centroid if the cluster was empty).

    Raises:
        ValueError: If the number of initial centroids differs from k.
    """
    if init_means is None:
        init_means = [[1.1, 1], [1, 1], [1, 2]]
    if len(init_means) != k:
        raise ValueError(
            "expected %d initial means, got %d" % (k, len(init_means)))
    # Work on a copy so the caller's seed list is never modified.
    means = [list(center) for center in init_means]

    data = loadDataSet(filename)
    # %-formatting prints the same text under Python 2 and Python 3; the
    # double space matches what the old print statement produced.
    print("data is  %s" % (data,))

    clusters = [[] for _ in range(k)]
    for _ in range(max_iter):
        # Assignment step: put every point into its nearest centroid's bucket.
        clusters = [[] for _ in range(k)]
        for point in data:
            clusters[clusterOfElement(means, point)].append(point)

        # Update step: recompute each centroid from its members.
        new_means = []
        for idx in range(k):
            if clusters[idx]:
                new_means.append(list(getMean(clusters[idx])))
            else:
                # An empty cluster has no mean; keep its previous centroid.
                new_means.append(means[idx])

        # Unchanged centroids would yield the same assignment again, so
        # further rounds cannot change the result.
        converged = new_means == means
        means = new_means
        if converged:
            break

    print("result cluster is  %s" % (clusters,))
    print("result means is  %s" % (means,))
    return clusters, means

# Run the clustering as soon as this file is executed or imported.
kMeans()

你可能感兴趣的:(kmeans算法python实现三类_python实现简单的kmeans聚类算法)