Python实现简单的k-means聚类算法

问题描述:给定一组二维数据,用k-means算法对其进行聚类。下面的例子以k=3(即分为3个簇)为例。

原数据(保存为 data.txt,每行一个点,格式为 x,y):

1.5,3.1
2.2,2.9
3,4
2,1
15,25
43,13
32,42
0,0
8,9
12,5
9,12
11,8
22,33
24,25

实现代码:

#coding:utf-8
from numpy import *
import string
import math

def loadDataSet(filename):
	"""Load comma-separated numeric rows from a text file.

	Each non-blank line of the file is split on ',' and every field is
	converted to a float.

	Args:
		filename: Path of the text file to read.

	Returns:
		A list of rows, each row being a list of floats.

	Raises:
		IOError/OSError: If the file cannot be opened.
		ValueError: If a field cannot be parsed as a float.
	"""
	dataMat = []
	# The with-block guarantees the file handle is closed, even on a parse error.
	with open(filename) as fr:
		for line in fr:
			line = line.strip()
			if not line:
				# A blank line (e.g. a trailing newline at the end of the
				# file) carries no point and must not reach float('').
				continue
			# float() replaces string.atof, which no longer exists in Python 3.
			dataMat.append([float(field) for field in line.split(',')])
	return dataMat

def distEclud(vecA, vecB):
	"""Return the Euclidean distance between two vectors.

	Only the first len(vecA) components are compared, so vecB must be at
	least as long as vecA.
	"""
	squared_sum = 0.0
	for position in range(len(vecA)):
		delta = vecA[position] - vecB[position]
		squared_sum += power(delta, 2)
	return sqrt(squared_sum)

def clusterOfElement(means, element):
	"""Return the index of the mean that lies closest to *element*.

	Distances are measured with distEclud. When several means are equally
	close, the one with the smallest index wins, because a later mean only
	replaces the current best if it is strictly closer.
	"""
	nearest_index = 0
	nearest_dist = distEclud(means[0], element)
	for candidate_index, candidate in enumerate(means[1:], 1):
		candidate_dist = distEclud(candidate, element)
		if candidate_dist < nearest_dist:
			nearest_index, nearest_dist = candidate_index, candidate_dist
	return nearest_index
	
def getMean(cluster):
	"""Return the component-wise mean (centroid) of one cluster.

	Args:
		cluster: A list of points belonging to a single cluster, e.g.
			[[1, 2], [3, 4], [5, 6]]. The number of dimensions is taken
			from the first point.

	Returns:
		A list of floats with one mean value per dimension, or an empty
		list when the cluster contains no points (there is nothing to
		average and no dimension to infer).
	"""
	if not cluster:
		return []
	num = len(cluster)	# number of points in this cluster
	dim = len(cluster[0])
	res = []
	for i in range(dim):
		# The accumulator must restart at zero for every dimension;
		# otherwise the previous dimension's mean leaks into this sum.
		total = 0.0
		for point in cluster:
			total = total + point[i]
		# total is a float, so this is true division on Python 2 as well.
		res.append(total / num)
	return res

def kMeans(filename='data.txt', init_means=None, max_iter=1000):
	"""Cluster the points stored in *filename* with the k-means algorithm.

	The points are read with loadDataSet. Each iteration assigns every
	point to its nearest mean (clusterOfElement) and then recomputes each
	mean from its members (getMean). Iteration stops as soon as the means
	no longer change, or after *max_iter* iterations. The input data, the
	final clusters and the final means are printed.

	Args:
		filename: Path of the comma-separated data file.
		init_means: Starting means, one list of coordinates per cluster;
			the number of clusters k is len(init_means). Defaults to
			[[1.1, 1], [1, 1], [1, 2]], i.e. k = 3.
		max_iter: Upper bound on the number of iterations.

	Returns:
		A (clusters, means) tuple: clusters is a list of k lists of points,
		means is a list of k mean vectors.

	Raises:
		ValueError: If init_means is empty.
	"""
	if init_means is None:
		init_means = [[1.1, 1], [1, 1], [1, 2]]
	# Work on a copy so the caller's starting means are never mutated.
	means = [list(mean) for mean in init_means]
	k = len(means)
	if k == 0:
		raise ValueError("init_means must contain at least one mean")

	data = loadDataSet(filename)
	# Single-argument print(...) is valid on Python 2 and Python 3; the two
	# spaces reproduce the output of the old `print "data is ", data`.
	print("data is  %s" % (data,))

	clusters = [[] for _ in range(k)]
	for _ in range(max_iter):
		# Assignment step: put every point into the cluster of its nearest mean.
		clusters = [[] for _ in range(k)]
		for point in data:
			clusters[clusterOfElement(means, point)].append(point)

		# Update step: recompute each mean from its members. A cluster that
		# received no points keeps its previous mean instead of crashing.
		new_means = []
		for cluster_index in range(k):
			members = clusters[cluster_index]
			if members:
				new_means.append(list(getMean(members)))
			else:
				new_means.append(means[cluster_index])

		# Unchanged means would yield the same assignment again, so any
		# further iteration could not alter the result.
		if new_means == means:
			break
		means = new_means

	print("result cluster is  %s" % (clusters,))
	print("result means is  %s" % (means,))
	return clusters, means
	
# Run the clustering as soon as this file is executed; there is no
# __main__ guard, so importing the module triggers the run as well.
kMeans()


你可能感兴趣的:(机器学习)