最近做数据挖掘实验,需要写一个 K-means 算法,写下来并不是很难。写的过程中想到 Python 肯定有现成的包,虽然师兄说不让用,不过自己已经手写完了;而用包的话还不太熟,于是稍微查了些资料,学习了一下。另外,自己手写的版本太烂了,不敢拿出来,等后续改进了再贴出来吧。
1. 注意:初始的数据点需要先转换为 numpy.array 数组格式。
2. 如果只需要计算聚类中心点,直接调用 kmeans2 函数即可;后面的画图部分只是为了可视化。
#!/usr/bin/python
"""Cluster ten hand-picked 2-D points with SciPy's kmeans2 and plot them.

The script prints the centroids found by kmeans2 and saves a scatter plot
of the points, coloured by cluster label and with each centroid marked by
a circled X, to ``pic.png`` in the current working directory.
"""
import os

import matplotlib
import numpy

# The backend has to be chosen before pylab is imported; Agg renders
# straight to image files.
matplotlib.use('Agg')

import pylab
from scipy.cluster.vq import kmeans2

pylab.close()

xy1 = [[2, 10], [2, 5], [8, 4], [5, 8], [7, 5],
       [6, 4], [1, 2], [4, 9], [7, 3], [1, 3]]
# The raw points must be converted to a numpy array; an explicit float
# dtype keeps the centroid arithmetic in floating point.
xy2 = numpy.array(xy1, dtype=float)
cluster_num = 3

# xy2 already holds one (x, y) row per point, so it is handed to kmeans2
# as-is.  Rebuilding it with numpy.array(zip(...)) only works on Python 2:
# on Python 3 zip() is lazy and numpy wraps it in a 0-d object array.
res, idx = kmeans2(xy2, cluster_num)

# Two single-argument calls print the same text on Python 2 and Python 3.
print("local centre points:")
print(res)

# One RGB colour per cluster label; the palette needs at least
# cluster_num entries.
palette = numpy.array([[0.4, 1, 0.4], [1, 0.4, 0.4], [0.1, 0.8, 1]])
colors = palette[idx]

# plot colored points
pylab.scatter(xy2[:, 0], xy2[:, 1], c=colors)

# mark centroids as (X): a hollow ring with a cross drawn on top.  The ring
# needs an explicit edge colour, otherwise the edge follows the transparent
# face and nothing is drawn.
pylab.scatter(res[:, 0], res[:, 1], marker='o', s=500, linewidths=2,
              facecolors='none', edgecolors='k')
pylab.scatter(res[:, 0], res[:, 1], marker='x', s=500, linewidths=2)

# To find out where pic.png ends up, uncomment: print(os.getcwd())
pylab.savefig('pic.png')
# 效果图 (resulting figure):
#---------------------------------------------------------------------------
参考:http://blog.csdn.net/brandohero/article/details/39967663
#!/usr/bin/python
# Adapted from http://hackmap.blogspot.com/2007/09/k-means-clustering-in-scipy.html
"""Cluster three random 2-D point clouds with SciPy's kmeans2 and plot them.

Draws 500 normally distributed points around three different means, asks
kmeans2 for three clusters, and saves a scatter plot coloured by cluster
label, with each centroid marked by a circled X, to ``/tmp/kmeans.png``.
"""
import matplotlib
import numpy

# The backend has to be chosen before pylab is imported; Agg renders
# straight to image files.
matplotlib.use('Agg')

import pylab
from scipy.cluster.vq import kmeans2

pylab.close()

# generate 3 sets of normally distributed points around
# different means with different variances
pt1 = numpy.random.normal(1, 0.2, (100, 2))
pt2 = numpy.random.normal(2, 0.5, (300, 2))
pt3 = numpy.random.normal(3, 0.3, (100, 2))

# slightly move sets 2 and 3 (for a prettier output)
pt2[:, 0] += 1
pt3[:, 0] -= 0.5

xy = numpy.concatenate((pt1, pt2, pt3))

# kmeans for 3 clusters.  xy already holds one (x, y) row per point, so it
# is handed over as-is; numpy.array(zip(...)) only works on Python 2
# because zip() is lazy on Python 3 and numpy wraps it in a 0-d object
# array.
res, idx = kmeans2(xy, 3)

# One RGB colour per cluster label (three clusters, three colours).
palette = numpy.array([[0.4, 1, 0.4], [1, 0.4, 0.4], [0.1, 0.8, 1]])
colors = palette[idx]

# plot colored points
pylab.scatter(xy[:, 0], xy[:, 1], c=colors)

# mark centroids as (X): a hollow ring with a cross drawn on top.  The ring
# needs an explicit edge colour, otherwise the edge follows the transparent
# face and nothing is drawn.
pylab.scatter(res[:, 0], res[:, 1], marker='o', s=500, linewidths=2,
              facecolors='none', edgecolors='k')
pylab.scatter(res[:, 0], res[:, 1], marker='x', s=500, linewidths=2)

pylab.savefig('/tmp/kmeans.png')
#------------------------------------
转载请注明出处:http://write.blog.csdn.net/postedit/41158167