本文出自:http://blog.csdn.net/xizhibei
自从上次介绍过 C++ 版本的 k-means(见《K-means之C++及OpenCV实现》)之后,感觉还有些不足的地方;加上近些时间在学习 Python(好吧,是觉得 Python 比 Perl 好点),而且刚好有对应的 OpenCV Python 库,于是就写了个 Python 版本的:
# K-means clustering demo: generate random 2-D points inside K ellipses,
# cluster them, and draw the result with the legacy OpenCV ``cv`` bindings.
#
# The syntax below is valid on both Python 2 and Python 3.  ``cv`` is imported
# lazily inside the drawing functions so that the numeric helpers can be used
# (and tested) on machines that have NumPy but no OpenCV.
import numpy as np


class Cluster(object):
    """State of one k-means cluster.

    Attributes:
        center: current centre of the cluster.
        pre_center: centre before the most recent update.
        pts: points currently assigned to the cluster.
        color: colour used when drawing the cluster.
    """

    def __init__(self):
        # Created per instance: as class attributes these mutable containers
        # would be shared by every Cluster object.
        self.center = []
        self.pre_center = []
        self.pts = []
        self.color = ()


def is_good_result(cluster, tol=1.0):
    """Return True when no cluster centre moved by more than ``tol``.

    Args:
        cluster: iterable of Cluster objects.
        tol: largest allowed Euclidean distance between ``center`` and
            ``pre_center``.

    Returns:
        bool: True if every cluster satisfies the tolerance.
    """
    return not any(
        np.linalg.norm(
            np.asarray(c.center, dtype=float)
            - np.asarray(c.pre_center, dtype=float)
        ) > tol
        for c in cluster
    )


def update_center(cluster):
    """Move every centre to the mean of its points and reset the assignment.

    For each cluster the sum of squared distances of its points to the new
    centre is printed; the value can be watched shrinking from one iteration
    to the next.

    A cluster without points keeps its current centre instead of dividing by
    zero, which would leave it with a NaN centre.

    Args:
        cluster: list of Cluster objects; modified in place.

    Returns:
        The same ``cluster`` list.
    """
    for c in cluster:
        c.pre_center = c.center
        if len(c.pts) == 0:
            sse = 0.0
        else:
            members = np.asarray(c.pts, dtype=float)
            c.center = members.mean(axis=0)
            sse = float(((members - c.center) ** 2).sum())
        print(sse)
        c.pts = []
    return cluster


def get_rand_pts(K, img_size, num):
    """Generate ``num`` random 2-D points spread over ``K`` elliptical regions.

    Ellipse centres are drawn at least 150 px away from the image border and
    the semi-axes lie in [100, 300).  Point ``i`` belongs to ellipse ``i % K``.

    Args:
        K: number of ellipses; must be at least 1.
        img_size: ``(width, height)`` of the target image.
        num: number of points to generate.

    Returns:
        list of ``num`` NumPy arrays of shape ``(2,)``.

    Raises:
        ValueError: if ``K`` is smaller than 1.
    """
    if K < 1:
        raise ValueError("K must be at least 1")

    size = np.asarray(img_size, dtype=float)
    center = np.random.rand(K, 2) * (size - 300.0) + 150.0
    r = np.random.rand(K, 2) * 200.0 + 100.0

    owner = np.arange(num) % K
    # A single angle over the full circle drives both coordinates; using two
    # independent angles in [0, pi) never produces a negative y offset and
    # does not trace an ellipse.
    theta = np.random.rand(num) * 2.0 * np.pi
    # The square root makes the points uniform over the ellipse area rather
    # than concentrated at its centre.
    rho = np.sqrt(np.random.rand(num))
    unit = np.column_stack((np.cos(theta), np.sin(theta)))
    pts = center[owner] + r[owner] * rho[:, None] * unit
    return list(pts)


def assign_points(pts, cluster):
    """Append every point to the ``pts`` list of its nearest cluster.

    Ties go to the cluster with the lowest index.

    Args:
        pts: sequence of points (array-likes of equal length).
        cluster: list of Cluster objects whose ``center`` is set.
    """
    if len(pts) == 0:
        return
    centers = np.array([np.asarray(c.center, dtype=float) for c in cluster])
    coords = np.asarray(pts, dtype=float)
    # Squared distances are enough to pick the minimum.
    diff = coords[:, None, :] - centers[None, :, :]
    nearest = (diff ** 2).sum(axis=2).argmin(axis=1)
    for p, k in zip(pts, nearest):
        cluster[k].pts.append(p)


def kmeans(pts, K, max_iter=1000):
    """Cluster ``pts`` into ``K`` groups with Lloyd's algorithm.

    Initial centres are ``K`` distinct points chosen at random from ``pts``.
    Iteration stops once ``is_good_result`` is satisfied or after
    ``max_iter`` assignment passes.

    Args:
        pts: sequence of points.
        K: number of clusters, ``1 <= K <= len(pts)``.
        max_iter: upper bound on the number of assignment passes.

    Returns:
        tuple ``(cluster, times)``: the list of Cluster objects, with ``pts``
        holding the final assignment, and the number of passes performed.

    Raises:
        ValueError: if ``K`` is outside ``1..len(pts)``.
    """
    total = len(pts)
    if not 1 <= K <= total:
        raise ValueError("K must be between 1 and the number of points")

    cluster = [Cluster() for _ in range(K)]
    # permutation() covers every index, so the last point can be a seed too.
    seeds = np.random.permutation(total)[:K]
    for c, idx in zip(cluster, seeds):
        c.center = np.array(pts[idx], dtype=float)
        # An infinitely distant previous centre guarantees at least one
        # update, even when a seed happens to lie next to the origin.
        c.pre_center = np.zeros_like(c.center) + np.inf
        c.color = tuple(int(v) for v in np.random.randint(0, 1024, 3) * 4 % 255)
        c.pts = []

    times = 0
    while True:
        assign_points(pts, cluster)
        times += 1
        # Leaving the loop right after an assignment pass keeps ``pts``
        # populated for whoever displays the result.
        if is_good_result(cluster) or times >= max_iter:
            break
        print("Times: %d" % times)
        update_center(cluster)
    return cluster, times


def show_outcome(img, cluster):
    """Draw all clusters on ``img`` and show it until a key is pressed.

    Each centre is drawn as a filled circle of radius 10 and each member
    point as a filled circle of radius 3, both in the cluster's colour.

    NOTE: colouring every pixel by its nearest centre was tried here before
    and dropped because the per-pixel Python loop was too slow.

    Args:
        img: image to draw on.
        cluster: list of Cluster objects.
    """
    import cv  # legacy OpenCV bindings, only needed for drawing

    for c in cluster:
        cv.Circle(img, (int(c.center[0]), int(c.center[1])), 10, c.color,
                  cv.CV_FILLED)
        for p in c.pts:
            cv.Circle(img, (int(p[0]), int(p[1])), 3, c.color, cv.CV_FILLED)

    cv.NamedWindow("Image")
    cv.ShowImage("Image", img)
    cv.WaitKey(0)
    cv.DestroyWindow("Image")


def main():
    """Generate 600 random points, cluster them into 4 groups and show them."""
    import cv  # legacy OpenCV bindings, only needed for drawing

    K = 4
    PTS_NUM = 600
    img = cv.CreateImage((1200, 800), cv.IPL_DEPTH_8U, 3)
    cv.SetZero(img)  # start from a black canvas
    pts = get_rand_pts(K, (img.width, img.height), PTS_NUM)
    cluster, _ = kmeans(pts, K)
    show_outcome(img, cluster)


if __name__ == "__main__":
    main()
关于kmeans不多说,上次已经介绍过了,值得提一下的是numpy这个库太强大了!!!