Python 实现K-means算法

# coding=utf-8
import os
import sys
from pylab import *
import random
import types
# Shared registry written by canopy(): each key is a center's fields joined
# with commas, each value is the list of points gathered around that center.
cny = dict()
def genRandom():
    """Draw a random 3x2 sample, print it, and return it.

    `rand` is supplied by the file's `from pylab import *`
    (presumably numpy.random.rand -- confirm); the call asks for 3 rows of
    2 values, i.e. three candidate 2-D points.

    Returns:
        Whatever `rand(3, 2)` produced.
    """
    f = rand(3, 2)
    # Single-argument print with %-formatting emits the same text on
    # Python 2 and Python 3 (the old print statement is Python 2 only).
    print("random number: %s" % (f,))
    return f
def loaddata(path="D:/kmeans.txt"):
    """Read tab-separated rows from a text file.

    Args:
        path: File to read. Defaults to the location this script has
            always used, so existing zero-argument calls are unaffected.

    Returns:
        A list with one entry per line read; each entry is the list of raw
        string fields obtained by splitting the line on tab characters.
        Reading stops at the first blank (whitespace-only) line or at end
        of file, whichever comes first.
    """
    vals = []
    # The context manager closes the file even if reading raises.
    with open(path, "r") as f:
        for line in f:
            # A blank line marks the end of the data section.
            if not line.strip():
                break
            vals.append(line.strip("\n").split("\t"))
    return vals
def euclid(list1,list2):
    """Return the Euclidean distance between two coordinate sequences.

    Each coordinate is passed through float(), so numeric strings are
    accepted. The walk is driven by `list1`: extra trailing items in `list2`
    are ignored, and a shorter `list2` raises IndexError.
    """
    squared_sum = 0.0
    for pos, coord in enumerate(list1):
        delta = float(coord) - float(list2[pos])
        squared_sum += delta ** 2
    return sqrt(squared_sum)


def classfy(rn,val):
    """Assign every point to its nearest center to form the initial clusters.

    Args:
        rn: Sequence of centers (for example the randomly drawn points);
            each center is a sequence of coordinates accepted by euclid().
        val: Sequence of points; each point is a sequence of coordinate
            strings (they are joined with "," to build the cluster entries).

    Returns:
        A tuple `(rn, clusters)`. `clusters` maps the 1-based position of
        each center in `rn` to the list of points assigned to it, every
        point encoded as a comma-joined string such as "0.3,0.25". A center
        that attracted no point maps to an empty list.

    Raises:
        ValueError: If there are points to assign but no centers.
    """
    # len() rather than truthiness: rn may be an array, whose truth value
    # is ambiguous.
    center_count = len(rn)
    clusters = {}
    for position in range(center_count):
        clusters[position + 1] = []
    for point in val:
        if center_count == 0:
            raise ValueError("classfy needs at least one center")
        distances = [euclid(center, point) for center in rn]
        # index(min(...)) picks the closest center; ties go to the first one.
        nearest = distances.index(min(distances))
        clusters[nearest + 1].append(",".join(point))
    return rn, clusters
'''
t1 0.38,t2 0.6
'''


def canopy(vals,t1,t2):
    """Group points into canopies around randomly chosen centers.

    Repeatedly picks a random center among the points still available and
    measures every other available point against it with euclid():

    * distance <= t1: the point joins the canopy and is no longer available
      as a future center;
    * t1 < distance <= t2: the point joins the canopy but stays available.

    A center that gathers no point at all is not recorded. The script's own
    call uses t1=0.38 and t2=0.6.

    Args:
        vals: List of points, each a list of coordinate strings. The list
            is copied, so the caller's list is left untouched.
        t1: Tight threshold (see above).
        t2: Loose threshold (see above).

    Returns:
        The module-level `cny` dict, updated in place: keys are the
        comma-joined fields of each center, values are the lists of points
        in that canopy. Entries from earlier calls are kept.
    """
    # Work on a copy: consuming the caller's list is a surprising side effect.
    remaining = list(vals)
    # Iterate instead of recursing once per center, so large inputs cannot
    # hit the recursion limit; an empty input simply skips the loop.
    while remaining:
        center1 = remaining.pop(random.randint(0, len(remaining) - 1))
        ccny = []
        survivors = []
        # Build the next candidate list separately; removing items from a
        # list while iterating over it silently skips the following item.
        for vl in remaining:
            dis = euclid(vl, center1)
            if dis <= t1:
                ccny.append(vl)
            else:
                survivors.append(vl)
                if dis <= t2:
                    ccny.append(vl)
        remaining = survivors
        if ccny:
            cny[",".join(center1)] = ccny
        # Single-argument print gives the same output on Python 2 and 3.
        print("the remaning size of vals: %s" % len(remaining))
    return cny
    
def kmeans(vc):
    """Compute the 2-D centroid of every cluster.

    Args:
        vc: Mapping from a cluster key to a list of points, each encoded as
            an "x,y" string. Entries that do not split into exactly two
            comma-separated fields (for example empty strings) are ignored.

    Returns:
        A list with one `[mean_x, mean_y]` pair per cluster, in the
        iteration order of `vc`. A cluster without any valid point yields
        `[0.0, 0.0]`.
    """
    res = []
    for fh in vc:
        size = 0
        resultx = 0.0
        resulty = 0.0
        for dv in vc[fh]:
            dvs = dv.split(",")
            if len(dvs) != 2:
                continue
            resultx += float(dvs[0])
            resulty += float(dvs[1])
            # Count only entries that contributed to the sums; counting
            # malformed entries would drag the mean towards zero.
            size += 1
        if size:
            res.append([resultx / size, resulty / size])
        else:
            # No valid point: avoid dividing by zero.
            res.append([0.0, 0.0])
    return res
    


# Script body: load the points, then build and print their canopies using
# thresholds t1=0.38 and t2=0.6.
val = loaddata()
print(canopy(val, 0.38, 0.6))



数据kmeans.txt:

0.3 0.25
0.9 0.6
0.5 0.9
0.2 0.15
0.3 1.0
0.85 0.55
0.95 0.4
0.2 0.3
0.12 0.1
0.1 0.9
0.8 0.5
0.13 0.3
0.95 0.75
0.4 0.9
0.25 0.8
0.8 0.65
0.2 0.85
0.1 0.3


你可能感兴趣的:(机器学习算法)