Machine Learning 笔记之KNN算法

#coding=utf8
# Training data is stored as a list of [[features], class_label] pairs
import math

class trainSet:
    """Labelled training samples.

    Attributes:
        Data: list of ``[features, label]`` pairs, one per entry of ``clas``.
        clas: the distinct labels, in order of first appearance.
    """

    def __init__(self, data, clas):
        """Pair every label with its feature vector and collect distinct labels.

        data: sequence of feature vectors, indexed in step with ``clas``.
        clas: sequence of hashable class labels, one per sample.
        """
        self.Data = [[data[i], label] for i, label in enumerate(clas)]

        # De-duplicate in first-seen order rather than via list(set(...)):
        # set iteration order for strings can differ between interpreter
        # runs, which would make class indices (and tie-breaking between
        # classes) non-deterministic.
        seen = set()
        distinct = []
        for label in clas:
            if label not in seen:
                seen.add(label)
                distinct.append(label)
        self.clas = distinct

class KNN:
    """Classify a point by the mean distance to each class's k nearest samples.

    For every class in the training set, the Euclidean distances from the
    query point to that class's samples are computed, the ``k`` smallest are
    averaged, and the class with the smallest average is chosen.  The
    prediction is computed once at construction time and stored in
    ``result``.
    """

    def __init__(self, trainSet, data, k):
        """Classify ``data`` against ``trainSet``.

        trainSet: object exposing ``Data`` (list of ``[features, label]``
            pairs) and ``clas`` (list of distinct labels).
        data: feature vector to classify.
        k: number of nearest samples per class to average; must be >= 1.

        Raises ValueError if ``k`` is less than 1, or if a sample carries a
        label that is not listed in ``trainSet.clas``.
        """
        # k == 0 would average an empty list and a negative k would silently
        # drop samples through slicing, so reject both up front.
        if k < 1:
            raise ValueError("k must be at least 1, got %r" % (k,))
        self.trainSet = trainSet
        self.clas = trainSet.clas
        self.k = k
        self.data = data
        self.result = self.__getResult()

    def __getResult(self):
        """Return the label whose k nearest samples are closest on average."""
        # One bucket of distances per class, in the same order as self.clas.
        per_class = [[] for _ in self.clas]
        for sample in self.trainSet.Data:
            distance = self.__calDidtance(sample[0], self.data)
            per_class[self.__getIndex(sample[1])].append(distance)

        mean_distances = []
        for distances in per_class:
            nearest = sorted(distances)[:self.k]
            mean_distances.append(sum(nearest) / float(len(nearest)))

        return self.clas[self.__findMin(mean_distances)]

    def __calDidtance(self, data1, data2):
        """Return the Euclidean distance between two feature vectors.

        Coordinates are read by position for every index of ``data1``.
        """
        return math.sqrt(sum((a - data2[i]) ** 2 for i, a in enumerate(data1)))

    def __getIndex(self, str1):
        """Return the position of label ``str1`` in ``self.clas``.

        Raises ValueError for an unknown label instead of silently filing the
        sample under the first class.
        """
        try:
            return self.clas.index(str1)
        except ValueError:
            raise ValueError("label %r is not one of the known classes" % (str1,))

    def __findMin(self, kdistance):
        """Return the index of the smallest value (first one wins on ties).

        Values are compared against each other rather than against a fixed
        sentinel, so arbitrarily large distances are handled.  Returns 0 for
        an empty list.
        """
        index = 0
        for t in range(1, len(kdistance)):
            if kdistance[t] < kdistance[index]:
                index = t
        return index


# Demo: two samples per class, then classify one query point using the
# 2 nearest samples of each class.
data=[[1,1],[1,1.1],[2,2],[2,2.5]]
cls=['A','A','B','B']


train=trainSet(data,cls)
inputData=[1.6,1.2]

knn1=KNN(train,inputData,2)

# A single parenthesised argument is valid both as a Python 2 print statement
# and as a Python 3 print() call; the two spaces after the colon reproduce the
# separator the original comma-separated print statement emitted.
print("The input is :  %s" % (inputData,))
print("The resullt is :  %s" % (knn1.result,))



你可能感兴趣的:(Python,算法,机器学习,算法)