import numpy as np
import operator as op
def createDataSet():
    """Return a tiny hand-made training set for the k-NN demo.

    Returns:
        A ``(group, labels)`` tuple. ``group`` is a 4x2 NumPy array holding
        four 2-D sample points; ``labels`` is a list of four class labels
        where ``labels[i]`` is the class of ``group[i]``.
    """
    group = np.array([
        [1.0, 1.1],
        [1.0, 1.0],
        [0, 0],
        [0, 0.1],
    ])
    labels = ['A', 'A', 'B', 'B']
    return group, labels
# Build the sample training data once at module level so the demo call
# further down can use it.
group, labels = createDataSet()
# Debug aid: uncomment to inspect the generated samples and their labels.
# print(group, labels)
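'''
Arguments of classify():
inX: the input vector to classify
dataSet: the training sample set
labels: the label vector of the training samples
k: the number of nearest neighbours to use
Distance metric: Euclidean distance
'''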
def classify(inX, dataSet, labels, k):
    """Classify ``inX`` by majority vote among its k nearest neighbours.

    Distances are Euclidean. When several labels receive the same number of
    votes, the label that was first voted for by the closest neighbour wins.

    Args:
        inX: The point to classify; a sequence or array with one value per
            feature.
        dataSet: Training samples, array-like of shape
            ``(n_samples, n_features)``.
        labels: Sequence of ``n_samples`` class labels; ``labels[i]`` is the
            class of ``dataSet[i]``.
        k: Number of nearest neighbours that vote, ``1 <= k <= n_samples``.

    Returns:
        The label with the most votes among the ``k`` nearest samples.

    Raises:
        ValueError: If ``dataSet`` is not 2-D, if ``labels`` does not have one
            entry per sample, or if ``k`` is outside ``1..n_samples``.
    """
    # Coerce to float arrays so plain nested lists are accepted and integer
    # inputs cannot overflow when squared.
    samples = np.asarray(dataSet, dtype=float)
    query = np.asarray(inX, dtype=float)

    if samples.ndim != 2:
        raise ValueError(
            "dataSet must be 2-D (n_samples, n_features), got {} dimension(s)"
            .format(samples.ndim)
        )
    n_samples = samples.shape[0]
    if len(labels) != n_samples:
        raise ValueError(
            "labels has {} entries but dataSet has {} samples"
            .format(len(labels), n_samples)
        )
    if not 1 <= k <= n_samples:
        raise ValueError(
            "k must be between 1 and {} (the number of samples), got {}"
            .format(n_samples, k)
        )

    # Broadcasting subtracts the query from every row; no np.tile copy needed.
    distances = np.sqrt(((query - samples) ** 2).sum(axis=1))

    # A stable sort keeps equidistant samples in dataset order, which makes
    # the result deterministic.
    nearest = distances.argsort(kind="stable")[:k]

    # Tally one vote per neighbour, visiting the closest neighbour first.
    votes = {}
    for idx in nearest:
        label = labels[idx]
        votes[label] = votes.get(label, 0) + 1

    # max() returns the first maximal key in insertion order, so ties go to
    # the label seen earliest (i.e. from the nearer neighbour).
    return max(votes, key=votes.get)
# Demo: classify the point (1, 1) against the sample data using its
# 3 nearest neighbours and print the predicted label.
ret = classify([1, 1], group, labels, 3)
print(ret)  # expected output: A