KNN

from numpy import *
import operator


def create_datas():
    """Build the toy training set used by the KNN demo.

    Returns:
        A ``(datasets, labels)`` pair: a 5x2 array of sample points and a
        list holding the class label ('A' or 'B') of each row.
    """
    # Keep every point next to its class so the pairing is obvious.
    samples = [
        ([1, 0], 'A'),
        ([0, 1], 'A'),
        ([0, 0], 'A'),
        ([1, 1], 'B'),
        ([2, 2], 'B'),
    ]
    points = array([point for point, _ in samples])
    classes = [tag for _, tag in samples]
    return points, classes


def KNN_algorithm(x, datas, labels, k):
    """Rank the class labels of the ``k`` training samples nearest to ``x``.

    Distances are Euclidean. Every intermediate result is printed so the
    computation can be followed step by step.

    Args:
        x: Query point, one coordinate per feature column of ``datas``.
        datas: Training samples, one row per sample. An ndarray or a
            nested list of equal-length rows.
        labels: Sequence where ``labels[i]`` is the class of row ``i`` of
            ``datas``.
        k: Number of nearest neighbours that vote. A value larger than the
            number of samples uses all samples; a value <= 0 yields an
            empty result.

    Returns:
        List of ``(label, votes)`` tuples sorted by descending vote count.
        Labels with equal votes keep the order in which they were first
        met, nearest neighbour first.
    """
    print('x=', x)
    # Accept plain nested lists as well as ndarrays (``.shape`` is needed).
    datas = asarray(datas)
    # Repeat the query point once per training row for the subtraction.
    tmp = tile(x, (datas.shape[0], 1))
    print('res=\n', tmp)
    diff = datas - tmp
    print('diffMax=\n', diff)
    squared = diff ** 2
    print('powMax=\n', squared)
    squared_sum = squared.sum(axis=1)
    print('sumPowMax=\n', squared_sum)
    distances = squared_sum ** 0.5
    print('sqrtMax=\n', distances)
    # Row indices ordered from the nearest sample to the farthest.
    order = distances.argsort()
    print('sortMat=\n', order)
    votes = {}
    # Slicing clamps k to the sample count instead of raising IndexError;
    # max(k, 0) keeps a non-positive k meaning "no neighbours".
    for index in order[:max(k, 0)]:
        label = labels[index]
        print('i = ', index)
        print('label = ', label)
        votes[label] = votes.get(label, 0) + 1
    # sorted() is stable, so tied labels stay in first-seen order.
    ranking = sorted(votes.items(), key=operator.itemgetter(1), reverse=True)
    print('sortedResult=\n', ranking)
    return ranking



def domain():
    """Run the KNN demo on the toy data set and print the result.

    Classifies the point [1, 2] with k=3 and prints the first entry of
    the ranking returned by ``KNN_algorithm``.
    """
    samples, tags = create_datas()
    print('datas=\n', samples)
    print('labels=', tags)
    query, neighbours = [1, 2], 3
    ranking = KNN_algorithm(query, samples, tags, neighbours)
    winner = ranking[0]
    print('res = ', winner)


# Run the demo only when this file is executed as a script, not on import.
if __name__ == '__main__':
    domain()

欧式距离:$d(x, y) = \sqrt{\sum_{i=1}^{n} (x_i - y_i)^2}$,其中 $x$ 为测试数据,$y$ 为某个训练数据,$n$ 为特征维数。

KNN 算法的描述为:

1)计算测试数据与各个训练数据之间的距离;

2)按照距离的递增关系进行排序;

3)选取距离最小的K个点;

4)确定前K个点所在类别的出现频率;

5)返回前K个点中出现频率最高的类别作为测试数据的预测分类。

你可能感兴趣的:(机器学习)