python实现K近邻算法

import numpy as np
import operator

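# test: the query sample to classify, shape (1, feature_size)
# train_x: training samples, shape (samples, feature_size)
# train_y: training labels, one hashable label per sample, shape (samples,)
# k: number of nearest neighbours that take part in the vote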
def knn(test, train_x, train_y, k):
    """Classify ``test`` by majority vote among its ``k`` nearest neighbours.

    Neighbours are ranked by Euclidean distance between ``test`` and every
    row of ``train_x``.

    Args:
        test: Feature vector of the query sample, shape ``(feature_size,)``
            or ``(1, feature_size)``.
        train_x: Training features, shape ``(samples, feature_size)``.
        train_y: Training labels, one per row of ``train_x``. Both
            ``(samples,)`` and ``(samples, 1)`` layouts are accepted.
        k: Number of nearest neighbours that vote. Values larger than the
            number of training samples are clamped to that number.

    Returns:
        The label that received the most votes. When several labels tie,
        the one whose first vote came from the closest neighbour wins.

    Raises:
        ValueError: If ``k`` is smaller than 1 or ``train_x`` has no rows.
    """
    train_x = np.asarray(train_x)
    # Flatten so that a (samples, 1) label column yields hashable scalars
    # instead of 1-element arrays, which cannot be used as dict keys.
    labels = np.asarray(train_y).reshape(-1)

    sample_count = train_x.shape[0]
    if sample_count == 0:
        raise ValueError("train_x must contain at least one sample")
    if k < 1:
        raise ValueError("k must be at least 1")
    # Asking for more neighbours than exist simply lets every sample vote.
    k = min(k, sample_count)

    # Broadcasting subtracts the query from every training row; it replaces
    # the explicit np.tile(test, (samples, 1)) copy with the same result.
    diff = np.asarray(test) - train_x
    distances = np.sqrt((diff ** 2).sum(axis=1))

    # A stable sort keeps equidistant samples in their original order, so
    # the outcome is deterministic when distances tie.
    nearest = distances.argsort(kind="stable")[:k]

    votes = {}
    for label in labels[nearest]:
        votes[label] = votes.get(label, 0) + 1

    # max() returns the first maximal entry in insertion order, i.e. the
    # label reached first while walking outwards from the query.
    return max(votes.items(), key=operator.itemgetter(1))[0]

* 关于函数numpy.tile()的用法,可以参考:Numpy中tile()函数简单理解

你可能感兴趣的:(python实现K近邻算法)