手写简单的KNN算法

 KNN实现类

import  numpy as np
import math
class CustomKNeighborsClassifier():
    """
    Minimal hand-written k-nearest-neighbours (KNN) classifier.

    Usage: call ``fit`` with the training data, then ``predict`` with the
    samples to classify. ``predict`` returns ``self``; the predicted labels
    are stored in ``res_class`` (one label per query row). Intermediate
    results are printed to stdout for inspection.
    """

    def __init__(self, k=3):
        """
        Create an unfitted classifier.

        Attributes initialised here:
            X_train   -- training feature matrix (set by ``fit``)
            y_train   -- training labels (set by ``fit``)
            res_class -- labels produced by the most recent ``predict`` call

        :param k: number of nearest neighbours that vote, default 3
        """
        self.k = k
        self.X_train = []
        self.y_train = []
        self.res_class = []

    def fit(self, X_train, y_train):
        """
        Store the training data (KNN has no real training step).

        :param X_train: array of shape (n_samples, n_features)
        :param y_train: array of labels with n_samples rows; either
            shape (n_samples, 1) or a flat (n_samples,) array
        :return: self
        :raises AssertionError: if the sample counts differ, or k is not
            within 1..n_samples
        """
        assert X_train.shape[0] == y_train.shape[0], "训练集特征与目标值个数不匹配!"
        assert self.k <= X_train.shape[0], "K值超出训练数据范围!"
        # k <= 0 used to slip through and make every training point vote.
        assert self.k >= 1, "K值必须为正整数!"
        self.X_train = X_train
        self.y_train = y_train
        print('K:', self.k)
        print('X:', self.X_train.shape)
        print('Y:', self.y_train.shape)
        return self

    def distance_calculation(self, x_test):
        """
        Compute Euclidean distances from every query row to every training row.

        All feature columns take part in the distance, so the method works
        for any number of features (not only two).

        :param x_test: array-like of shape (n_queries, n_features)
        :return: list with one entry per query; each entry is a list of
            float distances, ordered like the rows of ``X_train``
        """
        # Work in float so unsigned or narrow integer dtypes cannot wrap
        # around when subtracting or squaring.
        train = np.asarray(self.X_train, dtype=float)
        queries = np.asarray(x_test, dtype=float)
        distance_ = []
        for query in queries:
            squared = np.sum((train - query) ** 2, axis=1)
            # tolist() keeps the original return type: plain Python floats.
            distance_.append(np.sqrt(squared).tolist())
        return distance_

    def predict(self, X_predict):
        """
        Classify every row of ``X_predict`` by majority vote of its k
        nearest training samples.

        The labels are written to ``res_class``, replacing the results of
        any earlier ``predict`` call.

        :param X_predict: array of shape (n_queries, n_features); the
            feature count must match the training data
        :return: self
        :raises AssertionError: if the feature counts differ
        """
        assert X_predict.shape[1] == self.X_train.shape[1], '特征数不匹配!!!'
        # Distance from each query to each training sample.
        distances = self.distance_calculation(X_predict)
        print("test_x与训练集数据距离:", distances)
        # Keep only the k closest training samples per query.
        count_mix = self.count_train_by_k(distances)
        print("k近邻数据:", count_mix)
        # Start from a clean list so earlier predictions are not mixed in.
        self.res_class = []
        self.max_Voter(count_mix)
        return self

    def count_train_by_k(self, distances):
        """
        Select the k nearest training samples for each query.

        :param distances: list of per-query distance lists, as returned by
            ``distance_calculation``
        :return: list with one dict per query, mapping training-row index
            to distance, ordered from nearest to farthest and holding at
            most ``self.k`` entries
        """
        nearest = []
        for row in distances:
            # sorted() is stable, so equal distances keep training order.
            ranked = sorted(enumerate(row), key=lambda pair: pair[1])
            nearest.append(dict(ranked[:self.k]))
        return nearest

    def max_Voter(self, count_mix):
        """
        Majority vote: append the most common neighbour label of each
        query to ``res_class``.

        Ties are resolved in favour of the label whose first occurrence is
        closest to the query, which makes the result deterministic.

        :param count_mix: list of dicts keyed by training-row index, as
            returned by ``count_train_by_k``
        :return: self
        :raises ValueError: if a query has no neighbours to vote
        """
        # Look up the label of every selected neighbour.
        list_class = []
        for neighbours in count_mix:
            labels = []
            for key in neighbours:
                label = self.y_train[key]
                # Column-vector labels (n, 1) yield a length-1 row here;
                # flat label arrays (n,) already yield the scalar label.
                if np.ndim(label) > 0:
                    label = label[0]
                labels.append(label)
            list_class.append(labels)
        print('k近邻结果打印:', list_class)

        # Count the votes per query and keep the winner.
        for labels in list_class:
            votes = {}
            for label in labels:
                votes[label] = votes.get(label, 0) + 1
            # dicts keep insertion order and max() returns the first
            # maximum, so the nearest label wins a tie.
            self.res_class.append(max(votes, key=votes.get))
        print("最终预测结果", self.res_class)
        return self

 


测试

from knn实现.KneighborsClassifierCustom import CustomKNeighborsClassifier
import numpy as np

# Demo: classify two query points with the 5 nearest training samples.
kn = CustomKNeighborsClassifier(5)

# Training features: three 3x3 grids of integer points, 27 samples in total.
X = (
    [[a, b] for a in (1, 2, 3) for b in (1, 2, 3)]
    + [[a, b] for a in (6, 7, 8) for b in (6, 7, 8)]
    + [[a, b] for a in (11, 12, 13) for b in (1, 2, 3)]
)

# Earlier integer-label variant, kept for reference (unused):
# Y=[[0],[0],[0],[0],[0],
#    [1],[1],[1],[1],[1],
#    [2],[2],[2],[2],[2]]

# Training labels: one single-element row per sample, nine rows per grid.
Y = [[label] for label in ('A', 'B', 'C') for _ in range(9)]

X = np.asarray(X)
Y = np.asarray(Y)

# Fit the model on the training data.
kn.fit(X, Y)

# Predict the classes of two query points.
x_test = [[10, 1], [5, 5]]
X_test = np.asarray(x_test)
kn.predict(X_test)

# Scatter plot: split the training points into x / y coordinate lists.
x1 = [point[0] for point in X]
y1 = [point[1] for point in X]

# Same split for the query points.
x2 = [point[0] for point in X_test]
y2 = [point[1] for point in X_test]

import matplotlib.pyplot as plt
# Training samples as red stars, query points as green plus signs.
plt.plot(x1, y1, 'r*')
plt.plot(x2, y2, 'g+')
plt.show()

你可能感兴趣的:(python,机器学习,算法,KNN)