import numpy as np
import math
class CustomKNeighborsClassifier():
    """Hand-written k-nearest-neighbours (KNN) classifier.

    Samples are compared with the Euclidean distance over *all* feature
    columns; the predicted class is the majority label among the ``k``
    closest training samples.

    Attributes:
        k: Number of neighbours that take part in the vote.
        X_train: Training feature matrix stored by ``fit``.
        y_train: Training labels stored by ``fit``.
        res_class: Labels produced by the most recent ``predict`` call,
            one entry per predicted sample.
    """

    def __init__(self, k=3):
        """Create an unfitted classifier.

        :param k: number of neighbours used for voting (default 3)
        """
        self.k = k
        self.X_train = []
        self.y_train = []
        self.res_class = []

    def fit(self, X_train, y_train):
        """Store the training data (KNN has no real training phase).

        :param X_train: training features, array-like of shape
            (n_samples, n_features)
        :param y_train: training labels, array-like of shape (n_samples,)
            or (n_samples, 1)
        :return: self
        :raises AssertionError: if the sample counts differ or ``k`` is not
            in the range ``1..n_samples``
        """
        # Accept plain lists as well as arrays.
        X_train = np.asarray(X_train)
        y_train = np.asarray(y_train)
        assert X_train.shape[0] == y_train.shape[0], "训练集特征与目标值个数不匹配!"
        assert self.k <= X_train.shape[0], "K值超出训练数据范围!"
        # A non-positive k cannot select any neighbour to vote.
        assert self.k >= 1, "K值必须为正整数!"
        self.X_train = X_train
        self.y_train = y_train
        print('K:', self.k)
        print('X:', self.X_train.shape)
        print('Y:', self.y_train.shape)
        return self

    def distance_calculation(self, x_test):
        """Compute Euclidean distances from each query sample to every
        training sample.

        :param x_test: query samples, array-like of shape
            (n_queries, n_features)
        :return: list with one entry per query; each entry is a list of
            Python floats, ordered like the rows of ``X_train``
        """
        train = np.asarray(self.X_train)
        # Broadcasting subtracts the query from every training row at once,
        # so every feature column contributes (not just the first two).
        return [
            np.sqrt(np.sum((train - np.asarray(sample)) ** 2, axis=1)).tolist()
            for sample in x_test
        ]

    def predict(self, X_predict):
        """Classify the given samples; results are stored in ``res_class``.

        :param X_predict: samples to classify, array-like of shape
            (n_queries, n_features)
        :return: self (read the predicted labels from ``self.res_class``)
        :raises AssertionError: if the feature count differs from the
            training data
        """
        X_predict = np.asarray(X_predict)
        assert X_predict.shape[1] == self.X_train.shape[1], '特征数不匹配!!!'
        # Distances between every query sample and the training data.
        distances = self.distance_calculation(X_predict)
        print("test_x与训练集数据距离:", distances)
        # Keep only the k nearest neighbours of each query.
        count_mix = self.count_train_by_k(distances)
        print("k近邻数据:", count_mix)
        # Start from a clean slate so results of earlier calls do not
        # accumulate in res_class.
        self.res_class = []
        # Majority vote.
        self.max_Voter(count_mix)
        return self

    def count_train_by_k(self, distances):
        """Select the k nearest training samples for every query.

        :param distances: list of per-query distance lists as returned by
            ``distance_calculation``
        :return: list with one dict per query mapping training-row index to
            distance, holding at most ``k`` entries in ascending distance
            order (equal distances keep ascending index order)
        """
        return [
            # sorted() is stable, so ties keep the lower training index first.
            dict(sorted(enumerate(row), key=lambda pair: pair[1])[:self.k])
            for row in distances
        ]

    def max_Voter(self, count_mix):
        """Majority vote over the neighbours of every query.

        The winning label of each query is appended to ``self.res_class``.
        When several labels share the highest count, the label of the
        nearest neighbour among them wins, which makes the result
        deterministic.

        :param count_mix: list of ``{training_index: distance}`` dicts as
            returned by ``count_train_by_k``
        :return: self
        """
        targets = np.asarray(self.y_train)
        # Labels may be given as a column (n, 1) or a flat vector (n,).
        if targets.ndim > 1:
            targets = targets[:, 0]
        # Labels of the selected neighbours, nearest first.
        list_class = [
            [targets[index] for index in neighbours]
            for neighbours in count_mix
        ]
        print('k近邻结果打印:', list_class)
        for labels in list_class:
            votes = {}
            for label in labels:
                votes[label] = votes.get(label, 0) + 1
            # dicts keep insertion order and max() returns the first maximal
            # key, so a tie goes to the label seen first (the nearest one).
            self.res_class.append(max(votes, key=votes.get))
        print("最终预测结果", self.res_class)
        return self
from knn实现.KneighborsClassifierCustom import CustomKNeighborsClassifier
import numpy as np
# Demo: classify two query points with a 5-nearest-neighbour vote and
# show training and query points in a scatter plot.
kn = CustomKNeighborsClassifier(5)

# Training features: three 3x3 grids of integer points whose lower-left
# corners are (1, 1), (6, 6) and (11, 1).
X = np.asarray([
    [col, row]
    for col_start, row_start in ((1, 1), (6, 6), (11, 1))
    for col in range(col_start, col_start + 3)
    for row in range(row_start, row_start + 3)
])
# Training labels: one single-element row per point, nine points per class.
Y = np.asarray([[label] for label in 'ABC' for _ in range(9)])

# Fit the model.
kn.fit(X, Y)

# Predict the two query points.
x_test = [[10, 1], [5, 5]]
X_test = np.asarray(x_test)
kn.predict(X_test)

# Split both point sets into per-axis coordinate lists for plotting.
x1 = [point[0] for point in X]
y1 = [point[1] for point in X]
x2 = [point[0] for point in X_test]
y2 = [point[1] for point in X_test]

import matplotlib.pyplot as plt

# Training points as red stars, query points as green plus signs.
plt.plot(x1, y1, 'r*')
plt.plot(x2, y2, 'g+')
plt.show()