Implementing the KNN Algorithm by Hand - A Simple Version

import numpy as np

class KNeighborsClassifier(object):
    def __init__(self, n_neighbors=5):  # n_neighbors is the number of neighbors that vote; defaults to 5
        self.n_neighbors = n_neighbors

    def fit(self, data_x, data_y):  # KNN is a lazy learner: fit only stores the training data
        if data_x.shape[0] != data_y.shape[0]:
            raise ValueError('data_x and data_y have different numbers of samples')
        self.x = data_x
        self.y = data_y

    def predict(self, x):
        result = []
        len_x = x.shape[0]
        for i in range(len_x):    # predict each test sample in turn
            count = {}
            inx = x[i]            # inx is the i-th test sample
            distance = np.sum((self.x - inx)**2, axis=1) ** 0.5  # Euclidean distance to every training sample
            distancesort = distance.argsort()  # indices of training samples sorted by increasing distance
            for k in range(self.n_neighbors):  # walk through the n_neighbors nearest points
                value = self.y[distancesort[k]]   # label of the k-th nearest neighbor
                count[value] = count.get(value, 0) + 1  # tally the label in a dict
            # sort the vote counts in descending order and take the most common label
            sorted_class_count = sorted(count.items(), key=lambda item: item[1], reverse=True)
            result.append(sorted_class_count[0][0])  # store the predicted label
        return result
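The per-sample loop above is easy to follow but slows down as the test set grows. As a rough sketch (not part of the original implementation; the function name pairwise_euclidean is just illustrative), the full test-to-train distance matrix can be computed in one pass with numpy broadcasting:

import numpy as np

def pairwise_euclidean(test_x, train_x):
    # test_x has shape (m, d), train_x has shape (n, d);
    # the result is an (m, n) matrix whose entry [i, j] is the
    # Euclidean distance from test sample i to training sample j.
    diff = test_x[:, np.newaxis, :] - train_x[np.newaxis, :, :]
    return np.sqrt(np.sum(diff ** 2, axis=2))

# np.argsort(distances, axis=1)[:, :k] would then give the k nearest training
# indices for every test sample at once, instead of looping one sample at a time.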

Next, let's test this KNeighborsClassifier using a dataset from sklearn (the iris dataset).

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

iris = load_iris()
x = iris.data
y = iris.target
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3)

knn = KNeighborsClassifier()
knn.fit(x_train, y_train)
result = knn.predict(x_test)
print(accuracy_score(y_test, result))  # compute the accuracy

Running it, the accuracy comes out as 0.977777777778 (the train/test split is random, so the exact number varies from run to run).
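As a quick sanity check (a sketch, not from the original post), the same split can be fed to sklearn's built-in KNeighborsClassifier; with n_neighbors=5 and the default uniform weights, its accuracy should come out very close to the hand-rolled version:

from sklearn.neighbors import KNeighborsClassifier as SklearnKNN

sk_knn = SklearnKNN(n_neighbors=5)   # same number of neighbors as our class
sk_knn.fit(x_train, y_train)
sk_result = sk_knn.predict(x_test)
print(accuracy_score(y_test, sk_result))  # should be close to the score above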
