撰写一个分类器:k-近邻算法(简化版)

欧氏距离:(n维空间中两点的距离)

代码:

# k近邻算法(简化版)

from scipy.spatial import distance


def euc(a, b):
    """Return the Euclidean (L2) distance between two n-dimensional points."""
    dist = distance.euclidean(a, b)
    return dist

class Lfw():
    """A minimal 1-nearest-neighbour classifier (k fixed at 1)."""

    def fit(self, x_train, y_train):
        """Memorise the training features and their labels."""
        self.x_train = x_train
        self.y_train = y_train

    def predict(self, x_test):
        """Return a list with the predicted label for every row of x_test."""
        # k = 1: each prediction only consults the single closest point.
        return [self.closest(sample) for sample in x_test]

    def closest(self, row):
        """Label of the training point nearest to ``row``.

        On distance ties the earliest training point wins, matching the
        strict ``<`` comparison of a manual scan.
        """
        distances = (euc(row, point) for point in self.x_train)
        nearest_index, _ = min(enumerate(distances), key=lambda pair: pair[1])
        return self.y_train[nearest_index]

    
from sklearn import datasets
iris = datasets.load_iris()  # load the iris flower dataset

x = iris.data    # feature matrix (sepal/petal measurements)
y = iris.target  # class labels (species)

# NOTE: sklearn.cross_validation was deprecated in 0.18 and removed in
# scikit-learn 0.20; train_test_split now lives in sklearn.model_selection.
from sklearn.model_selection import train_test_split
# Split the data 50/50 into training and test sets;
# x holds the features, y holds the labels.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=.5)

classifier = Lfw()
classifier.fit(x_train, y_train)  # fit the classifier on the training data
predictions = classifier.predict(x_test)  # predict labels for the test set

from sklearn.metrics import accuracy_score
print(accuracy_score(y_test, predictions))  # print the classification accuracy

结果:准确率是大于0.9的

你可能感兴趣的:(机器学习)