K近邻(KNN)算法——对数据进行分类

代码如下:


from scipy.spatial import distance
def euc(a, b):
    """Return the Euclidean (straight-line) distance between points a and b.

    Both arguments are handed unchanged to
    ``scipy.spatial.distance.euclidean``; the distance is symmetric, so it
    does not matter which argument is the training point and which is the
    test point.
    """
    straight_line = distance.euclidean(a, b)
    return straight_line

class ScrappyKNN:
    """Bare-bones 1-nearest-neighbour classifier.

    ``fit`` only memorises the training data; ``predict`` labels every test
    row with the label of the single training row closest to it, where
    "closest" is measured with the module-level ``euc`` helper.
    """

    def fit(self, x_train, y_train):
        """Remember the training samples and their labels.

        ``x_train`` and ``y_train`` are read in parallel by ``closest``:
        ``y_train[i]`` is used as the label of ``x_train[i]``.
        """
        self.x_train = x_train
        self.y_train = y_train

    def predict(self, x_test):
        """Return a list holding one predicted label per row of ``x_test``."""
        return [self.closest(row) for row in x_test]

    def closest(self, row):
        """Return the label of the training sample nearest to ``row``.

        Requires at least one training sample (index 0 seeds the search).
        On a distance tie the earliest training sample wins, because a
        later candidate only replaces the current best when it is strictly
        closer.
        """
        best_dist = euc(row, self.x_train[0])
        best_index = 0
        for i in range(1, len(self.x_train)):
            dist = euc(row, self.x_train[i])
            # Keep track of the smallest distance seen so far and where it
            # occurred; the label at that index is the prediction.
            if dist < best_dist:
                best_dist = dist
                best_index = i
        return self.y_train[best_index]

from sklearn import datasets  # bundled sample datasets
iris = datasets.load_iris()

x = iris.data
y = iris.target

# train_test_split lives in sklearn.model_selection since scikit-learn 0.18;
# the old sklearn.cross_validation module was removed in 0.20, so it is only
# used as a fallback on very old installations.
try:
    from sklearn.model_selection import train_test_split
except ImportError:
    from sklearn.cross_validation import train_test_split
# Hold out half of the samples for testing; the split is random.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.5)

# Alternative: use a decision tree
# from sklearn import tree
# my_classifier=tree.DecisionTreeClassifier()

# Another alternative classifier
# from sklearn.neighbors import KNeighborsClassifier

my_classifier = ScrappyKNN()

my_classifier.fit(x_train, y_train)
predictions = my_classifier.predict(x_test)

print(predictions)  # predicted labels for the test set

from sklearn.metrics import accuracy_score
# Accuracy on the test set. The value can differ between runs because the
# train/test split above is random.
print(accuracy_score(y_test, predictions))

你可能感兴趣的:(Python编程,机器学习)