西瓜书《机器学习》课后答案——chapter10 _10.1 KNN算法

编程实现k近邻（k-NN）分类器，在西瓜数据集3.0α上比较其分类边界与决策树分类边界的异同。

# -*- coding: gbk -*-
""" @Author: Victoria @Date: 2017.10.26 9:00 """
import xlrd
import numpy as np
import matplotlib.pyplot as plt
import operator

class KNN():
    """Brute-force k-nearest-neighbor classifier with majority voting.

    Distances are squared Euclidean: sqrt is omitted because it is
    monotonic and therefore does not change the neighbor ranking.
    """

    def __init__(self, k):
        # k: number of neighbors consulted per prediction.
        self.k = k

    def train(self, X, y):
        """Memorize the training set (lazy learner: no fitting happens).

        X: list of N feature vectors, each of length d.
        y: list of N labels aligned with X.
        """
        self.N = len(X)
        self.d = len(X[0])
        self.X = X
        self.y = y

    def predict(self, x):
        """Return the majority label among the k nearest neighbors of x.

        Ties are broken by first occurrence in neighbor order, matching
        the behavior of a stable descending sort on vote counts.
        """
        _, neighbor_labels = self.k_neighbors(x)
        votes = {}
        for label in neighbor_labels:
            votes[label] = votes.get(label, 0) + 1
        # max() returns the first maximal item, preserving insertion order on ties.
        return max(votes.items(), key=operator.itemgetter(1))[0]

    def predicts(self, X):
        """Vectorized convenience wrapper: predict a label for each row of X."""
        return [self.predict(x) for x in X]

    def k_neighbors(self, x):
        """Return (neighbors, labels) of the k training points closest to x.

        Guarded with min(k, N) so a k larger than the training set does
        not index out of range.
        """
        ranked = sorted(
            ((self.X[i], self.y[i], self.distance(x, self.X[i]))
             for i in range(self.N)),
            key=operator.itemgetter(2),
        )
        top = ranked[:min(self.k, self.N)]
        neighbors = [point for point, _, _ in top]
        neighbors_labels = [label for _, label, _ in top]
        return neighbors, neighbors_labels

    def distance(self, x, y):
        """Squared Euclidean distance between vectors x and y (length self.d)."""
        return sum((x[i] - y[i]) ** 2 for i in range(self.d))

def plot_desicion_boundary(X, y, knn):
    """Plot the decision boundary of a trained KNN over 2-D data.

    Evaluates the classifier on a 0.01-step grid padded 0.1 beyond the
    data range, shades the regions with contourf, overlays the training
    points, and saves the figure as "k=<k>.png" before showing it.

    X: list of 2-D feature vectors; y: labels (used as scatter colors);
    knn: object exposing .predicts(list_of_points) and .k.
    """
    x_min = np.array(X)[:, 0].min() - 0.1
    x_max = np.array(X)[:, 0].max() + 0.1
    y_min = np.array(X)[:, 1].min() - 0.1
    y_max = np.array(X)[:, 1].max() + 0.1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.01), np.arange(y_min, y_max, 0.01))
    # Classify every grid point; reshape the flat predictions back to the grid.
    # (Python 2 `print` debug dumps of the full grid removed: syntax errors
    # under Python 3 and pure stdout noise.)
    Z = knn.predicts(np.vstack([xx.ravel(), yy.ravel()]).T.tolist())
    Z = np.array(Z).reshape(xx.shape)
    f, axarr = plt.subplots(1, 1, figsize=(10, 8))
    axarr.contourf(xx, yy, Z, alpha=0.4)
    axarr.scatter(np.array(X)[:, 0], np.array(X)[:, 1], c=y, s=10, edgecolor='k')
    axarr.set_title("KNN (k={})".format(knn.k))
    plt.savefig("k={}.png".format(knn.k))
    plt.show()

def main():
    """Load the watermelon 3.0alpha dataset from Excel, fit a 1-NN, and plot it."""
    workbook = xlrd.open_workbook("../../数据/3.0alpha.xlsx")
    sheet = workbook.sheet_by_name("Sheet1")
    # First two cells of each of the 17 columns are the (density, sugar) features;
    # presumably row 3 holds the class labels — verify against the spreadsheet layout.
    X = [sheet.col_values(col)[0:2] for col in range(17)]
    y = sheet.row_values(3)
    classifier = KNN(1)
    classifier.train(X, y)
    plot_desicion_boundary(X, y, classifier)


if __name__=="__main__":
    main()

西瓜书《机器学习》课后答案——chapter10 _10.1 KNN算法_第1张图片

西瓜书《机器学习》课后答案——chapter10 _10.1 KNN算法_第2张图片

西瓜书《机器学习》课后答案——chapter10 _10.1 KNN算法_第3张图片

参考:
[1] http://scikit-learn.org/stable/auto_examples/ensemble/plot_voting_decision_regions.html
[2] Tutorial To Implement k-Nearest Neighbors in Python From Scratch

你可能感兴趣的:(机器学习)