《机器学习技法》最后一次作业,共有三个编程实验,倒数两个是knn和kmeans。照例用sklearn做,很快。
kNN:分别求 k=1 和 k=5 时的 E_in 和 E_out。
# k-nearest-neighbours experiment: report E_in and E_out for k = 1 and k = 5.
from __future__ import print_function

import numpy as np
from sklearn import neighbors

# Every row is split into "all columns but the last" (features) and
# "the last column" (label), for both the training and the test file.
train_data = np.loadtxt('hw4_knn_train.dat')
train_x = train_data[:, :-1]
train_y = train_data[:, -1]

test_data = np.loadtxt('hw4_knn_test.dat')
test_x = test_data[:, :-1]
test_y = test_data[:, -1]

# Evaluate both neighbourhood sizes in one run instead of editing a
# hard-coded constant and re-running the script by hand.
for k in (1, 5):
    clf = neighbors.KNeighborsClassifier(n_neighbors=k)
    clf.fit(train_x, train_y)

    # score() reports mean accuracy, so 1 - score is the 0/1 error rate.
    err_in = 1 - clf.score(train_x, train_y)
    err_out = 1 - clf.score(test_x, test_y)

    # Output columns: k, in-sample error, out-of-sample error.
    print(k, err_in, err_out)
k-means:分别求 k=2 和 k=10 时的 E_in(随机初始化,重复 500 次实验取平均)。
# k-means experiment: average E_in over repeated randomly-initialised runs
# for k = 2 and k = 10.
from __future__ import division, print_function

import numpy as np
from sklearn.cluster import KMeans

data = np.loadtxt('hw4_kmeans_train.dat')
N = data.shape[0]  # number of rows (samples) in the data file
repeat = 500       # number of independent clustering runs to average over

# Evaluate both cluster counts in one run instead of editing a hard-coded
# constant and re-running the script by hand.
for k in (2, 10):
    e_in = 0
    for _ in range(repeat):
        # NOTE(review): n_init is left at the library default, so a single
        # fit() may already keep the best of several random starts, which
        # would lower the averaged error -- confirm this is the intended
        # experiment, or pass n_init=1 explicitly.
        clf = KMeans(n_clusters=k, init='random', max_iter=300)
        clf.fit(data)

        # inertia_ is the summed squared distance of the samples to their
        # closest centre; dividing by N gives this run's per-sample error.
        e_in += clf.inertia_ / N

    # Output columns: k, E_in averaged over all runs.
    print(k, e_in / repeat)