机器学习实战笔记 k-近邻算法

P 18 改变当前目录

# Inspect and change the interpreter's current working directory.
import os
os.getcwd() # returns the current working directory
os.chdir("F:\\Python") # change the working directory; note the doubled backslash ("\\") used to escape the path separator


P 19 代码

# -*- coding: utf-8 -*-
from numpy import *
import operator

def createDataSet():
    """Build the tiny four-point sample set used to try out the classifier.

    Returns:
        A ``(group, labels)`` tuple: ``group`` is a 4x2 array holding one
        point per row, and ``labels`` is a list giving the class label of
        the corresponding row.
    """
    # Keep each point next to its label so the pairing is obvious.
    samples = [
        ([1.0, 1.1], 'A'),
        ([1.0, 1.0], 'A'),
        ([0, 0], 'B'),
        ([0, 0.1], 'B'),
    ]
    group = array([point for point, _ in samples])
    labels = [tag for _, tag in samples]
    return group, labels

def classify0(inX, dataSet, labels, k):
    """Classify ``inX`` by majority vote among its k nearest rows of ``dataSet``.

    Nearness is measured with the Euclidean distance between ``inX`` and
    every row of ``dataSet``.

    Args:
        inX: The point to classify; one value per column of ``dataSet``.
        dataSet: 2-D array with one known sample per row.
        labels: Sequence whose i-th entry is the label of row i of ``dataSet``.
        k: Number of nearest neighbours that take part in the vote.

    Returns:
        The label that collected the most votes among the k nearest rows.
        When several labels tie, the winner follows the iteration order of
        the vote-count dict.

    Raises:
        IndexError: If ``k`` is smaller than 1 (nobody votes) or larger than
            the number of rows / labels available.
    """
    # shape is (rows, columns), so shape[0] is the number of samples.
    dataSetSize = dataSet.shape[0]
    # Repeat inX once per sample row so it can be subtracted element-wise.
    diffMat = tile(inX, (dataSetSize, 1)) - dataSet
    # Euclidean distance: square each difference, sum per row, take the root.
    sqDiffMat = diffMat ** 2
    sqDistances = sqDiffMat.sum(axis=1)
    distances = sqDistances ** 0.5
    # argsort returns the row indices ordered from nearest to farthest,
    # e.g. argsort of [3, 1, 2] is [1, 2, 0].
    sortedDistIndicies = distances.argsort()
    classCount = {}
    for i in range(k):
        voteIlabel = labels[sortedDistIndicies[i]]
        # get(..., 0) starts the tally at zero the first time a label is seen.
        classCount[voteIlabel] = classCount.get(voteIlabel, 0) + 1
    # items() exists on both Python 2 and 3 (iteritems() is Python 2 only);
    # reverse=True sorts by vote count in descending order.
    sortedClassCount = sorted(classCount.items(),
                              key=operator.itemgetter(1), reverse=True)
    # The first entry is the (label, votes) pair with the most votes.
    return sortedClassCount[0][0]

P 21 代码

def file2matrix(filename):
    """Load a tab-separated sample file into a feature matrix and label list.

    Each non-blank line must hold at least three numeric feature columns,
    with an integer class label as its last column.

    Args:
        filename: Path of the text file to read.

    Returns:
        A ``(returnMat, classLabelVector)`` tuple: ``returnMat`` is an
        (n, 3) float array with the first three columns of every record,
        and ``classLabelVector`` is the list of n integer labels.

    Raises:
        ValueError: If a feature or label cannot be parsed as a number, or
            a record has fewer than three feature columns.
    """
    # The with-block closes the file even if reading fails.
    with open(filename) as fr:
        stripped = [line.strip() for line in fr]
    # Drop blank lines (e.g. a trailing newline at EOF) so they neither
    # inflate the matrix nor break the numeric conversion below.
    records = [line for line in stripped if line]

    # One row per record, three feature columns, initialised to zero.
    returnMat = zeros((len(records), 3))
    # A list (not a dict): labels are appended in file order.
    classLabelVector = []
    for index, record in enumerate(records):
        listFromLine = record.split('\t')
        # Convert explicitly instead of relying on implicit str -> float
        # coercion during array assignment.
        returnMat[index, :] = [float(value) for value in listFromLine[0:3]]
        classLabelVector.append(int(listFromLine[-1]))
    return returnMat, classLabelVector



 
 

你可能感兴趣的:(机器学习实战笔记)