#!/usr/bin/python
# -*- encoding: utf-8 -*-
"""Minimal k-nearest-neighbours helpers: classify, load and normalize."""
import operator

import numpy as np


def classify(vect, dataset, lables, k=5):
    """Return the majority label among the k rows of dataset nearest to vect.

    Distance is Euclidean.  When several labels share the highest vote
    count, the one whose first vote came from the nearest neighbour wins.

    Args:
        vect: 1-D sequence of feature values for the sample to classify.
        dataset: 2-D array-like, one training sample per row.
        lables: sequence of labels, lables[i] belonging to dataset[i].
        k: number of neighbours that vote; clamped to the number of rows.

    Returns:
        The winning label (an element of lables).

    Raises:
        IndexError: if no neighbour can vote (empty dataset or k < 1).
    """
    dataset = np.asarray(dataset, dtype=float)
    vect = np.asarray(vect, dtype=float)
    rows = dataset.shape[0]

    # Broadcasting subtracts vect from every row; no tiled copy is needed.
    diffmat = (vect - dataset) ** 2
    distances = diffmat.sum(axis=1) ** 0.5  # collapse each row to one distance
    sortedIndices = distances.argsort()  # row indices, nearest first

    classCount = {}
    # Never ask for more neighbours than there are training rows.
    for i in range(min(k, rows)):
        votedLabel = lables[sortedIndices[i]]
        classCount[votedLabel] = classCount.get(votedLabel, 0) + 1

    # Stable sort by vote count, highest first.
    sortedClassCount = sorted(classCount.items(),
                              key=operator.itemgetter(1), reverse=True)
    return sortedClassCount[0][0]


def load(filename, cols, max_rows=101):
    """Read a tab-separated sample file into a feature matrix and labels.

    Each non-blank line holds `cols` numeric fields followed by one label
    field.  Blank lines are skipped.

    Args:
        filename: path of the text file to read.
        cols: number of numeric feature columns on each line.
        max_rows: maximum number of samples to read (non-negative int), or
            None for no limit.  The default of 101 keeps the historical cap.

    Returns:
        A tuple (mat, label, labelval):
        mat is a float array of shape (rows_read, cols);
        label is the list of label strings, one per row;
        labelval is a list of floats, one per row, where each distinct
        label maps to the 1-based row number of its first occurrence.

    Raises:
        ValueError: if a line's feature count differs from cols or a
            feature field is not a number.
    """
    with open(filename) as fr:
        arrayLines = fr.readlines()

    limit = len(arrayLines)
    if max_rows is not None:
        limit = min(limit, max_rows)

    mat = np.zeros((limit, cols))
    label = []
    labelval = []
    valdict = {}
    indexCount = 0
    for line in arrayLines:
        if indexCount >= limit:
            break
        line = line.strip()
        if not line:
            continue
        listLine = line.split("\t")
        mat[indexCount, :] = [float(field) for field in listLine[:-1]]
        label.append(listLine[-1])
        indexCount += 1
        if listLine[-1] not in valdict:
            valdict[listLine[-1]] = float(indexCount)
        labelval.append(valdict[listLine[-1]])

    # Drop rows that were allocated but never filled (blank lines / cap).
    return mat[:indexCount], label, labelval


def normalize(dataset):
    """Min-max scale every column of dataset into the range [0, 1].

    Args:
        dataset: 2-D array-like, one sample per row.

    Returns:
        A float array of the same shape holding (x - min) / (max - min)
        computed per column.  Columns whose values are all equal map to 0.
    """
    dataset = np.asarray(dataset, dtype=float)
    # Per-column minimum and value range (reduction along axis 0).
    minval = dataset.min(axis=0)
    ranges = dataset.max(axis=0) - minval
    # Avoid division by zero for constant columns; their numerator is 0.
    safeRanges = np.where(ranges == 0, 1.0, ranges)
    return (dataset - minval) / safeRanges