import numpy as np
import operator
'''
kNN classification algorithm from "Machine Learning in Action", with annotations.
'''
def createDataSet():
    """Return a tiny hand-made training set for experimenting with kNN.

    Returns:
        A ``(group, labels)`` tuple: ``group`` is a list of four 2-D points
        (each a list of two floats) and ``labels`` is the list of class
        names, where ``labels[i]`` belongs to ``group[i]``.
    """
    # Keep every point next to its class so the pairing is obvious.
    samples = [
        ([1.0, 1.1], 'A'),
        ([0.9, 1.0], 'A'),
        ([0.1, 0.2], 'B'),
        ([0.2, 0.3], 'B'),
    ]
    group = [point for point, _ in samples]
    labels = [tag for _, tag in samples]
    return group, labels
# Demo of np.tile: the reps tuple (3, 4) repeats the input 3 times along
# the rows and 4 times along the columns.
tile_result = np.tile([1, 2], (3, 4))
print(tile_result)
# Output:
# [[1 2 1 2 1 2 1 2]
#  [1 2 1 2 1 2 1 2]
#  [1 2 1 2 1 2 1 2]]

# Demo of ndarray.shape: a tuple whose entries can be indexed individually.
shape_result = tile_result.shape
print(shape_result)
print(shape_result[0])
print(shape_result[1])
# Output:
# (3, 8)
# 3
# 8
def classify0(inX, dataSet, labels, k):
    """Classify ``inX`` by majority vote among its ``k`` nearest neighbours.

    Distances are Euclidean, computed between ``inX`` and every row of
    ``dataSet``.

    Args:
        inX: Feature vector to classify (array-like, one value per column
            of ``dataSet``).
        dataSet: 2-D array-like of training samples, one row per sample.
            Plain nested lists are accepted as well as NumPy arrays.
        labels: Sequence of class labels; ``labels[i]`` belongs to row ``i``
            of ``dataSet``.
        k: Number of nearest neighbours that take part in the vote.

    Returns:
        The label that received the most votes. When several labels tie,
        the one that was voted for first (i.e. by the nearest neighbour
        among the tied labels) wins, because the sort below is stable.

    Raises:
        IndexError: If ``k`` is smaller than 1 or larger than the number of
            rows in ``dataSet``.
    """
    # Coerce to arrays so nested lists work; the original code required an
    # ndarray because it read ``dataSet.shape`` directly.
    dataSet = np.asarray(dataSet)
    # Broadcasting subtracts inX from every row without materialising copies.
    diffMat = np.asarray(inX) - dataSet
    distances = (diffMat ** 2).sum(axis=1) ** 0.5
    sortedDistIndicies = distances.argsort()

    classCount = {}
    for i in range(k):
        voteLabel = labels[sortedDistIndicies[i]]
        classCount[voteLabel] = classCount.get(voteLabel, 0) + 1

    # Highest vote count first; equal counts keep their insertion order.
    sortedClassCount = sorted(classCount.items(),
                              key=operator.itemgetter(1), reverse=True)
    return sortedClassCount[0][0]
def file2matrix(filename):
    """Parse a tab-separated data file into a feature matrix and label list.

    Every non-blank line is split on tabs; its first three fields are read
    as float features and its last field as an integer class label. Blank
    lines (for example a trailing empty line at the end of the file) are
    skipped instead of aborting the parse.

    Args:
        filename: Path of the text file to read.

    Returns:
        A ``(returnMat, classLabelVector)`` tuple: ``returnMat`` is a float
        NumPy array with one row per data line and three columns, and
        ``classLabelVector`` is the list of integer labels in file order.

    Raises:
        ValueError: If a feature or label field cannot be converted to a
            number.
    """
    featureRows = []
    classLabelVector = []
    with open(filename, 'r') as fr:
        # Iterate the file lazily instead of loading every line up front.
        for line in fr:
            line = line.strip()
            if not line:
                continue
            listFromLine = line.split('\t')
            featureRows.append([float(value) for value in listFromLine[0:3]])
            classLabelVector.append(int(listFromLine[-1]))

    # Size the matrix by the number of data lines actually parsed.
    returnMat = np.zeros((len(featureRows), 3))
    for index, row in enumerate(featureRows):
        returnMat[index, :] = row
    return returnMat, classLabelVector
# Load the dating data set: a feature matrix plus one integer label per row.
datingDataMat, datingLabels = file2matrix('datingTestSet2.txt')
datingDataMat  # bare expression: displayed by the notebook, no effect in a script
# Output:
# array([[ 4.09200000e+04, 8.32697600e+00, 9.53952000e-01],
# [ 1.44880000e+04, 7.15346900e+00, 1.67390400e+00],
# [ 2.60520000e+04, 1.44187100e+00, 8.05124000e-01],
# ...,
# [ 2.65750000e+04, 1.06501020e+01, 8.66627000e-01],
# [ 4.81110000e+04, 9.13452800e+00, 7.28045000e-01],
# [ 4.37570000e+04, 7.88260100e+00, 1.33244600e+00]])
datingLabels[0:20]  # first twenty labels, displayed by the notebook
# Output:
# [3, 2, 1, 1, 1, 1, 3, 3, 1, 3, 1, 1, 2, 1, 1, 1, 1, 1, 2, 3]
def autoNorm(dataSet):
    """Min-max normalise every column of ``dataSet`` to the range [0, 1].

    Each value is mapped to ``(value - column_min) / (column_max -
    column_min)``. A column whose values are all equal has a range of zero;
    such a column is mapped to all zeros instead of producing NaN.

    Args:
        dataSet: 2-D array-like with one sample per row and one feature per
            column.

    Returns:
        A ``(normDataSet, ranges, minVals)`` tuple: the normalised matrix,
        the per-column ``max - min`` (zero for constant columns) and the
        per-column minimum.
    """
    dataSet = np.asarray(dataSet)
    minVals = dataSet.min(0)
    maxVals = dataSet.max(0)
    ranges = maxVals - minVals
    # Divide constant columns by 1 so 0/0 never occurs; the returned
    # ``ranges`` still reports the true (zero) range for those columns.
    safeRanges = np.where(ranges == 0, 1, ranges)
    # Broadcasting applies the per-column vectors to every row.
    normDataSet = (dataSet - minVals) / safeRanges
    return normDataSet, ranges, minVals
# Normalise the dating features so every column contributes on the same scale.
normMat, ranges, minVals = autoNorm(datingDataMat)
normMat  # bare expression: displayed by the notebook, no effect in a script
# Output:
# array([[ 0.44832535, 0.39805139, 0.56233353],
# [ 0.15873259, 0.34195467, 0.98724416],
# [ 0.28542943, 0.06892523, 0.47449629],
# ...,
# [ 0.29115949, 0.50910294, 0.51079493],
# [ 0.52711097, 0.43665451, 0.4290048 ],
# [ 0.47940793, 0.3768091 , 0.78571804]])
def datingClassTest(k, hoRatio=0.1, filename='datingTestSet2.txt'):
    """Run a hold-out evaluation of ``classify0`` on a dating data file.

    The file is loaded with ``file2matrix`` and normalised with
    ``autoNorm``. The first ``int(m * hoRatio)`` rows are used as test
    vectors and the remaining rows as the training set. Every prediction is
    printed next to the true label, followed by the overall error rate.

    Args:
        k: Number of nearest neighbours passed to ``classify0``.
        hoRatio: Fraction of the rows (taken from the start of the file)
            held out for testing. Defaults to 0.1.
        filename: Path of the data file. Defaults to 'datingTestSet2.txt'.

    Raises:
        ZeroDivisionError: If the hold-out set is empty, i.e.
            ``int(m * hoRatio)`` is 0.
    """
    datingDataMat, datingLabels = file2matrix(filename)
    normMat, _, _ = autoNorm(datingDataMat)
    m = normMat.shape[0]
    numTestVecs = int(m * hoRatio)

    # The training slice is identical for every test vector, so build it
    # once instead of re-slicing (and copying the label list) per iteration.
    trainMat = normMat[numTestVecs:m, :]
    trainLabels = datingLabels[numTestVecs:m]

    errorCount = 0.0
    for i in range(numTestVecs):
        classifierResult = classify0(normMat[i, :], trainMat, trainLabels, k)
        print("the classifier came back with: {0},the real answer in :{1}"
              .format(classifierResult, datingLabels[i]))
        if classifierResult != datingLabels[i]:
            errorCount += 1.0
    print("the total error rate is: {0}".format(errorCount / float(numTestVecs)))
datingClassTest(3)
the classifier came back with: 3,the real answer in :3
the classifier came back with: 2,the real answer in :2
the classifier came back with: 1,the real answer in :1
the classifier came back with: 1,the real answer in :1
the classifier came back with: 1,the real answer in :1
the classifier came back with: 1,the real answer in :1
the classifier came back with: 3,the real answer in :3
the classifier came back with: 3,the real answer in :3
the classifier came back with: 1,the real answer in :1
the classifier came back with: 3,the real answer in :3
the classifier came back with: 1,the real answer in :1
the classifier came back with: 1,the real answer in :1
the classifier came back with: 2,the real answer in :2
the classifier came back with: 1,the real answer in :1
the classifier came back with: 1,the real answer in :1
the classifier came back with: 1,the real answer in :1
the classifier came back with: 1,the real answer in :1
the classifier came back with: 1,the real answer in :1
the classifier came back with: 2,the real answer in :2
the classifier came back with: 3,the real answer in :3
the classifier came back with: 2,the real answer in :2
the classifier came back with: 1,the real answer in :1
the classifier came back with: 1,the real answer in :2
the classifier came back with: 3,the real answer in :3
the classifier came back with: 2,the real answer in :2
the classifier came back with: 3,the real answer in :3
the classifier came back with: 2,the real answer in :2
the classifier came back with: 3,the real answer in :3
the classifier came back with: 2,the real answer in :2
the classifier came back with: 1,the real answer in :1
the classifier came back with: 3,the real answer in :3
the classifier came back with: 1,the real answer in :1
the classifier came back with: 3,the real answer in :3
the classifier came back with: 1,the real answer in :1
the classifier came back with: 2,the real answer in :2
the classifier came back with: 1,the real answer in :1
the classifier came back with: 1,the real answer in :1
the classifier came back with: 2,the real answer in :2
the classifier came back with: 3,the real answer in :3
the classifier came back with: 3,the real answer in :3
the classifier came back with: 1,the real answer in :1
the classifier came back with: 2,the real answer in :2
the classifier came back with: 3,the real answer in :3
the classifier came back with: 3,the real answer in :3
the classifier came back with: 3,the real answer in :3
the classifier came back with: 1,the real answer in :1
the classifier came back with: 1,the real answer in :1
the classifier came back with: 1,the real answer in :1
the classifier came back with: 1,the real answer in :1
the classifier came back with: 2,the real answer in :2
the classifier came back with: 2,the real answer in :2
the classifier came back with: 1,the real answer in :1
the classifier came back with: 3,the real answer in :3
the classifier came back with: 2,the real answer in :2
the classifier came back with: 2,the real answer in :2
the classifier came back with: 2,the real answer in :2
the classifier came back with: 2,the real answer in :2
the classifier came back with: 3,the real answer in :3
the classifier came back with: 1,the real answer in :1
the classifier came back with: 2,the real answer in :2
the classifier came back with: 1,the real answer in :1
the classifier came back with: 2,the real answer in :2
the classifier came back with: 2,the real answer in :2
the classifier came back with: 2,the real answer in :2
the classifier came back with: 2,the real answer in :2
the classifier came back with: 2,the real answer in :2
the classifier came back with: 3,the real answer in :3
the classifier came back with: 2,the real answer in :2
the classifier came back with: 3,the real answer in :3
the classifier came back with: 1,the real answer in :1
the classifier came back with: 2,the real answer in :2
the classifier came back with: 3,the real answer in :3
the classifier came back with: 2,the real answer in :2
the classifier came back with: 2,the real answer in :2
the classifier came back with: 3,the real answer in :1
the classifier came back with: 3,the real answer in :3
the classifier came back with: 1,the real answer in :1
the classifier came back with: 1,the real answer in :1
the classifier came back with: 3,the real answer in :3
the classifier came back with: 3,the real answer in :3
the classifier came back with: 1,the real answer in :1
the classifier came back with: 2,the real answer in :2
the classifier came back with: 3,the real answer in :3
the classifier came back with: 3,the real answer in :1
the classifier came back with: 3,the real answer in :3
the classifier came back with: 1,the real answer in :1
the classifier came back with: 2,the real answer in :2
the classifier came back with: 2,the real answer in :2
the classifier came back with: 1,the real answer in :1
the classifier came back with: 1,the real answer in :1
the classifier came back with: 3,the real answer in :3
the classifier came back with: 2,the real answer in :3
the classifier came back with: 1,the real answer in :1
the classifier came back with: 2,the real answer in :2
the classifier came back with: 1,the real answer in :1
the classifier came back with: 3,the real answer in :3
the classifier came back with: 3,the real answer in :3
the classifier came back with: 2,the real answer in :2
the classifier came back with: 1,the real answer in :1
the classifier came back with: 3,the real answer in :1
the total error rate is: 0.05