from numpy import *
import operator
import matplotlib
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
def createDataset():
    """Build the small hard-coded sample set used to try out the classifier.

    Returns:
        tuple: ``(group, labels)`` where ``group`` is a 4x2 array of points
        and ``labels`` is a list holding the class label ('A' or 'B') of
        the corresponding row of ``group``.
    """
    group = array([[1.0, 1.1], [1.0, 1.0], [0, 0], [0, 0.1]])
    labels = ['A', 'A', 'B', 'B']
    return group, labels
def KnnClassify(testX,trainX,labels,K):
    """Classify one sample by majority vote among its K nearest neighbours.

    Distances are Euclidean, computed between ``testX`` and every row of
    ``trainX``.

    Args:
        testX: feature vector of the sample to classify.
        trainX: training samples, one sample per row.
        labels: sequence of class labels; ``labels[i]`` belongs to row ``i``
            of ``trainX``.
        K: number of nearest neighbours that vote. A value larger than the
            number of training rows is treated as "all rows".

    Returns:
        The label that received the most votes. When several labels tie,
        the one whose first voter is closest to ``testX`` wins.

    Raises:
        ValueError: if ``K`` is smaller than 1 or ``trainX`` has no rows.
    """
    trainX = asarray(trainX)
    N = trainX.shape[0]
    if K < 1:
        raise ValueError("K must be at least 1")
    if N == 0:
        raise ValueError("trainX must contain at least one sample")

    # Broadcasting subtracts testX from every training row at once.
    difference = asarray(testX) - trainX
    distance = ((difference ** 2).sum(1)) ** 0.5

    # Slicing (rather than indexing 0..K-1) silently caps K at N.
    nearest = distance.argsort()[:K]
    neighbour_labels = [labels[idx] for idx in nearest]

    vote = {}
    for label in neighbour_labels:
        vote[label] = vote.get(label, 0) + 1

    # Walk the neighbours from nearest to farthest so that ties are resolved
    # deterministically instead of depending on dict iteration order.
    winner = neighbour_labels[0]
    for label in neighbour_labels:
        if vote[label] > vote[winner]:
            winner = label
    return winner
# Format conversion: parse the tab-separated text data file into a matrix.
def file2matrix(filename):
    """Load a tab-separated data file into a feature matrix and a label list.

    Every non-blank line is split on tabs; the first three fields become one
    row of the matrix and the last field becomes the integer class label.
    The number of records read is printed to stdout.

    Args:
        filename: path of the text file to read.

    Returns:
        tuple: ``(returnMat, classLabelVector)`` where ``returnMat`` is an
        (n, 3) float array and ``classLabelVector`` is a list of n ints.

    Raises:
        ValueError: if a field cannot be converted to a number.
    """
    # The context manager closes the file even when parsing fails later on.
    with open(filename) as fr:
        records = [line.strip() for line in fr]
    # Blank lines (typically a trailing newline) carry no record; skip them
    # rather than failing on int('').
    records = [record for record in records if record]

    numberOfLines = len(records)
    print(numberOfLines)

    returnMat = zeros((numberOfLines, 3))
    classLabelVector = []
    # enumerate yields integer row indices; a float index is rejected by NumPy.
    for index, record in enumerate(records):
        listFromLine = record.split('\t')
        returnMat[index, :] = [float(field) for field in listFromLine[0:3]]
        classLabelVector.append(int(listFromLine[-1]))
    return returnMat, classLabelVector
def gragh(dm,dl):
    """Show a 2-D scatter plot of column 0 against column 2 of ``dm``.

    Args:
        dm: 2-D array of samples; column 0 is plotted on the x axis and
            column 2 on the y axis.
        dl: sequence of numeric labels, one per row of ``dm``. Each label
            multiplied by 15 is used both as the marker size and as the
            marker colour value, so the classes are visually separated.
    """
    fig = plt.figure()
    # One 2-D axes only: scatter() below is called with the 2-D signature
    # (x, y, size, colour), so no 3-D projection is created.
    ax = fig.add_subplot(111)
    scaled = 15.0 * array(dl)
    ax.scatter(dm[:, 0], dm[:, 2], s=scaled, c=scaled)
    plt.show()
def autoNorm(dataSet):
    """Rescale every column of ``dataSet`` linearly onto the range [0, 1].

    Each value becomes ``(value - column_min) / column_range``.

    Args:
        dataSet: 2-D numeric array-like, one sample per row.

    Returns:
        tuple: ``(normDataSet, ranges, minVals)``.
        ``normDataSet`` is the rescaled float array, ``minVals`` holds the
        per-column minima and ``ranges`` holds the per-column divisor that
        was applied, so ``(x - minVals) / ranges`` normalizes a new sample
        the same way. ``ranges`` equals max - min, except that a constant
        column (max == min) reports 1.0 so that it maps to 0 instead of nan.
    """
    # Force floating point so integer input is not floor-divided on Python 2.
    dataSet = asarray(dataSet, dtype=float)
    minVals = dataSet.min(0)
    ranges = dataSet.max(0) - minVals
    # A zero range would divide by zero; substitute a neutral divisor.
    ranges = where(ranges == 0, 1.0, ranges)
    # Broadcasting applies the per-column shift and scale to every row.
    normDataSet = (dataSet - minVals) / ranges
    return normDataSet, ranges, minVals
def datingClassTest(datingDataMat,datingLabels,normMat, ranges, minVals, hoRatio=0.50):
    """Measure the classifier's error rate on a hold-out split of ``normMat``.

    The first ``int(m * hoRatio)`` rows of ``normMat`` are used as test
    samples and the remaining rows as training data. Each test row is
    classified with ``KnnClassify`` using 3 neighbours, the prediction and
    the true label are printed, and finally the overall error rate is
    printed.

    Args:
        datingDataMat: accepted for call compatibility; not used here.
        datingLabels: integer class labels, one per row of ``normMat``.
        normMat: 2-D array of normalized samples, one per row.
        ranges: accepted for call compatibility; not used here.
        minVals: accepted for call compatibility; not used here.
        hoRatio: fraction of the rows (taken from the top) held out for
            testing. Defaults to 0.50.

    Returns:
        float: fraction of test samples that were misclassified.

    Raises:
        ValueError: if the split leaves no test samples or no training
            samples.
    """
    m = normMat.shape[0]
    numTestVecs = int(m*hoRatio)
    # Without test rows the error rate is undefined (division by zero), and
    # without training rows there is nothing to vote.
    if not 0 < numTestVecs < m:
        raise ValueError(
            "hoRatio must leave at least one test and one training sample")

    # The training split is the same for every test row; slice it once.
    trainMat = normMat[numTestVecs:m, :]
    trainLabels = datingLabels[numTestVecs:m]

    errorCount = 0.0
    for i in range(numTestVecs):
        classifierResult = KnnClassify(normMat[i, :], trainMat, trainLabels, 3)
        print('the classifier came back with: %d, the real answer is: %d' % (classifierResult, datingLabels[i]))
        if classifierResult != datingLabels[i]:
            errorCount += 1.0

    errorRate = errorCount / float(numTestVecs)
    print("the total error rate is: %f " % (errorRate,))
    return errorRate
def classifyPerson(datingDataMat,datingLabels,normMat,ranges,minVals):
    """Prompt for one person's three features and print the predicted category.

    The three answers are read from standard input, normalized with
    ``(value - minVals) / ranges`` and classified against ``normMat`` with
    ``KnnClassify`` using 3 neighbours. The resulting label ``k`` selects
    entry ``k - 1`` of the category names.

    Args:
        datingDataMat: accepted for call compatibility; not used here.
        datingLabels: integer class labels, one per row of ``normMat``.
        normMat: 2-D array of normalized training samples.
        ranges: per-feature divisor used to normalize the entered values.
        minVals: per-feature offset used to normalize the entered values.

    Raises:
        ValueError: if an answer cannot be converted to a float.
    """
    # raw_input only exists on Python 2; Python 3 renamed it to input.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    resultList = ['not at all', 'in small doses', 'in large doses']
    percentTats = float(read_line("percentage of time spent playing video games?"))
    ffMiles = float(read_line("frequent flier miles earned per year?"))
    iceCream = float(read_line("liters of ice cream consumed per year?"))

    inArr = array([ffMiles, percentTats, iceCream])
    classifierResult = KnnClassify((inArr - minVals)/ranges, normMat, datingLabels,3)
    # One pre-formatted string gives the same output on Python 2 and 3 as the
    # two-argument print statement did (arguments joined by a single space).
    print('%s %s' % ('You will probably like this person: ', resultList[classifierResult - 1]))