kNN Case Study

from numpy import *
import matplotlib.pyplot as plt
def createDataset():
    # Toy data set: four 2-D points with two class labels.
    group = array([[1.0, 1.1], [1.0, 1.0], [0, 0], [0, 0.1]])
    labels = ['A', 'A', 'B', 'B']
    return group, labels
def KnnClassify(testX, trainX, labels, K):
    # Classify testX by majority vote among its K nearest training points.
    N, M = trainX.shape
    difference = tile(testX, (N, 1)) - trainX   # difference to every training point
    difference = difference ** 2
    distance = difference.sum(axis=1)
    distance = distance ** 0.5                  # Euclidean distance to each point
    sortdiffidx = distance.argsort()            # indices sorted by increasing distance
    vote = {}
    for i in range(K):
        ith_label = labels[sortdiffidx[i]]
        vote[ith_label] = vote.get(ith_label, 0) + 1
    sortedvote = sorted(vote.items(), key=lambda x: x[1], reverse=True)
    return sortedvote[0][0]
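
A quick sanity check of the classifier on the toy data set above (a minimal sketch I'm adding; the test point [0, 0.2] and K = 3 are arbitrary choices, not from the original post):

group, labels = createDataset()
print(KnnClassify([0, 0.2], group, labels, 3))   # nearest neighbors are the two 'B' points, so this prints 'B'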
# Parse a tab-separated text file into a feature matrix and a label vector.
def file2matrix(filename):
    fr = open(filename)
    arrayOLines = fr.readlines()
    fr.close()
    numberOfLines = len(arrayOLines)
    returnMat = zeros((numberOfLines, 3))
    print(numberOfLines)
    classLabelVector = []
    index = 0
    for line in arrayOLines:
        line = line.strip()
        listFromLine = line.split('\t')
        returnMat[index, :] = listFromLine[0:3]            # first three columns are features
        classLabelVector.append(int(listFromLine[-1]))     # last column is the integer class label
        index += 1
    return returnMat, classLabelVector
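
file2matrix expects each line to contain three tab-separated numeric features followed by an integer class label. A hedged usage sketch (the file name datingTestSet2.txt is an assumption; substitute your own data file):

# Each line: feature1<TAB>feature2<TAB>feature3<TAB>label
datingDataMat, datingLabels = file2matrix('datingTestSet2.txt')   # assumed file name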
def graph(dm, dl):
    # Scatter plot of two feature columns, sized and colored by class label.
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.scatter(dm[:, 0], dm[:, 2], 15.0 * array(dl), 15.0 * array(dl))
    plt.show()
def autoNorm(dataSet):
    # Min-max scaling: map every feature column into the range [0, 1].
    minVals = dataSet.min(0)
    maxVals = dataSet.max(0)
    ranges = maxVals - minVals
    m = dataSet.shape[0]
    normDataSet = dataSet - tile(minVals, (m, 1))
    normDataSet = normDataSet / tile(ranges, (m, 1))
    return normDataSet, ranges, minVals
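
autoNorm applies the usual min-max rule newValue = (oldValue - min) / (max - min) column by column. A small illustrative check (the sample values are my own, not from the original post):

sample = array([[10.0, 0.5], [20.0, 1.0], [30.0, 1.5]])
normed, ranges, minVals = autoNorm(sample)
print(normed)    # each column now runs from 0.0 to 1.0
print(ranges)    # [20.0, 1.0]
print(minVals)   # [10.0, 0.5]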
def datingClassTest(datingDataMat, datingLabels, normMat, ranges, minVals):
    # Hold out the first hoRatio fraction of rows as the test set,
    # classify each against the remaining rows, and report the error rate.
    hoRatio = 0.50
    m = normMat.shape[0]
    numTestVecs = int(m * hoRatio)
    errorCount = 0.0
    for i in range(numTestVecs):
        classifierResult = KnnClassify(normMat[i, :], normMat[numTestVecs:m, :],
                                       datingLabels[numTestVecs:m], 3)
        print('the classifier came back with: %d, the real answer is: %d'
              % (classifierResult, datingLabels[i]))
        if classifierResult != datingLabels[i]:
            errorCount += 1.0
    print('the total error rate is: %f' % (errorCount / float(numTestVecs)))
    # graph(datingDataMat, datingLabels)
def classifyPerson(datingDataMat, datingLabels, normMat, ranges, minVals):
    resultList = ['not at all', 'in small doses', 'in large doses']
    percentTats = float(input("percentage of time spent playing video games? "))
    ffMiles = float(input("frequent flier miles earned per year? "))
    iceCream = float(input("liters of ice cream consumed per year? "))
    inArr = array([ffMiles, percentTats, iceCream])
    # Normalize the new sample with the same ranges/minVals used for the training data.
    classifierResult = KnnClassify((inArr - minVals) / ranges, normMat, datingLabels, 3)
    print('You will probably like this person:', resultList[classifierResult - 1])
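
To tie the pieces together, a minimal driver sketch (the data file name is an assumption; the functions are called exactly as defined above):

if __name__ == '__main__':
    # assumed file name; replace with your own tab-separated data file
    datingDataMat, datingLabels = file2matrix('datingTestSet2.txt')
    normMat, ranges, minVals = autoNorm(datingDataMat)
    graph(datingDataMat, datingLabels)   # visualize two feature columns
    datingClassTest(datingDataMat, datingLabels, normMat, ranges, minVals)
    classifyPerson(datingDataMat, datingLabels, normMat, ranges, minVals)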
