KNN - A TensorFlow Implementation

This article covers the APIs the program relies on and a complete implementation. For the details of the algorithm itself, see the earlier write-up introducing KNN.

I. Prerequisites

1. Python basics

2. NumPy basics

np.argmax
Returns the index of the maximum value — over the flattened array by default, or along the given axis.

>>> a = np.arange(6).reshape(2,3)
>>> a.argmax()
5
>>> a.argmax(0)
array([1, 1, 1])
>>> a.argmax(1)
array([2, 2])

3. TensorFlow basics

tf.arg_max
Returns the index of the maximum value along an axis (in later TF 1.x releases, tf.arg_max is deprecated in favor of tf.argmax). With axis=0 the comparison runs down the first dimension, column by column: 8>2 → 0, 1<3 → 1, 2<4 → 1, giving [0 1 1]. With axis=1 it runs along each row: [8,1,2] → 0, [2,3,4] → 2, giving [0 2].

import tensorflow as tf

data = tf.constant([[8, 1, 2], [2, 3, 4]])
sess = tf.Session()
print(sess.run(tf.arg_max(data, 0)))
# >> [0 1 1]
print(sess.run(tf.arg_max(data, 1)))
# >> [0 2]

tf.arg_min
The counterpart of tf.arg_max: returns the index of the minimum value along an axis.
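
As a quick check, here is a sketch reusing the `data` and `sess` from the example above:

print(sess.run(tf.arg_min(data, 0)))
# >> [1 0 0]
print(sess.run(tf.arg_min(data, 1)))
# >> [1 0]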

II. Complete Program

import tensorflow as tf
import numpy as np

def file2Mat(testFileName, parameterNumber):
    # Parse a tab-separated data file: the first parameterNumber columns are
    # features, the last column is the class label.
    with open(testFileName) as fr:
        lines = fr.readlines()
    lineNums = len(lines)
    resultMat = np.zeros((lineNums, parameterNumber))
    classLabelVector = []
    for i in range(lineNums):
        itemMat = lines[i].strip().split('\t')
        resultMat[i, :] = itemMat[0:parameterNumber]
        classLabelVector.append(itemMat[-1])
    return resultMat, classLabelVector
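
# A sketch of the file layout file2Mat expects (hypothetical values in the
# style of the dating data set; tab-separated, label in the last column):
#   40920\t8.1\t0.95\tlargeDoses
# file2Mat(fileName, 3) then yields an (n, 3) float matrix plus the label list.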

# Normalization guards against one feature dominating the result simply
# because of its scale: in a sample like (10000, 4.5, 6.8), the 10000 would
# all but decide the distance on its own.
def autoNorm(dataSet):
    # Min-max normalization: scale every feature column to [0, 1].
    minVals = dataSet.min(0)
    maxVals = dataSet.max(0)
    ranges = maxVals - minVals
    size = dataSet.shape[0]
    normMat = dataSet - np.tile(minVals, (size, 1))
    normMat = normMat / np.tile(ranges, (size, 1))
    return normMat, minVals, ranges
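
# Worked example of autoNorm on a tiny 2x3 matrix (hypothetical values):
#   [[10000.0, 4.5, 6.8],
#    [20000.0, 9.0, 3.4]]
# column minVals = [10000.0, 4.5, 3.4], ranges = [10000.0, 4.5, 3.4]
# normalized    -> [[0.0, 0.0, 1.0],
#                   [1.0, 1.0, 0.0]]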

if __name__ == '__main__':

    trainingSetFileName = 'data\\datingTrainingSet.txt'
    testFileName = 'data\\datingTestSet.txt'

    # Read the training data
    trainingMat, classLabel = file2Mat(trainingSetFileName, 3)
    # Normalize the training data
    autoNormTrainingMat, minVals, ranges = autoNorm(trainingMat)
    # Read the test data and normalize it with the training set's min/range
    testMat, testLabel = file2Mat(testFileName, 3)
    autoNormTestMat = []
    for i in range(len(testLabel)):
        autoNormTestMat.append((testMat[i] - minVals) / ranges)

    # For each test sample, predict a label and compare it with the true one
    # to measure accuracy. A classic property of KNN is that it needs no
    # training phase: recognition happens directly at test time. The metric
    # here is the Euclidean distance d(x, y) = sqrt(sum_j (x_j - y_j)^2).
    traindata_tensor = tf.placeholder('float', [None, 3])
    testdata_tensor = tf.placeholder('float', [3])

    # Distance from the test sample to every training sample (broadcast subtraction)
    distance = tf.sqrt(tf.reduce_sum(tf.square(tf.subtract(traindata_tensor, testdata_tensor)), axis=1))
    # k = 1 nearest neighbor: index of the smallest distance
    pred = tf.arg_min(distance, 0)
    test_num = 1  # number of test samples to evaluate
    accuracy = 0
    init = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init)
        for i in range(test_num):
            print(sess.run(distance, feed_dict={traindata_tensor: autoNormTrainingMat, testdata_tensor: autoNormTestMat[i]}))
            idx = sess.run(pred, feed_dict={traindata_tensor: autoNormTrainingMat, testdata_tensor: autoNormTestMat[i]})
            print(idx)

            # Compare the labels directly (they are read from the file as strings)
            print('test No.%d, the real label %s, the predict label %s' % (i, testLabel[i], classLabel[idx]))
            if testLabel[i] == classLabel[idx]:
                accuracy += 1
        print("result:%f" % (1.0 * accuracy / test_num))
