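# KNN algorithm implementation using the data set processed.cleveland.data
# (UCI public data set: heart disease). Attributes, in file order:
# age, sex, cp, tresbps, chol, fbs, restecg, thalach, exang, oldpeak, slope,
# ca, thal, num (class label).
# Part 1: building the training data set and writing the KNN function.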
import numpy as np
import operator
# Load the data file: one comma-separated record per line, of which the first
# 14 numeric fields are used (13 features followed by the class label).
# Each field is collected into its own module-level column list.
age = []
sex = []
cp = []
tresbps = []
chol = []
fbs = []
restecg = []
thalach = []
exang = []
oldpeak = []
slope = []
ca = []
thal = []
num = []

# `with` guarantees the file handle is closed even if parsing fails.
with open('D:/Tencent/QQ/qq文件/WeChat Files/w384660107/FileStorage/File/2020-05/processed.cleveland.data') as f:
    for line in f:
        line = line.strip()
        if not line:
            continue  # skip blank lines
        # float() raises ValueError on any non-numeric field, so every
        # field in the file must be numeric.
        d = list(map(float, line.split(',')))
        age.append(d[0])
        sex.append(d[1])
        cp.append(d[2])
        tresbps.append(d[3])
        chol.append(d[4])
        fbs.append(d[5])
        restecg.append(d[6])
        thalach.append(d[7])
        exang.append(d[8])
        oldpeak.append(d[9])
        slope.append(d[10])
        ca.append(d[11])
        thal.append(d[12])
        num.append(d[13])

# Build the (n, 13) feature matrix and the (n, 1) label column in one step
# from however many rows were actually read, instead of assuming exactly 303
# rows and re-allocating the arrays on every np.append call.
group = np.column_stack(
    [age, sex, cp, tresbps, chol, fbs, restecg, thalach, exang, oldpeak,
     slope, ca, thal]
).astype(float)
labels = np.array(num, dtype=float).reshape(-1, 1)

print(group.shape)
print(labels.shape)
print("我想我明白12345229")
def kNN_Classify(inX, dataSet, labels, k):
    """Classify one sample by majority vote among its k nearest neighbours.

    Distances are Euclidean, computed between ``inX`` and every row of
    ``dataSet``.

    Args:
        inX: Feature vector of the sample to classify; its length must
            match the number of columns of ``dataSet``.
        dataSet: 2-D array-like of training samples, one sample per row.
        labels: Class label for each row of ``dataSet``. May be 1-D or a
            column vector of shape ``(n, 1)``; it is flattened before use.
        k: Number of nearest neighbours that vote (``1 <= k <= n``).

    Returns:
        The label that occurs most often among the k nearest neighbours.
        On a tied vote, the tied label that was first seen when walking
        the neighbours from nearest to farthest wins.

    Raises:
        ValueError: If the number of labels differs from the number of
            rows in ``dataSet``, or if ``k`` is outside ``1..n``.
    """
    dataSet = np.asarray(dataSet, dtype=float)
    # Flatten so that each label is a hashable scalar usable as a dict key,
    # whether the caller passed a 1-D sequence or an (n, 1) column.
    flatLabels = np.asarray(labels).ravel()
    dataSetSize = dataSet.shape[0]

    if flatLabels.size != dataSetSize:
        raise ValueError(
            "labels has %d entries but dataSet has %d rows"
            % (flatLabels.size, dataSetSize)
        )
    if not 1 <= k <= dataSetSize:
        raise ValueError(
            "k must be between 1 and %d, got %r" % (dataSetSize, k)
        )

    # Broadcasting subtracts inX from every row (no explicit tiling needed).
    diffMat = dataSet - np.asarray(inX, dtype=float)
    # Sum squared differences across features (axis=1): one distance per row.
    distances = np.sqrt((diffMat ** 2).sum(axis=1))

    # argsort returns the indices that would sort the array ascending, e.g.
    # np.argsort(np.array([3, 1, 2])) -> array([1, 2, 0]).
    # A stable sort keeps equal distances in their original row order.
    sortedDistIndicies = np.argsort(distances, kind="stable")

    # Count how often each label occurs among the k nearest neighbours.
    classCount = {}
    for neighbourIndex in sortedDistIndicies[:k]:
        voteLabel = flatLabels[neighbourIndex]
        classCount[voteLabel] = classCount.get(voteLabel, 0) + 1

    # max() scans keys in insertion order and keeps the first maximum.
    return max(classCount, key=classCount.get)
# Part 2: applying the KNN function and analysing a target sample.
import numpy as np
# Classify one hand-written sample against the full loaded data set.
dataSet = group
# Feature values of the sample, in the same column order as the rows of `group`.
test = np.array([61.0, 1.0, 1.0, 134.0, 234.0, 0.0, 0.0, 145.0, 0.0, 2.6, 2.0, 2.0, 3.0])
# Number of nearest neighbours that vote; passed to the classifier below so
# that changing it here actually takes effect.
k = 1
outputLabel = kNN_Classify(test, dataSet, labels, k)
print("你输入的样本我想我明白925", test, "wsx心血管病的类别为我想我明白123 ", outputLabel)