机器学习之用sklearn实现knn

K 近邻(KNN)的详细算法,建议参考李航老师的《统计学习方法》。下面是 Python 3 的实现代码:

 
   
from sklearn.model_selection import train_test_split

from sklearn.neighbors import KNeighborsClassifier

import  numpy as np

# Load the raw iris text file into `data`: one entry per non-blank line, each
# entry being the list of comma-separated string fields of that line.
data = []
# Raw string literal: the Windows path contains backslashes ("\p"), which are
# invalid escape sequences in a normal string and trigger a warning on recent
# Python versions. The resulting path value is unchanged.
# NOTE(review): the file is opened with the platform default encoding, as in
# the original; pass encoding=... explicitly once the file's encoding is known.
with open(r'E:\python文件\python机器学习实践指南/iris数据.txt') as txtData:
    for line in txtData:
        stripped = line.strip()  # drop surrounding whitespace / newline
        if not stripped:
            # Blank lines (e.g. trailing newlines at the end of the file)
            # would otherwise become [''] rows and break the later
            # conversion to a rectangular NumPy array.
            continue
        data.append(stripped.split(','))  # split the line into its fields
 
   
 
   
虽然 sklearn 里面已经自带 iris 数据集,但是我还是用 Python 读取 txt 文件中的数据,因为在实际工作中,数据的预处理往往要花费更多的时间,所以这里用这种方法来得到训练数据。
# Turn the parsed text rows in `data` into the feature matrix `iris_X` and the
# integer label vector `iris_y` used for training.
data.pop(0)  # drop the header row (column names such as petal width)

arr = np.array(data)  # list of rows -> 2-D array of strings
# reshape() returns a new array instead of modifying in place, so the result
# must be assigned. The original call np.reshape(arr, -1, 5) also passed 5 as
# the `order` argument, which raises; the target shape has to be one tuple.
arr = arr.reshape(-1, 5)  # n rows x 5 columns (4 features + 1 label)

# First four columns are the inputs. They were read from text, so convert
# them to floats before handing them to the classifier.
iris_X = arr[:, :4].astype(float)

iris_y0 = arr[:, 4:]  # last column holds the class name of each sample

# Encode class names as integers. Any name other than the two listed here is
# encoded as 2, matching the original if/elif/else chain.
label_codes = {'Iris-setosa': 0, 'Iris-versicolor': 1}
l2 = [label_codes.get(label, 2) for label in iris_y0.ravel()]

# Already a 1-D vector of length n; no reshape is needed (the original
# np.reshape(iris_y, -1, 1) call was invalid and its result was discarded).
iris_y = np.array(l2)
# print(iris_y)

# print(iris_X)
#
#
#
# print(iris_y)

# Hold out 30% of the samples for testing; the remainder is used for training.
X_train, X_test, y_train, y_test = train_test_split(
    iris_X,
    iris_y,
    test_size=0.3,
)

# fit() returns the estimator itself, so construction and training are chained.
knn = KNeighborsClassifier().fit(X_train, y_train)

# Show the predicted labels next to the true labels of the test set.
print(knn.predict(X_test))
print(y_test)
# Error rate on the held-out set: the fraction of test samples whose predicted
# label differs from the true label.
# The original used a single `if` instead of a loop, so only test sample 0 was
# ever compared and `count` could never exceed 1. Compare all samples at once.
y_pred = knn.predict(X_test)
count = int(np.sum(y_pred != y_test))  # number of misclassified samples

c = count / len(y_test)
print(c)


你可能感兴趣的:(机器学习)