机器学习实战-KNN算法-20

机器学习实战-KNN算法-鸢尾花分类

# 导入算法包以及数据集
from sklearn import neighbors
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import random
# 载入数据
iris = datasets.load_iris()
print(iris)

机器学习实战-KNN算法-20_第1张图片

# 打乱数据切分数据集
# x_train,x_test,y_train,y_test = train_test_split(iris.data, iris.target, test_size=0.2) #分割数据0.2为测试数据,0.8为训练数据

#打乱数据
data_size = iris.data.shape[0]
index = [i for i in range(data_size)] 
random.shuffle(index)  
iris.data = iris.data[index]
iris.target = iris.target[index]

#切分数据集
test_size = 40
x_train = iris.data[test_size:]
x_test =  iris.data[:test_size]
y_train = iris.target[test_size:]
y_test = iris.target[:test_size]

# 构建模型
model = neighbors.KNeighborsClassifier(n_neighbors=3)
model.fit(x_train, y_train)
prediction = model.predict(x_test)

print(classification_report(y_test, prediction))

机器学习实战-KNN算法-20_第2张图片

机器学习实战-KNN算法-水果分类

机器学习实战-KNN算法-20_第3张图片

from sklearn.neighbors import KNeighborsClassifier
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt

机器学习实战-KNN算法-20_第4张图片

data = pd.read_csv('fruit_data.csv')
data

机器学习实战-KNN算法-20_第5张图片

labelencoder = LabelEncoder()
data.iloc[:,0] = labelencoder.fit_transform(data.iloc[:,0])
data

机器学习实战-KNN算法-20_第6张图片

labelencoder.classes_

在这里插入图片描述

from sklearn.model_selection import train_test_split
# 切分数据集,stratify=y表示切分后训练集和测试集中的数据类型的比例跟切分前y中的比例一致
# 比如切分前y中0和1的比例为1:2,切分后y_train和y_test中0和1的比例也都是1:2
# 设置random_state,使用同样的随机方式来切分数据
x_train,x_test,y_train,y_test = train_test_split(data.iloc[:,1:], data.iloc[:,0], test_size=0.3, stratify=data.iloc[:,0], random_state=20)
# 保存不同k值测试集准确率
test_scores = []
# 保存不同k值训练集准确率
train_scores = []

# 设置30个k值
k = 30
for i in range(1,k):
    knn = KNeighborsClassifier(i)
    knn.fit(x_train,y_train)
    # 保存测试集准确率
    test_scores.append(knn.score(x_test,y_test))
    # 保存训练集准确率
    train_scores.append(knn.score(x_train,y_train))
plt.title('k-NN Varying number of neighbors')
plt.plot(range(1,k),test_scores,label="Test")
plt.plot(range(1,k),train_scores,label="Train")
plt.legend()
plt.xticks(range(1,k))
plt.xlabel('k')
plt.ylabel('accuracy')
plt.show()

机器学习实战-KNN算法-20_第7张图片

# 选择一个最好的k值作为模型参数
k = np.argmax(test_scores)+1
knn = KNeighborsClassifier(k)
knn.fit(x_train,y_train)
print(k)
print(knn.score(x_test,y_test))

在这里插入图片描述

你可能感兴趣的:(机器学习,算法,机器学习,python,大数据)