KNN分类算法(K-Nearest-Neighbors Classification),又叫K近邻算法,是一个概念极其简单,而效果又很优秀的分类算法。
核心思想:如果一个样本在特征空间中的k个最相似(即特征空间中最邻近)的样本中的大多数属于某一个类别,则该样本也属于这个类别。
# Measure prediction quality with a per-class precision/recall/F1 report.
from sklearn.metrics import classification_report

y_true = [0, 1, 2, 2, 2]  # ground-truth labels
y_pred = [0, 0, 2, 2, 1]  # predicted labels
print(classification_report(y_true, y_pred))
# [image placeholder: KNN2.png] (the original external image link is broken;
# it was fused onto the import line below, making it a syntax error — fixed.)
# NOTE(review): load_boston was removed in scikit-learn 1.2 (ethical concerns
# with the dataset); on modern versions use fetch_california_housing() instead.
from sklearn.datasets import load_boston

boston = load_boston()
# Dataset files live under: anaconda3\Lib\site-packages\sklearn\datasets\data
# (this note was fused onto the import line, making it a syntax error — fixed;
# also removed the truncated stray statement `iris_da` left over from editing.)
from sklearn.datasets import load_iris

iris_dataset = load_iris()
# Inspect the attributes of the loaded iris Bunch object.
ds = iris_dataset
print("数据集的Keys:\n", ds.keys())              # available keys
print("特征名:\n", ds['feature_names'])          # feature names
print("数据类型:\n", type(ds['data']))           # type of the data array
print("数据维度:\n", ds['data'].shape)           # array shape
print("前五条数据:\n{}".format(ds['data'][:5]))  # first 5 samples
# Label / class information.
print("标记名:\n", ds['target_names'])
print("标记类型:\n", type(ds['target']))
print("标记维度:\n", ds['target'].shape)
print("标记值:\n", ds['target'])
# Show only the first 20 characters of the (long) dataset description.
print('数据集简介:\n', ds['DESCR'][:20] + "\n.......")
# Split the iris data into training and test sets (default 75% / 25%).
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split  # fixed: original said `form`

iris_dataset = load_iris()  # fixed typo: was assigned to `irsi_dataset`
X_train, X_test, y_train, y_test = train_test_split(
    iris_dataset['data'], iris_dataset['target'], random_state=2)
print("X_train", X_train)
print("y_train", y_train)
print("X_test", X_test)
print("y_test", y_test)
print("X_train shape: {}".format(X_train.shape))
print("X_test shape: {}".format(X_test.shape))
# Visualize pairwise feature relationships of the training data.
import pandas as pd

# fixed: pd.DataFrom -> pd.DataFrame; iris.dataset.feature_names was a
# NameError, the loaded object is `iris_dataset`.
iris_dataframe = pd.DataFrame(X_train, columns=iris_dataset['feature_names'])
# fixed: keyword is hist_kwds (not hist_kwd); `bins` is the number of bars in
# each diagonal histogram.
pd.plotting.scatter_matrix(iris_dataframe, alpha=0.8, c=y_train,
                           figsize=(15, 15), marker='o',
                           hist_kwds={'bins': 20}, s=60)
# End-to-end KNN classification on the iris dataset.
from sklearn import datasets
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split

# Load the data and report its shape.
iris = datasets.load_iris()
print('数据集结构:', iris.data.shape)

# Feature matrix and class labels.
features = iris.data
labels = iris.target

# Hold out 20% of the samples for testing.
X_train, X_test, y_train, y_test = train_test_split(
    features, labels, test_size=0.2, random_state=0)

# Fit a default (k=5) KNN classifier on the training set.
knn = KNeighborsClassifier()
knn.fit(X_train, y_train)

# Predict the held-out samples and report accuracy.
predict_result = knn.predict(X_test)
print('测试集大小:', X_test.shape)
print('真实结果:', y_test)
print('预测结果:', predict_result)
print('预测精确率', knn.score(X_test, y_test))
# Here the whole file serves as the training set and the test samples are
# given explicitly, so the train/test split step is omitted.
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split

data = np.loadtxt("fruit_data.txt")
X_train = data[:, 1:]   # feature columns
y_train = data[:, 0]    # first column holds the class label
X_test = [[192, 8.4, 7.3, 0.55], [200, 7.3, 10.5, 0.72]]

# k defaults to 5; neighbors are weighted by inverse distance.
# (earlier runs with k=3/distance or k=5/uniform gave different predictions)
knn = KNeighborsClassifier(weights='distance')
knn.fit(X_train, y_train)
print('预测结果:', knn.predict(X_test))
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split

# Load the fruit data and hold out 15% of it for testing.
samples = np.loadtxt("fruit_data.txt")
X_train, X_test, y_train, y_test = train_test_split(
    samples[:, 1:], samples[:, 0], test_size=0.15, random_state=0)

# Distance-weighted KNN with the default k of 5.
knn = KNeighborsClassifier(weights='distance')
knn.fit(X_train, y_train)

print('真实结果:', y_test)
print('预测结果:', knn.predict(X_test))
print(knn.predict([[192, 8.4, 7.3, 0.55]]))
print(knn.predict([[200, 7.3, 10.5, 0.72]]))
# Re-fit and re-evaluate the classifier.
# fixed: the original called bare `fit(...)` (NameError) — the call must go
# through the estimator object `knn`.
knn.fit(X_train, y_train)
print('真实结果:', y_test)
print('预测结果:', knn.predict(X_test))
print(knn.predict([[192, 8.4, 7.3, 0.55]]))
print(knn.predict([[200, 7.3, 10.5, 0.72]]))