import numpy as np
import matplotlib.pylab as pyb
%matplotlib inline
from sklearn.neighbors import KNeighborsClassifier
from sklearn import datasets
# Load the iris data as a (features, target) pair. `return_X_y` is passed by
# keyword because scikit-learn made it keyword-only (positional use was
# deprecated in 0.23 and raises TypeError from 1.0 onwards).
X, y = datasets.load_iris(return_X_y=True)
# 150 samples, each described by 4 attributes (a 4-dimensional feature space).
X.shape
# Crude dimensionality reduction by slicing: keep only the first two columns
# so the data can be drawn in 2-D (this throws information away).
X = X[:, :2]
X.shape
# Scatter the two kept features; `c=y` colours every point by its class label
# (0, 1 or 2) through the default colormap.
pyb.scatter(X[:, 0], X[:, 1], c=y)
# k-nearest-neighbours classifier voting over the 5 closest training points.
knn = KNeighborsClassifier(n_neighbors=5)
# All 150 samples are used as training data.
knn.fit(X=X, y=y)
# Shape of the training data.
X.shape
# The test data must have shape (?, 2), matching the two training features.
# Build a background grid covering the plotted range:
# horizontal axis 4 to 8, vertical axis 2 to 4.5.
x1 = np.linspace(4, 8, 100)
y1 = np.linspace(2, 4.5, 80)
# meshgrid expands the two axes into a pair of (80, 100) coordinate matrices.
X1, Y1 = np.meshgrid(x1, y1)
display(x1.shape, y1.shape)
# Draw the grid points themselves. The 1-D axes cannot be used here: they hold
# 100 and 80 values, and scatter requires x and y to have the same size. The
# meshgrid matrices have matching shapes and cover every grid point.
pyb.scatter(X1, Y1)
# Because the training data has shape (150, 2), the test data must also have shape (?, 2): the same two features per sample.
# Flatten both coordinate matrices to 1-D and pair them up column-wise, giving
# one (x, y) row per grid point.
X_test = np.column_stack((X1.ravel(), Y1.ravel()))
X_test.shape
# 8000 samples (the 100 x 80 grid points), each with 2 attributes: the iris sepal length and sepal width.
%%time
y_ = knn.predict(X_test)
from matplotlib.colors import ListedColormap
lc = ListedColormap(['#FFAAAA','#AAFFAA','#AAAAFF'])
lc2 = ListedColormap(['#FF0000','#00FF00','#0000FF'])
%%time
pyb.scatter(X_test[:,0],X_test[:,1],c = y_,cmap = lc)
pyb.scatter(X[:,0],X[:,1],c = y,cmap = lc2)
%%time
pyb.contourf(X1,Y1,y_.reshape(80,100),cmap = lc)
pyb.scatter(X[:,0],X[:,1],c = y,cmap = lc2)