from sklearn.neighbors import KNeighborsClassifier
import pandas as pd
# Read the second worksheet (sheet index 1) of the movies workbook.
movie = pd.read_excel("/Users/zhucan/Desktop/movies.xlsx", sheet_name=1)

# Features are the columns at positions 1 and 2; labels come from the
# '分类情况' column.
# NOTE(review): presumably those two columns are the ones named in X_test
# below -- confirm against the spreadsheet.
data = movie.iloc[:, 1:3]
target = movie['分类情况']

# fit() returns the estimator itself, so construction and training chain.
knn = KNeighborsClassifier(n_neighbors=5).fit(data, target)

# Three unseen samples, written row by row as (武打镜头, 接吻镜头).
X_test = pd.DataFrame(
    [[100, 3], [67, 2], [1, 10]],
    columns=["武打镜头", "接吻镜头"],
)
print(knn.predict(X_test))
['动作片' '动作片' '爱情片']
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn import datasets
# Load the iris dataset and pull out the feature matrix and label vector.
iris = datasets.load_iris()
x, y = iris["data"], iris["target"]

# Shuffle the row indices 0..149 in place, then use the first 100 shuffled
# rows for training and the remaining ones for testing.
index = np.arange(150)
np.random.shuffle(index)
x_train, y_train = x[index[:100]], y[index[:100]]
x_test, y_test = x[index[100:]], y[index[100:]]

# Train a 5-nearest-neighbour classifier and predict the held-out rows.
knn = KNeighborsClassifier(n_neighbors=5).fit(x_train, y_train)
y_ = knn.predict(x_test)

# Show predictions and true labels, separated by a dashed line.
print(y_, "--------------------", y_test, sep="\n")
[1 1 0 1 2 1 1 0 0 0 2 0 0 1 1 2 1 2 0 1 0 0 2 0 1 2 0 2 0 0 0 0 1 2 1 1 1
0 0 0 0 1 2 2 2 1 1 2 1 1]
--------------------
[1 1 0 1 2 1 1 0 0 0 2 0 0 1 1 2 1 2 0 1 0 0 2 0 1 2 0 2 0 0 0 0 1 2 1 1 1
0 0 0 0 1 1 2 2 1 1 2 1 1]
# Print the classifier's score on the held-out samples.
print(knn.score(X=x_test, y=y_test))
0.96
# Per-class probabilities for every test sample (one row per sample).
proba_ = knn.predict_proba(x_test)

# Print, one after another: the probability matrix, the index of the most
# probable class per row, and the labels previously returned by predict()
# so the last two can be compared by eye.
print(proba_, proba_.argmax(axis=1), y_, sep="\n")
[[0. 1. 0. ]
[1. 0. 0. ]
[0. 0. 1. ]
[1. 0. 0. ]
[0. 1. 0. ]
[0. 1. 0. ]
[0. 0. 1. ]
[0. 1. 0. ]
[1. 0. 0. ]
[0. 0. 1. ]
[0. 0.2 0.8]
[1. 0. 0. ]
[0. 1. 0. ]
[0. 1. 0. ]
[1. 0. 0. ]
[0. 0. 1. ]
[0. 0. 1. ]
[0. 0. 1. ]
[1. 0. 0. ]
[1. 0. 0. ]
[0. 1. 0. ]
[1. 0. 0. ]
[0. 0. 1. ]
[0. 0. 1. ]
[1. 0. 0. ]
[0. 0.2 0.8]
[0. 1. 0. ]
[0. 0.2 0.8]
[1. 0. 0. ]
[1. 0. 0. ]
[0. 1. 0. ]
[1. 0. 0. ]
[0. 0. 1. ]
[0. 1. 0. ]
[0. 1. 0. ]
[0. 0. 1. ]
[1. 0. 0. ]
[0. 0. 1. ]
[0. 0. 1. ]
[1. 0. 0. ]
[0. 0. 1. ]
[1. 0. 0. ]
[1. 0. 0. ]
[1. 0. 0. ]
[0. 0. 1. ]
[0. 1. 0. ]
[1. 0. 0. ]
[0. 1. 0. ]
[0. 1. 0. ]
[0. 0. 1. ]]
[1 0 2 0 1 1 2 1 0 2 2 0 1 1 0 2 2 2 0 0 1 0 2 2 0 2 1 2 0 0 1 0 2 1 1 2 0
2 2 0 2 0 0 0 2 1 0 1 1 2]
[1 0 2 0 1 1 2 1 0 2 2 0 1 1 0 2 2 2 0 0 1 0 2 2 0 2 1 2 0 0 1 0 2 1 1 2 0
2 2 0 2 0 0 0 2 1 0 1 1 2]
# Commonly tuned KNeighborsClassifier parameters.
#   weights: "uniform"  -> every neighbour gets an equal vote
#            "distance" -> closer neighbours get a larger vote
#   p:       1 -> Manhattan distance, 2 -> Euclidean distance
# The original line wrote the alternatives as "uniform"/"distance" and
# p=1/2, which Python evaluates as a str division (TypeError) and as 0.5.
# One concrete choice is spelled out here; swap in the other as needed.
knn = KNeighborsClassifier(n_neighbors=5, weights="uniform", p=2)
p=1:曼哈顿距离
p=2:欧氏距离
n_neighbors 最好不要超过样本数量的平方根
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
import cv2
import matplotlib.pyplot as plt
%matplotlib inline
# Load the digit bitmaps: for each digit i in 0..9, files
# ./data/<i>/<i>_<j>.bmp with j in 1..500. Only channel 0 of each image is
# kept.
# NOTE(review): presumably the bitmaps are grayscale so all channels are
# equal -- confirm against the data.
X = []
for i in range(10):
    for j in range(1, 501):
        path = "./data/%d/%d_%d.bmp" % (i, i, j)
        digit = cv2.imread(path)
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with the offending path rather than with a cryptic
        # "'NoneType' object is not subscriptable" on the next line.
        if digit is None:
            raise FileNotFoundError("cannot read image: %s" % path)
        X.append(digit[:, :, 0])

# Samples in X and labels in y correspond one-to-one: images were appended
# digit by digit, so the labels are 500 zeros, then 500 ones, and so on.
X = np.asarray(X)
y = [label for label in range(10) for _ in range(500)]

# Display one randomly chosen sample together with its label.
index = np.random.randint(0, len(X), size=1)[0]
digit = X[index]
print(y[index])
plt.imshow(digit, cmap=plt.cm.gray)
from sklearn.model_selection import train_test_split
# Hold out 20% of the digit images for testing.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Flatten every image into a single feature row. The row count is taken
# from the data instead of being hard-coded (4000 / 1000), so this keeps
# working if the dataset size or test_size changes.
X_train = X_train.reshape(len(X_train), -1)
X_test = X_test.reshape(len(X_test), -1)

# Train a 63-nearest-neighbour classifier and predict the held-out images.
knn = KNeighborsClassifier(n_neighbors=63)
knn.fit(X_train, y_train)
y_ = knn.predict(X_test)

# Print the score so it is visible when run as a plain script too, the same
# way the iris example reports it.
print(knn.score(X_test, y_test))
二值化操作
# Binarize every image in place with a threshold of 200: pixels below the
# threshold become 0, all others become 255.
# Fixes relative to the original per-pixel loops:
#   * the else-branch also wrote 0, which wiped every image to black;
#   * the loop variables were named y and x, overwriting the label list y
#     with an int before it was handed to train_test_split;
#   * the result of train_test_split was thrown away.
# NOTE(review): 255 assumes 8-bit images as returned by cv2.imread; use 1
# instead if a 0/1 encoding is wanted.
dark = X < 200
X[dark] = 0
X[~dark] = 255

# Re-split the binarized data with a fixed seed so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=100
)