kNN 的预测(k 近邻分类示例)

#knn 模型:
import  numpy as np
import matplotlib.pyplot as plt

# Training set: one row per person, columns are [height_cm, weight_kg].
# (The name keeps the file's original spelling "X_trian", since later code uses it.)
X_trian = np.array([
    (158, 64), (170, 86), (183, 84), (191, 80),
    (155, 49), (163, 59), (180, 67), (158, 54),
    (170, 67),
])
# Gender label for each row: the first 4 samples are male, the last 5 female.
y_train = ['male'] * 4 + ['female'] * 5
# Scatter-plot the training data: height on the x axis, weight on the y axis,
# marker shape encodes the gender label in y_train.
plt.figure()  # start a fresh figure
plt.title("human heights and weights by sex ")  # fixed typo: "hunam" -> "human"
plt.xlabel("height in cm ")
plt.ylabel("weight in kg ")
for i, x in enumerate(X_trian):
    # x[0] is height (cm), x[1] is weight (kg)
    marker = "x" if y_train[i] == 'male' else 'D'  # 'x' for male, diamond for female
    # NOTE: removed cmap="colormap" — not a valid colormap name (can raise a
    # ValueError in modern matplotlib), and a colormap is never used anyway
    # because c='b' is a plain color, not an array of values.
    plt.scatter(x[0], x[1], s=40, c='b', marker=marker)

plt.grid(True)  # draw grid lines
plt.show()
#上面只是一个简单的绘图:
#下面可以来计算他们的距离了
#导入与预测数据
# Query point to classify: height 155 cm, weight 70 kg.
x = np.array([[155, 70]])

# Euclidean distance from every training sample to the query point,
# computed row-wise in one vectorized call.
distances = np.linalg.norm(X_trian - x, axis=1)
print(distances)

# Indices of the k = 3 training samples closest to the query.
nearest_neighbor_indices = np.argsort(distances)[:3]
# Map those indices back onto the gender labels.
nearest_neighbor_genders = np.take(y_train, nearest_neighbor_indices)
# The three nearest neighbours' genders — this is the raw voting pool.
print("take后的数据", nearest_neighbor_genders)
# Indices of the three nearest neighbours (informational only).
print(np.argsort(distances)[:3])

from collections import Counter

# Majority vote: count the gender labels of the 3 nearest neighbours.
b = Counter(np.take(y_train, np.argsort(distances)[:3]))
print("b=", b)
b.most_common(1)
print(b.most_common(1))
# most_common(k) returns (label, count) tuples ordered by count,
# so [0][0] is the predicted gender.
print(b.most_common(1)[0][0])
print(b.most_common(2))
top_two = b.most_common(2)
print(top_two[0][0], top_two[1][0], top_two[0][1], top_two[1][1])

from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import LabelBinarizer

# Encode the string labels ('male'/'female') as a 0/1 column vector.
lb = LabelBinarizer()
y_train_binarized = lb.fit_transform(y_train)
print(y_train_binarized)

k = 3
# k-NN classifier voting over the k nearest neighbours.
clf = KNeighborsClassifier(n_neighbors=k)
print("---clf1-- :", clf)
# fit() returns the classifier itself; labels are flattened to 1-D for fitting.
data = clf.fit(X_trian, y_train_binarized.reshape(-1))
print("---clf2-- :", data)

# Predict the binarized label for a 155 cm / 70 kg person
# (predict expects a 2-D array of shape (n_samples, n_features)).
query = np.array([[155, 70]])
prediction_binarized = clf.predict(query)[0]
print("prediction_binarized", prediction_binarized)
# Decode the 0/1 prediction back to the original string label.
predicted_label = lb.inverse_transform(prediction_binarized)
print(predicted_label)


print("------下面是一个小测试-------不用管-----------")
# Self-contained demo of MinMaxScaler: scale features into [-1, 1] and back.
from sklearn import preprocessing
import numpy as np

X = np.array([
    [1., -1., 2.],
    [2., 0., 0.],
    [0., 1., -1.],
])
print(X)
# fit() learns each column's min/max from X.
scaler = preprocessing.MinMaxScaler(feature_range=(-1, 1)).fit(X)
print(scaler)
print(X)
X_scaled = scaler.transform(X)  # transform() returns a new array; X is untouched
print(X)
print(X_scaled)
# inverse_transform recovers the original values from the scaled ones.
X1 = scaler.inverse_transform(X_scaled)
print(X1)
print(X1[0, -1])
print("------------------------------------------")

运行结果(以下为上面脚本的控制台输出):

[ 6.70820393 21.9317122  31.30495168 37.36308338 21.         13.60147051
 25.17935662 16.2788206  15.29705854]
take后的数据 ['male' 'female' 'female']
[0 5 8]
b= Counter({'female': 2, 'male': 1})
[('female', 2)]
female
[('female', 2), ('male', 1)]
female male 2 1
[[1]
 [1]
 [1]
 [1]
 [0]
 [0]
 [0]
 [0]
 [0]]
---clf1-- : KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=3, p=2,
                     weights='uniform')
---clf2-- : KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=3, p=2,
                     weights='uniform')
prediction_binarized 0
['female']
------下面是一个小测试-------不用管-----------
[[ 1. -1.  2.]
 [ 2.  0.  0.]
 [ 0.  1. -1.]]
MinMaxScaler(copy=True, feature_range=(-1, 1))
[[ 1. -1.  2.]
 [ 2.  0.  0.]
 [ 0.  1. -1.]]
[[ 1. -1.  2.]
 [ 2.  0.  0.]
 [ 0.  1. -1.]]
[[ 0.         -1.          1.        ]
 [ 1.          0.         -0.33333333]
 [-1.          1.         -1.        ]]
[[ 1. -1.  2.]
 [ 2.  0.  0.]
 [ 0.  1. -1.]]
2.0
------------------------------------------

Process finished with exit code 0
# --- Evaluate the fitted classifier on a held-out test set -----------------
print( "***********************让我们来看正确率吧:****************")
print("先看准确率:")
print("准确率是针对结果而言的,就是所有实际正的样本中,预测对了的分值,tp/tp+fn")

# Four test samples: [height_cm, weight_kg].
X_test = np.array([
    [168, 65],
    [180, 96],
    [160, 52],
    [169, 67],
])
y_test = ['male', 'male', 'female', 'female']

# Binarize the ground-truth labels with the encoder fitted on the training set.
y_test_binarized = lb.transform(y_test)
# Ground truth as a flat 0/1 vector, for easy visual comparison.
print("Binarized predictions : %s" % y_test_binarized.T[0])
print("开始预测:")
prediction_binarized = clf.predict(X_test)  # model's 0/1 predictions
print("Binarizd labels : %s" % prediction_binarized)
print("prediceted lables :%s" % lb.inverse_transform(prediction_binarized))
print(" 预测结束")

print("&&&&&&开始比较&&&&&,\n ")
from sklearn.metrics import (accuracy_score, classification_report, f1_score,
                             matthews_corrcoef, precision_score, recall_score)

print("计算准确率:")
print("Accuracy :%s " % accuracy_score(y_test_binarized, prediction_binarized))
print("计算精准率")
print('precision: %s :' % precision_score(y_test_binarized, prediction_binarized))
print("recall: %s" % recall_score(y_test_binarized, prediction_binarized))
print("计算f1 (我也不知道是什么)")
# F1 is the harmonic mean of precision and recall.
print("f1_score :%s " % f1_score(y_test_binarized, prediction_binarized))
print("马修系数")
print("matters correlation :%s " % matthews_corrcoef(y_test_binarized, prediction_binarized))
print("########计算各种率########")
# Per-class precision/recall/F1 for the positive class (label 1 = 'male').
print(classification_report(y_test_binarized, prediction_binarized, target_names=['name'], labels=[1]))
D:\python\python.exe D:/resu/python_test/打的程序/knn.py
[ 6.70820393 21.9317122  31.30495168 37.36308338 21.         13.60147051
 25.17935662 16.2788206  15.29705854]
take后的数据 ['male' 'female' 'female']
[0 5 8]
b= Counter({'female': 2, 'male': 1})
[('female', 2)]
female
[('female', 2), ('male', 1)]
female male 2 1
[[1]
 [1]
 [1]
 [1]
 [0]
 [0]
 [0]
 [0]
 [0]]
---clf1-- : KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=3, p=2,
                     weights='uniform')
---clf2-- : KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=3, p=2,
                     weights='uniform')
prediction_binarized 0
['female']
------下面是一个小测试-------不用管-----------
[[ 1. -1.  2.]
 [ 2.  0.  0.]
 [ 0.  1. -1.]]
MinMaxScaler(copy=True, feature_range=(-1, 1))
[[ 1. -1.  2.]
 [ 2.  0.  0.]
 [ 0.  1. -1.]]
[[ 1. -1.  2.]
 [ 2.  0.  0.]
 [ 0.  1. -1.]]
[[ 0.         -1.          1.        ]
 [ 1.          0.         -0.33333333]
 [-1.          1.         -1.        ]]
[[ 1. -1.  2.]
 [ 2.  0.  0.]
 [ 0.  1. -1.]]
2.0
------------------------------------------
***********************让我们来看正确率吧:****************
先看准确率:
准确率是针对结果而言的,就是所有实际正的样本中,预测对了的分值,tp/tp+fn
Binarized predictions : [1 1 0 0]
开始预测:
Binarizd labels : [0 1 0 0]
prediceted lables :['female' 'male' 'female' 'female']
 预测结束
&&&&&&开始比较&&&&&,
 
计算准确率:
Accuracy :0.75 
计算精准率
precision: 1.0 :
recall: 0.5
计算f1 (我也不知道是什么)
f1_score :0.6666666666666666 
马修系数
matters correlation :0.5773502691896258 
########计算各种率########
              precision    recall  f1-score   support

        name       1.00      0.50      0.67         2

   micro avg       1.00      0.50      0.67         2
   macro avg       1.00      0.50      0.67         2
weighted avg       1.00      0.50      0.67         2

你可能感兴趣的:(python,笔记,数据挖掘)