import numpy as np
import matplotlib.pyplot as plt
# Training set: one row per person, columns are (height in cm, weight in kg).
# The row order matches the order of the labels in y_train below.
X_trian = np.array(
    [
        (158, 64),
        (170, 86),
        (183, 84),
        (191, 80),
        (155, 49),
        (163, 59),
        (180, 67),
        (158, 54),
        (170, 67),
    ]
)
# Sex label for each training row: the first four rows are male, the rest female.
y_train = ['male'] * 4 + ['female'] * 5
# Scatter-plot the training samples: height on the x-axis, weight on the
# y-axis, with the marker shape encoding the sex label.
plt.figure()
plt.title("hunam heights and weights by sex ")
plt.xlabel("height in cm ")
plt.ylabel("weight in kg ")
for i, x in enumerate(X_trian):
    # 'x' marks male samples, 'D' (diamond) marks female samples.
    marker = "x" if y_train[i] == 'male' else 'D'
    # No cmap argument: the colour is the fixed string 'b', so there is no
    # numeric data to colour-map, and "colormap" is not a colormap name.
    plt.scatter(x[0], x[1], s=40, c='b', marker=marker)
plt.grid(True)
plt.show()
# Hand-rolled k-nearest-neighbours vote (k = 3) for a single query point.
x = np.array([[155, 70]])

# Euclidean distance from the query to every training row; the (1, 2) query
# broadcasts against the (n, 2) training matrix.
distances = np.sqrt(((X_trian - x) ** 2).sum(axis=1))
print(distances)

# Positions of the three smallest distances, closest first, and their labels.
nearest_neighbor_indices = distances.argsort()[:3]
nearest_neighbor_genders = np.take(y_train, nearest_neighbor_indices)
print("take后的数据",nearest_neighbor_genders)
print(nearest_neighbor_indices)

from collections import Counter

# Tally the labels of the three neighbours; the most common one is the vote.
b = Counter(nearest_neighbor_genders)
print("b=",b)

top_one = b.most_common(1)
print(top_one)
print(top_one[0][0])

# Both (label, count) pairs, then the two labels followed by the two counts.
top_two = b.most_common(2)
print(top_two)
print(top_two[0][0], top_two[1][0], top_two[0][1], top_two[1][1])
from sklearn.preprocessing import LabelBinarizer
from sklearn.neighbors import KNeighborsClassifier

# Encode the string labels as a column of integers (one row per sample).
lb = LabelBinarizer()
y_train_binarized = lb.fit_transform(y_train)
print(y_train_binarized)

# Same neighbourhood size as the manual vote earlier in the script.
k = 3
clf = KNeighborsClassifier(n_neighbors=k)
print("---clf1-- :",clf)

# The classifier is fitted on a flattened (1-D) copy of the label column;
# whatever fit() returns is kept in `data` and printed.
flat_labels = y_train_binarized.ravel()
data = clf.fit(X_trian, flat_labels)
print("---clf2-- :",data)

# predict() is given a 2-D array holding one sample; [0] picks out that
# sample's predicted class.
query = np.array([[155, 70]])
prediction_binarized = clf.predict(query)[0]
print("prediction_binarized",prediction_binarized)

# Map the integer class back to its original string label.
predicted_lable = lb.inverse_transform(prediction_binarized)
print(predicted_lable)
# Side experiment (unrelated to the k-NN demo): round-trip a small matrix
# through MinMaxScaler and print it at every step.
print("------下面是一个小测试-------不用管-----------")
from sklearn import preprocessing
import numpy as np

X = np.array(
    [
        [1., -1., 2.],
        [2., 0., 0.],
        [0., 1., -1.],
    ]
)
print(X)

# Fit a scaler that maps each column onto the range [-1, 1].
scaler = preprocessing.MinMaxScaler(feature_range=(-1, 1))
scaler = scaler.fit(X)
print(scaler)

# X is printed before and after transform() so the two can be compared.
print(X)
X_scaled = scaler.transform(X)
print(X)
print(X_scaled)

# Undo the scaling and show the recovered matrix plus one of its entries
# (first row, last column).
X1 = scaler.inverse_transform(X_scaled)
print(X1)
print(X1[0, -1])
print("------------------------------------------")
结果:
[ 6.70820393 21.9317122 31.30495168 37.36308338 21. 13.60147051
25.17935662 16.2788206 15.29705854]
take后的数据 ['male' 'female' 'female']
[0 5 8]
b= Counter({'female': 2, 'male': 1})
[('female', 2)]
female
[('female', 2), ('male', 1)]
female male 2 1
[[1]
[1]
[1]
[1]
[0]
[0]
[0]
[0]
[0]]
---clf1-- : KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
metric_params=None, n_jobs=None, n_neighbors=3, p=2,
weights='uniform')
---clf2-- : KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
metric_params=None, n_jobs=None, n_neighbors=3, p=2,
weights='uniform')
prediction_binarized 0
['female']
------下面是一个小测试-------不用管-----------
[[ 1. -1. 2.]
[ 2. 0. 0.]
[ 0. 1. -1.]]
MinMaxScaler(copy=True, feature_range=(-1, 1))
[[ 1. -1. 2.]
[ 2. 0. 0.]
[ 0. 1. -1.]]
[[ 1. -1. 2.]
[ 2. 0. 0.]
[ 0. 1. -1.]]
[[ 0. -1. 1. ]
[ 1. 0. -0.33333333]
[-1. 1. -1. ]]
[[ 1. -1. 2.]
[ 2. 0. 0.]
[ 0. 1. -1.]]
2.0
------------------------------------------
Process finished with exit code 0
# Evaluate the fitted classifier `clf` on a small held-out test set and print
# accuracy, precision, recall, F1, Matthews correlation and a per-class report.
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import matthews_corrcoef
from sklearn.metrics import classification_report

print( "***********************让我们来看正确率吧:****************")
print("先看准确率:")
# Fixed message: the original text gave the recall formula tp/(tp+fn) as the
# definition of accuracy. Accuracy is the share of ALL samples predicted
# correctly.
print("准确率是针对全部样本而言的,就是所有样本中,预测对了的比例,(tp+tn)/(tp+tn+fp+fn)")

# Test set: rows are (height in cm, weight in kg), labels in the same order.
X_test = np.array([
    [168, 65],
    [180, 96],
    [160, 52],
    [169, 67],
])
y_test = ['male', 'male', 'female', 'female']

# Encode the true labels with the binarizer fitted on the training labels;
# the result is a column, so .T[0] flattens it for display.
y_test_binarzed = lb.transform(y_test)
# Fixed caption: these are the true labels, not the predictions.
print("Binarized labels : %s" % (y_test_binarzed.T[0],))

print("开始预测:")
prediction_binarized = clf.predict(X_test)
# Fixed caption: these are the model's predictions, not the labels.
print("Binarized predictions : %s" % (prediction_binarized,))
print("prediceted lables :%s" % (lb.inverse_transform(prediction_binarized),))
print(" 预测结束")

print("&&&&&&开始比较&&&&&,\n ")
print("计算准确率:")
print("Accuracy :%s " % accuracy_score(y_test_binarzed, prediction_binarized))

print("计算精准率")
print('precision: %s :' % precision_score(y_test_binarzed, prediction_binarized))

print("recall: %s" % recall_score(y_test_binarzed, prediction_binarized))

print("计算f1 (我也不知道是什么)")
print("f1_score :%s " % f1_score(y_test_binarzed, prediction_binarized))

print("马修系数")
print("matters correlation :%s " % matthews_corrcoef(y_test_binarzed, prediction_binarized))

print("########计算各种率########")
# Report only class 1. The row name is taken from the fitted binarizer so it
# shows the real class label ('male' for this data) instead of the
# placeholder 'name' used before.
positive_class_name = str(lb.classes_[1])
print(classification_report(
    y_test_binarzed,
    prediction_binarized,
    target_names=[positive_class_name],
    labels=[1],
))
D:\python\python.exe D:/resu/python_test/打的程序/knn.py
[ 6.70820393 21.9317122 31.30495168 37.36308338 21. 13.60147051
25.17935662 16.2788206 15.29705854]
take后的数据 ['male' 'female' 'female']
[0 5 8]
b= Counter({'female': 2, 'male': 1})
[('female', 2)]
female
[('female', 2), ('male', 1)]
female male 2 1
[[1]
[1]
[1]
[1]
[0]
[0]
[0]
[0]
[0]]
---clf1-- : KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
metric_params=None, n_jobs=None, n_neighbors=3, p=2,
weights='uniform')
---clf2-- : KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
metric_params=None, n_jobs=None, n_neighbors=3, p=2,
weights='uniform')
prediction_binarized 0
['female']
------下面是一个小测试-------不用管-----------
[[ 1. -1. 2.]
[ 2. 0. 0.]
[ 0. 1. -1.]]
MinMaxScaler(copy=True, feature_range=(-1, 1))
[[ 1. -1. 2.]
[ 2. 0. 0.]
[ 0. 1. -1.]]
[[ 1. -1. 2.]
[ 2. 0. 0.]
[ 0. 1. -1.]]
[[ 0. -1. 1. ]
[ 1. 0. -0.33333333]
[-1. 1. -1. ]]
[[ 1. -1. 2.]
[ 2. 0. 0.]
[ 0. 1. -1.]]
2.0
------------------------------------------
***********************让我们来看正确率吧:****************
先看准确率:
准确率是针对结果而言的,就是所有实际正的样本中,预测对了的分值,tp/tp+fn
Binarized predictions : [1 1 0 0]
开始预测:
Binarizd labels : [0 1 0 0]
prediceted lables :['female' 'male' 'female' 'female']
预测结束
&&&&&&开始比较&&&&&,
计算准确率:
Accuracy :0.75
计算精准率
precision: 1.0 :
recall: 0.5
计算f1 (我也不知道是什么)
f1_score :0.6666666666666666
马修系数
matters correlation :0.5773502691896258
              precision    recall  f1-score   support

        name       1.00      0.50      0.67         2

   micro avg       1.00      0.50      0.67         2
   macro avg       1.00      0.50      0.67         2
weighted avg       1.00      0.50      0.67         2