python 2-5(2019-11-01 )(KNN)

自己实现KNN算法

from matplotlib import pyplot as plt
import numpy as np
# Ten training samples, one per row, each with two numeric features.
raw_data_X = [
    [3.393533211, 2.331273381],
    [3.110073483, 1.781539638],
    [1.343808831, 3.368360954],
    [3.582294042, 4.679179110],
    [2.280362439, 2.866990263],
    [7.423436942, 4.696522875],
    [5.745051997, 3.533989803],
    [9.172168622, 2.511101045],
    [7.792783481, 3.424088941],
    [7.939820817, 0.791637231],
]
# Convert the nested list to an ndarray so rows and columns can be sliced
# and used in element-wise arithmetic.
X_train = np.array(raw_data_X)
X_train
array([[3.39353321, 2.33127338],
       [3.11007348, 1.78153964],
       [1.34380883, 3.36836095],
       [3.58229404, 4.67917911],
       [2.28036244, 2.86699026],
       [7.42343694, 4.69652288],
       [5.745052  , 3.5339898 ],
       [9.17216862, 2.51110105],
       [7.79278348, 3.42408894],
       [7.93982082, 0.79163723]])
# Class labels, aligned row-for-row with the training samples:
# the first five samples are class 0, the last five are class 1.
raw_data_y = [0] * 5 + [1] * 5
y_train = np.array(raw_data_y)
y_train
array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1])
# Visualise the training data: class 0 is drawn in green, class 1 in red,
# with the first feature on the x-axis and the second on the y-axis.
for label, colour in ((0, 'g'), (1, 'r')):
    members = X_train[y_train == label]
    plt.scatter(members[:, 0], members[:, 1], color=colour)
plt.show()
output_7_0.png

预测

# A new, unlabelled sample arrives; the goal is to decide whether x is
# malignant or benign.
x = np.array([8.093607318, 3.365731514])

# Redraw the training set (class 0 green, class 1 red) and overlay the new
# sample in blue to see where it falls relative to the two classes.
class0 = X_train[y_train == 0]
class1 = X_train[y_train == 1]
plt.scatter(class0[:, 0], class0[:, 1], color='g')
plt.scatter(class1[:, 0], class1[:, 1], color='r')
plt.scatter(*x, color='b')
plt.show()
output_10_0.png
通过knn算法来预测
# Compute the distance from x to each of the ten training points, then pick the k nearest ones

from math import sqrt
# Euclidean distance from x to every training point in one vectorised step:
# broadcasting subtracts x from each row of X_train, and summing over axis=1
# collapses the squared per-feature differences into one value per point.
distances = np.sqrt(np.sum((X_train - x) ** 2, axis=1))
# argsort gives the training-point indices ordered from nearest to farthest.
nearest = np.argsort(distances)
nearest
array([8, 7, 5, 6, 9, 3, 0, 1, 4, 2], dtype=int64)
# Choose how many neighbours take part in the vote (k = 6 here).
k = 6
# Labels of the k nearest training points, nearest first. The slice uses k
# itself so that changing k above actually changes the neighbourhood size.
top_k_y = [y_train[i] for i in nearest[:k]]
top_k_y
[1, 1, 1, 1, 1, 0]
# Tally the votes: Counter maps each label in top_k_y to the number of times
# it occurs, which is a convenient way to count when there is a lot of data.
from collections import Counter
votes = Counter(top_k_y)
votes
Counter({1: 5, 0: 1})
# most_common(i) returns the i most frequent labels as (label, count) pairs
votes.most_common(1)
[(1, 5)]
# Take the label out of the single most common (label, count) pair; that
# label is the predicted class for x.
predict_y =votes.most_common(1)[0][0]
predict_y
1

预测结果为 1,说明患者 x 患恶性肿瘤的可能性较大。


你可能感兴趣的:(python 2-5(2019-11-01 )(KNN))