The documentation link is:
http://scikit-learn.org/stable/modules/generated/sklearn.neighbors.NearestNeighbors.html#sklearn.neighbors.NearestNeighbors.radius_neighbors_graph
Example1.py
#-*- encoding:utf-8 -*-
import sys
reload(sys)
sys.setdefaultencoding('utf-8')
import numpy as np
from sklearn.neighbors import NearestNeighbors
samples=[[0,0,2],[1,0,0],[0,0,1]]
#--------------------------------------------------
neigh = NearestNeighbors(n_neighbors=2, radius=0.4)
neigh.fit(samples)  # unsupervised fit: samples is the reference set that will be searched
print neigh
# The output is:
#NearestNeighbors(algorithm='auto', leaf_size=30, metric='minkowski', metric_params=None, n_jobs=1, n_neighbors=2, p=2, radius=0.4)
print"--------------------------------------------------"
neigh.kneighbors([[0,0,1.3]],2,return_distance=False)
print"--------------------------------------------------"
nbrs = neigh.radius_neighbors([[0, 0, 1.3]], 0.4, return_distance=False)
print nbrs#这个意思是和(0,0,1.3)最近的点是samples列表中的第2个点(从0开始数起)
print np.asarray(nbrs[0][0])
#相比arrary而言,asarrary不会占用新的内存
#参考链接:https://blog.csdn.net/gobsd/article/details/56485177
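A minimal sketch to check the np.array vs np.asarray remark above (independent of the sklearn example), assuming only numpy:

import numpy as np

a = np.array([1, 2, 3])
b = np.asarray(a)   # no copy: the input is already an ndarray, so it is returned as-is
c = np.array(a)     # np.array copies by default

print(b is a)                  # True  -> asarray reused the existing array
print(c is a)                  # False -> array allocated new memory
print(np.shares_memory(a, c))  # False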
Example2.py
#-*- encoding:utf-8 -*-
import sys
reload(sys)
sys.setdefaultencoding('utf-8')
samples = [[0., 0., 0.], [0., .5, 0.], [1., 1., .5]]
from sklearn.neighbors import NearestNeighbors
neigh = NearestNeighbors(n_neighbors=1)
neigh.fit(samples)  # unsupervised fit: the three points above become the candidates for the query point (1,1,1)
print(neigh.kneighbors([[1., 1., 1.]]))
#(array([[ 0.5]]), array([[2]], dtype=int64))
# Meaning: the smallest distance is 0.5, and the nearest point to (1,1,1) is index 2 of the training set (counting from 0);
# in other words, the nearest point to (1,1,1) is (1., 1., 0.5).
print "---------------------------------------------------------"
X = [[0., 1., 0.], [1., 0., 1.]]
print neigh.kneighbors(X, return_distance=False)
# Result: [[1] [2]]
# The nearest point to (0,1,0) is index 1 of the training set (counting from 0);
# the nearest point to (1,0,1) is index 2 of the training set (counting from 0).
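The indices that kneighbors returns can be mapped back to the coordinates of the neighbors themselves. A short sketch, assuming the samples, X and neigh variables from Example2.py above are still in scope:

import numpy as np

idx = neigh.kneighbors(X, return_distance=False)  # shape (n_queries, n_neighbors) = (2, 1)
nearest_points = np.asarray(samples)[idx[:, 0]]   # coordinates of each query's nearest neighbor
print(nearest_points)                             # [[0.  0.5 0. ], [1.  1.  0.5]]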
Example3.py
#-*- encoding:utf-8 -*-
import sys
reload(sys)
sys.setdefaultencoding('utf-8')
import numpy as np
samples = [[0., 0., 0.], [0., .5, 0.], [1., 1., .5]]
from sklearn.neighbors import NearestNeighbors
neigh = NearestNeighbors(radius=1.6)  # radius 1.6, matching the outputs and the "closer than 1.6" comment below
neigh.fit(samples)
rng = neigh.radius_neighbors([[1., 1., 1.]])
print rng
print"rng[0][0]=",rng[0][0]#运行结果是[1.5 0.5],因为(0-1)²+(0.5-1)²+(0-1)²=2.25,所以开放后是1.5
print"rng[0][0]=",rng[1][0]#运行结果是[1,2]表示,离(1,1,1)距离在1.5以内的两个点,分别是第1个点和第2个点
print(np.asarray(rng[0][0]))
print(np.asarray(rng[1][0]))
# The first array returned contains the distances to all points which are closer than 1.6,
# while the second array returned contains their indices.
# In general, multiple points can be queried at the same time.
print "------------------- compute the Euclidean distance by hand -------------------"
def calEuclideanDistance(vec1, vec2):
    print "vec1-vec2=", vec1 - vec2
    print "sum of squared differences =", np.sum(np.square(vec1 - vec2))
    dist = np.sqrt(np.sum(np.square(vec1 - vec2)))
    return dist
v1 = np.array([0, 0.5, 0])
v2 = np.array([1, 1, 1])
print calEuclideanDistance(v1, v2)  # 1.5, the same as the first distance returned by radius_neighbors
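The same Euclidean distance is more commonly computed with np.linalg.norm. A brief check, assuming samples and the np alias from Example3.py are still in scope:

query = np.array([1., 1., 1.])
pts = np.asarray(samples)
print(np.linalg.norm(pts - query, axis=1))            # distances to all three points; radius_neighbors kept only those <= 1.6
print(np.linalg.norm(np.array([0, 0.5, 0]) - query))  # 1.5, same as calEuclideanDistance above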
Example4.py
#-*- encoding:utf-8 -*-
import sys
reload(sys)
sys.setdefaultencoding('utf-8')
print"-------------------------------------------"
X = [[0], [3], [1]]#这里表示3个点
print"X=",X
from sklearn.neighbors import NearestNeighbors
neigh = NearestNeighbors(n_neighbors=2,radius=1)
neigh.fit(X)
print"-------------------------------------------"
A = neigh.kneighbors_graph(X)
print A.toarray()#返回的这个矩阵的意思就是,如果是属于前k个最近点,那么就在矩阵上面标记1,否则标记0
# 也即是说,矩阵的每行必定会标记出k个非零值
# [[1. 0. 1.]
# [0. 1. 1.]
# [1. 0. 1.]]
print"-------------------------------------------"
A = neigh.kneighbors_graph(X,mode='distance')#当取mode为distance时,会对上面的bool矩阵标记上具体的距离
print A.toarray()
# [[0. 0. 1.]
# [0. 0. 2.]
# [1. 0. 0.]]
# This is the same k=2 neighbor graph as above, with each 1 replaced by the distance between the two points.
# The self-neighbor entries become 0 (every point is at distance 0 from itself), which is why each row seems to keep only one nonzero value.
# Reference:
# http://scikit-learn.org/stable/modules/generated/sklearn.neighbors.NearestNeighbors.html#sklearn.neighbors.NearestNeighbors.radius_neighbors_graph
# Some graph-theory background may be helpful here.
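The link above documents radius_neighbors_graph, which Example4.py never actually calls. A minimal sketch of it on the same data, assuming X and neigh from Example4.py (the constructor already set radius=1):

B = neigh.radius_neighbors_graph(X)  # connectivity graph: entry (i, j) is 1 if point j lies within radius 1 of point i
print(B.toarray())
# Expected:
# [[1. 0. 1.]
#  [0. 1. 0.]
#  [1. 0. 1.]]
# Points 0 and 1 are exactly distance 1 apart, so they connect to each other (and to themselves);
# the point with value 3 has no other point within radius 1.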
One more example, showing the usage of the predict_proba function:
#-*- encoding:utf-8 -*-
import sys
reload(sys)
# this example is supervised learning (k-nearest-neighbor classification)
sys.setdefaultencoding('utf-8')
X = [[0], [1], [2], [3]]
y = [0, 0, 1, 1]
from sklearn.neighbors import KNeighborsClassifier
neigh = KNeighborsClassifier(n_neighbors=3)
neigh.fit(X, y)
print(neigh.predict([[1.1]]))
print(neigh.predict_proba([[0]]))  # probability estimates: P(label 0 | x=0) and P(label 1 | x=0)
print(neigh.predict_proba([[3]]))  # probability estimates: P(label 0 | x=3) and P(label 1 | x=3)
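With the default uniform weights, predict_proba is simply the fraction of each class among the k nearest training neighbors. A quick sketch verifying this, assuming X, y and neigh from the example above are still in scope:

import numpy as np

idx = neigh.kneighbors([[0]], return_distance=False)[0]  # indices of the 3 nearest neighbors of x=0
neighbor_labels = np.asarray(y)[idx]                     # their labels: two 0s and one 1
print(np.bincount(neighbor_labels, minlength=2) / 3.0)   # [0.6667 0.3333], matching predict_proba([[0]])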
Note:
For simplicity, the last two examples use one-dimensional data (Examples 1-3 use three-dimensional points).