# 余弦相似性及欧式距离的计算 (Computing cosine similarity and Euclidean distance)

def normalize(nparray, order=2, axis=0):
    """Scale an N-D numpy array to unit norm along one axis.

    Args:
      nparray: numpy array to scale.
      order: norm order forwarded to ``np.linalg.norm`` (2 is the L2 norm).
      axis: axis along which the norm is computed.

    Returns:
      ``nparray`` divided by its norm along ``axis``; the norm keeps its
      reduced dimension so it broadcasts against the input.
    """
    # A float32 machine epsilon in the denominator keeps all-zero slices
    # from triggering a division by zero.
    tiny = np.finfo(np.float32).eps
    magnitude = np.linalg.norm(nparray, ord=order, axis=axis, keepdims=True)
    return nparray / (magnitude + tiny)

def compute_dist(array1, array2, type='euclidean'):
    """Compute the cosine similarity or Euclidean distance of all pairs.

    Args:
      array1: numpy array with shape [m1, n], or a single vector of shape [n].
      array2: numpy array with shape [m2, n], or a single vector of shape [n].
      type: one of ['cosine', 'euclidean']. 'euclidean' selects the Euclidean
        distance, 'cosine' selects the cosine similarity.

    Returns:
      For 2-D inputs, a numpy array with shape [m1, m2] whose entry (i, j)
      compares row i of ``array1`` with row j of ``array2``.
      For two 1-D vectors, 'euclidean' returns an array of shape (1,) and
      'cosine' returns a scalar.
      Note that the 'cosine' result is a similarity (larger means closer),
      not a distance.

    Raises:
      AssertionError: if ``type`` is not 'cosine' or 'euclidean'.
    """
    assert type in ['cosine', 'euclidean']

    if type == 'cosine':
        # Unit-normalize every feature vector (last axis) so that the dot
        # product below equals the cosine of the angle between the vectors.
        array1 = normalize(array1, axis=-1)
        array2 = normalize(array2, axis=-1)
        return np.matmul(array1, array2.T)

    # ||a - b||^2 = ||a||^2 + ||b||^2 - 2 * a.b, evaluated for all pairs.
    # The squared norms are reduced per vector (last axis); summing over the
    # whole array would add every row's norm into each pair's distance.
    square1 = np.sum(np.square(array1), axis=-1, keepdims=True)    # [m1, 1]
    square2 = np.sum(np.square(array2), axis=-1, keepdims=True).T  # [1, m2]
    squared_dist = square1 + square2 - 2 * np.matmul(array1, array2.T)
    # Rounding can push a true zero slightly negative; clamp before the sqrt.
    return np.sqrt(np.maximum(squared_dist, 0))

# Leave-one-out nearest-neighbour evaluation: for every sample, find the other
# sample whose feature vector is closest in Euclidean distance and check
# whether the two share the same class label. Each match increments `correct`.

# Number of samples to evaluate (2966 records in this data set).
# NOTE(review): presumably equal to len(query_output) -- confirm before
# replacing the constant.
num_samples = 2966

correct = 0

for i in range(num_samples):
    # Start from +inf so the first candidate always becomes the current best.
    min_dist = float('inf')
    index = None
    for j in range(num_samples):
        if i == j:
            # i == j is the same test sample; skip it.
            continue

        distance = compute_dist(query_output[i], query_output[j], type='euclidean')
        # `<=` means that on ties the later index wins.
        if distance <= min_dist:
            min_dist = distance
            index = j

    # `index` stays None only if no candidate compared as closer (e.g. NaN
    # distances); such a sample is counted as a miss.
    if index is not None and query_label_idx[i] == query_label_idx[index]:
        correct += 1

print('accuracy:  %f' % (correct / num_samples))


# 你可能感兴趣的:(余弦相似性及欧式距离的计算)