直接写法
import numpy as np
def bit_product_sum(x, y):
    """Return the sum of element-wise products (dot product) of ``x`` and ``y``.

    Elements are paired with ``zip``, so when the two iterables differ in
    length the surplus elements of the longer one are ignored. Two empty
    inputs give ``0``.
    """
    # A generator expression lets sum() consume the products directly
    # instead of first materialising them in a temporary list.
    return sum(a * b for a, b in zip(x, y))
def cosine_similarity(x, y, norm=False):
    """Compute the cosine similarity of two vectors ``x`` and ``y``.

    Args:
        x: 1-D sequence of numbers (list, tuple or NumPy array).
        y: 1-D sequence of numbers with the same length as ``x``.
        norm: When true, map the cosine from [-1, 1] onto [0, 1] using
            ``0.5 * cos + 0.5``.

    Returns:
        The cosine similarity ``x.y / (|x| * |y|)``. The cosine is undefined
        when either vector is all zeros; in that case ``1.0`` is returned if
        both vectors are all zeros and ``0.0`` otherwise, regardless of
        ``norm``.

    Raises:
        AssertionError: If ``x`` and ``y`` have different lengths.
    """
    assert len(x) == len(y), "len(x) != len(y)"

    x_arr = np.asarray(x, dtype=float)
    y_arr = np.asarray(y, dtype=float)

    # Detect zero vectors on the array form so that lists, tuples and NumPy
    # arrays are all handled; comparing against a Python list of zeros only
    # works when the input is itself a list.
    x_is_zero = not x_arr.any()
    y_is_zero = not y_arr.any()
    if x_is_zero or y_is_zero:
        return float(1) if x_is_zero and y_is_zero else float(0)

    # np.dot performs the three reductions (x.y, x.x, y.y) in native code.
    # This is the same quantity as accumulating x[i] * y[i], x[i] * x[i] and
    # y[i] * y[i] in an explicit Python loop.
    dot_product = np.dot(x_arr, y_arr)
    norm_x = np.sqrt(np.dot(x_arr, x_arr))
    norm_y = np.sqrt(np.dot(y_arr, y_arr))
    cos = dot_product / (norm_x * norm_y)

    # Optionally rescale into the [0, 1] interval.
    return 0.5 * cos + 0.5 if norm else cos
第一种:调用 sklearn 接口
import sklearn.metrics.pairwise as pw
# Local import: `import sklearn.metrics.pairwise as pw` binds only the name
# `pw`, so the bare name `sklearn` is not available for
# `sklearn.preprocessing.normalize`.
from sklearn.preprocessing import normalize

# Normalise both feature matrices with sklearn's defaults before comparing.
leftfeature = normalize(leftfeature)
rightfeature = normalize(rightfeature)
print("计算cosdistance")  # call the sklearn API

# pairwise_distances with metric='cosine' yields the cosine *distance*
# (1 - cosine similarity) for every left-row / right-row combination.
dis = pw.pairwise_distances(leftfeature, rightfeature, metric='cosine')
dis = 1 - dis  # convert back to cosine similarity, range [-1, 1]

# Keep only the matched pairs: entry (i, i) compares left sample i with right
# sample i. Fancy indexing returns a fresh array and still raises IndexError
# if the matrix has fewer rows/columns than there are labels.
# NOTE(review): only the diagonal is used, so pw.paired_cosine_distances could
# avoid building the full matrix -- confirm both inputs have one row per label.
pair_idx = np.arange(len(labels))
distance = dis[pair_idx, pair_idx]

# Compute the extrema once instead of on every loop iteration.
distance_min = np.min(distance)
distance_max = np.max(distance)
print('Distance before normalization:\n', distance)
print('Distance max:', distance_max, 'Distance min:', distance_min, '\n')

# Min-max scale the scores into [0, 1] so they are comparable with the 0/1
# labels: (value - min) / (max - min).
distance_range = distance_max - distance_min
if distance_range == 0:
    # All scores are identical; avoid 0/0 and map everything to 0.
    distance_norm = np.zeros_like(distance)
else:
    distance_norm = (distance - distance_min) / distance_range
print('Distance after normalization:\n', distance_norm)

# Compute the accuracy from distance_norm and labels.
highestAccuracy, threshold = calculate_accuracy(distance_norm, labels, len(labels))
第二种:直接计算得到距离
# L2-normalise every feature vector along the last axis. The norm is floored
# at a tiny epsilon so an all-zero row stays zero instead of becoming NaN
# (NaN similarities would corrupt the ranking below).
g_feats = g_feats / np.maximum(np.sqrt(np.sum(g_feats ** 2, -1, keepdims=True)), 1e-12)
t_feats = t_feats / np.maximum(np.sqrt(np.sum(t_feats ** 2, -1, keepdims=True)), 1e-12)
# gallery_label = np.concatenate((test_feats30, test_feats40), axis=0)  # concatenate
### Feature extraction is done; compare features for the top-k test.
print("特征的数量维度",img_feats.shape)
correct10 = 0
correct1 = 0
for i, line in enumerate(t_feats):  # one query feature per iteration
    # Broadcasting multiplies the query against every gallery row, giving the
    # same values as tiling the query to the gallery's shape without the
    # extra copy. With unit-length rows this is the cosine similarity.
    dis = np.sum(g_feats * line, 1)
    # argsort sorts ascending; negating the similarities puts the most
    # similar gallery entry (cosine closest to 1) first.
    sort_index = np.argsort(-dis, axis=0)
#然后 pytorch
两个特征的余弦相似度,就是它们各自经过 L2 归一化之后的内积;L2 归一化计算的就是公式中的 A/|A|。
得到的余弦相似度范围是 [-1, 1],越接近 1 表示越相似;取 1-cos 之后范围变成 [0, 2],含义与欧式距离一致:0 表示最相似,
1 对应余弦相似度为 0,基本已经不相似了。所以 1-cos 越小越相似,大于 1 基本不可能相似;也不需要特意把范围缩放到 [0, 1],因为在 [-1, 1] 范围内,余弦相似度小于 0(甚至小于 0.3 这样的阈值)时基本不可能相似。
# Build the query-by-gallery distance matrix `dist` (rows: queries, columns:
# gallery entries) using either cosine or euclidean distance.
assert metric in ["cosine", "euclidean"], "must choose from [cosine, euclidean], but got {}".format(metric)

# torch.from_numpy is applied to both inputs, so they are NumPy arrays here;
# convert once and use the tensors in both branches.
query_tensor = torch.from_numpy(query_features)
gallery_tensor = torch.from_numpy(gallery_features)

if metric == "cosine":
    # Cosine distance = 1 - inner product of the L2-normalised rows.
    query_feat = F.normalize(query_tensor, dim=1)
    gallery_feat = F.normalize(gallery_tensor, dim=1)
    dist = 1 - torch.mm(query_feat, gallery_feat.t())
else:
    # Euclidean distance via ||q - g||^2 = ||q||^2 + ||g||^2 - 2 * q.g
    # Sizes are read from the tensors: ndarray.size is an int attribute and
    # NumPy arrays have no .t(), so the arrays cannot be used directly.
    m, n = query_tensor.size(0), gallery_tensor.size(0)
    xx = torch.pow(query_tensor, 2).sum(1, keepdim=True).expand(m, n)
    yy = torch.pow(gallery_tensor, 2).sum(1, keepdim=True).expand(n, m).t()
    # Explicit matrix product instead of the deprecated positional
    # addmm_(beta, alpha, mat1, mat2) call.
    dist = xx + yy - 2 * torch.mm(query_tensor, gallery_tensor.t())
    dist = dist.clamp(min=1e-12).sqrt()  # for numerical stability