余弦距离比对的几种写法(cos 距离)

余弦距离比对对的几种写法 cos距离_第1张图片

直接写法

import numpy as np


def bit_product_sum(x, y):
    """Return the sum of the element-wise products of ``x`` and ``y``.

    The two iterables are walked in lockstep with ``zip``, so the result
    covers only as many positions as the shorter input provides. Two empty
    inputs give ``0``.
    """
    return sum(left * right for left, right in zip(x, y))


def cosine_similarity(x, y, norm=False):
    """Compute the cosine similarity of two equal-length vectors.

    Args:
        x: First vector; any 1-D sequence (list, tuple) or NumPy array of
            numbers.
        y: Second vector, same length as ``x``.
        norm: When true, map the result from [-1, 1] onto [0, 1] via
            ``0.5 * cos + 0.5``.

    Returns:
        The cosine similarity as a float. The cosine is undefined when a
        vector is all zeros; in that case 1.0 is returned if both vectors
        are all zeros and 0.0 if only one of them is (``norm`` is not
        applied to these two special values).

    Raises:
        AssertionError: If ``x`` and ``y`` differ in length.
    """
    assert len(x) == len(y), "len(x) != len(y)"

    # Convert once so lists, tuples and arrays all take the same path;
    # comparing an array against a list with ``==`` would be element-wise
    # and cannot be used directly in an ``if``.
    vec_x = np.asarray(x, dtype=float)
    vec_y = np.asarray(y, dtype=float)

    x_is_zero = not vec_x.any()
    y_is_zero = not vec_y.any()
    if x_is_zero or y_is_zero:
        # Avoid 0/0: two zero vectors are treated as identical, a zero
        # vector against a non-zero one as completely dissimilar.
        return 1.0 if x_is_zero and y_is_zero else 0.0

    # cos = <x, y> / (|x| * |y|). The same value can be obtained with
    # bit_product_sum(x, y) / (sqrt(bit_product_sum(x, x)) *
    # sqrt(bit_product_sum(y, y))) or with an explicit accumulation loop.
    cos = float(np.dot(vec_x, vec_y) / (np.linalg.norm(vec_x) * np.linalg.norm(vec_y)))

    return 0.5 * cos + 0.5 if norm else cos

第一种 调用 sklearn 接口

import sklearn.metrics.pairwise as pw
    leftfeature = sklearn.preprocessing.normalize(leftfeature)
    rightfeature= sklearn.preprocessing.normalize(rightfeature)
    print ("计算cosdistance")  # 调用api接口
    dis = pw.pairwise_distances(leftfeature, rightfeature, metric='cosine')  # 返回的是什么
    dis = 1-dis   #cos distance [-1,1] 
    distance = np.empty((len(labels),))  # len(labels)=6000,把返回的值存到distance变量中
    # print (len(labels))
    for i in range(len(labels)):
        distance[i] = dis[i][i]
    print ('Distance before normalization:\n', distance)
    print ('Distance max:', np.max(distance), 'Distance min:', np.min(distance), '\n')
    # 距离需要归一化到0-1,与标签0-1匹配  每个值-最小/最大-最小  ,保证区间在0-1
    distance_norm = np.empty((len(labels),))
    for i in range(len(labels)):
        distance_norm[i] = (distance[i] - np.min(distance)) / (np.max(distance) - np.min(distance))
    print ('Distance after normalization:\n', distance_norm)
    # 由distance_norm 和labels 计算精度
    highestAccuracy, threshold = calculate_accuracy(distance_norm, labels, len(labels))

第二种直接计算得到距离

    # L2-normalize along the last axis: each vector is divided by the square
    # root of the sum of its squared entries.
    g_feats = g_feats / np.sqrt(np.sum(g_feats ** 2, -1, keepdims=True))
    t_feats = t_feats / np.sqrt(np.sum(t_feats ** 2, -1, keepdims=True))
    # gallery_label=np.concatenate((test_feats30, test_feats40), axis=0)  # concatenate
    
    ### Feature extraction is done; run the comparison (top-k test).
    print("特征的数量维度",img_feats.shape)
    # Counters, presumably for top-10 / top-1 hits; they are not updated in
    # the part of the loop visible here -- TODO confirm against the full code.
    correct10 = 0
    correct1= 0
    for i,line in enumerate(t_feats):  # one entry of t_feats (model output) per iteration
        # Repeat this vector len(g_feats) times so it can be multiplied
        # element-wise with g_feats (assumes line is 1-D -- TODO confirm).
        line = np.tile(line,(len(g_feats),1))
        # Sum of element-wise products along axis 1: the dot product of this
        # vector with each row of g_feats. Both sides were L2-normalized
        # above, so this is the cosine similarity.
        dis = np.sum(g_feats * line, 1)
        # argsort orders ascending, so sorting -dis puts the largest
        # similarity (best match, at most 1) first.
        sort_index = np.argsort(-dis, axis=0)

#然后 pytorch

两个特征的余弦相似度,就是它们各自经过 L2 归一化之后的内积;L2 归一化计算的正是公式中的 $A/\|A\|$。
得到的余弦相似度范围是 [-1, 1],越接近 1 表示越相似;取 1-cos 之后范围变成 [0, 2],含义和欧式距离一样:0 表示最相似,
1-cos 等于 1 对应余弦相似度为 0,基本已经不相似了。所以 1-cos 是越小越接近,大于 1 基本不可能相似,也不用特意把范围缩小到 [0, 1] 之间,因为在 [-1, 1] 的余弦相似度上,比 0 小甚至比 0.3 小的值基本不可能相似了。

    assert metric in ["cosine", "euclidean"], "must choose from [cosine, euclidean], but got {}".format(metric)
    # The inputs are NumPy arrays; convert them once so both branches work on
    # tensors (ndarray has no .size(0) / .t() and cannot be passed to addmm_).
    query_tensor = torch.from_numpy(query_features)
    gallery_tensor = torch.from_numpy(gallery_features)
    if metric == "cosine":
        # Inner product of L2-normalized rows is the cosine similarity;
        # 1 - similarity gives the cosine distance (0 = most similar).
        query_feat = F.normalize(query_tensor, dim=1)
        gallery_feat = F.normalize(gallery_tensor, dim=1)
        dist = 1 - torch.mm(query_feat, gallery_feat.t())  # rows: query, columns: gallery

    else:
        # Euclidean distance via |q - g|^2 = |q|^2 + |g|^2 - 2 * q.g
        m, n = query_tensor.size(0), gallery_tensor.size(0)
        xx = torch.pow(query_tensor, 2).sum(1, keepdim=True).expand(m, n)
        yy = torch.pow(gallery_tensor, 2).sum(1, keepdim=True).expand(n, m).t()
        dist = xx + yy
        # dist = 1 * dist + (-2) * (query @ gallery^T); keyword form replaces
        # the removed positional addmm_(beta, alpha, mat1, mat2) signature.
        dist.addmm_(query_tensor, gallery_tensor.t(), beta=1, alpha=-2)
        dist = dist.clamp(min=1e-12).sqrt()  # for numerical stability

你可能感兴趣的:(余弦距离比对对的几种写法 cos距离)