numpy和pytorch的argsort结果不同

Notes

将 mAP(@R)计算代码 改一份 pytorch 版的,结果跑出不同结果,发现 numpy 和 pytorch 的 argsort 返回的结果不同,测试算出的 mAP 差异有点大。
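但是…并不能说有错:两者的差别只出现在距离相等(tie)的元素上。numpy 的结果看起来是按下标升序排的(表现得像稳定排序;但注意 `np.argsort` 默认 `kind="quicksort"` 并不保证稳定,要显式指定 `kind="stable"` 才有保证),pytorch 则没有保持这个顺序(`torch.argsort` 默认同样不保证稳定,较新的版本可以传 `stable=True`)。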
这应该是指标的锅,没考虑到这种情况,需要修正定义,参见:tie-aware的检索指标。

Environment

  • numpy 1.18.1
  • pytorch 1.14.0

Code

in numpy

import numpy as np


def hamming(A, B):
    """Pairwise Hamming distance between two sets of binary codes.

    Args:
        A: array of shape [n, bit] with elements in {-1, 1}
        B: array of shape [m, bit] with elements in {-1, 1}

    Returns:
        [n, m] array of dtype `np.intc`; for +/-1 codes, entry (i, j) is
        the number of positions where `A[i]` and `B[j]` differ.
    """
    n_bits = A.shape[1]
    assert n_bits == B.shape[1]
    # For +/-1 codes the inner product is (#equal bits - #different bits),
    # hence #different bits = (bit - <a, b>) / 2.
    inner = A.dot(B.T)
    half_gap = (n_bits - inner) // 2
    return half_gap.astype(np.intc)


def calc_mAP(qF, rF, qL, rL, what=0, k=-1):
    """Calculate mean average precision (mAP) of a retrieval ranking.

    A retrieval item counts as relevant to a query when their label vectors
    have a positive inner product. Debug information (distance matrix,
    ranking and per-query intermediate values) is printed along the way.

    Args:
        qF: query feature/hash matrix, one row per query
        rF: retrieval feature/hash matrix, one row per retrieval item
        qL: query label matrix
        rL: retrieval label matrix
        what: {0: cosine, 1: hamming, 2: euclidean}
        k: mAP@k, default `-1` means mAP@ALL

    Returns:
        Sum of the per-query APs divided by the total number of queries.
        Queries with no relevant item (overall, or within the top k) add
        nothing to the sum but are still counted in the denominator.

    Raises:
        ValueError: if `what` is not one of 0, 1, 2.
    """
    n_query = qF.shape[0]
    if k == -1 or k > rF.shape[0]:
        k = rF.shape[0]
    # Builtin `int` replaces the `np.int` alias, which was deprecated in
    # NumPy 1.20 and removed in 1.24 (it was the very same type).
    Gnd = (np.dot(qL, rL.transpose()) > 0).astype(int)

    # `cos`, `hamming` and `euclidean` are helpers expected at module level.
    if what == 0:
        Dist = 1 - cos(qF, rF)
    elif what == 1:
        Dist = hamming(qF, rF)
    elif what == 2:
        Dist = euclidean(qF, rF)
    else:
        raise ValueError(
            "what must be 0 (cosine), 1 (hamming) or 2 (euclidean), got %r"
            % (what,))

    # NOTE: the default sort kind does not guarantee a stable order for equal
    # distances, and AP depends on how such ties are ordered. Pass
    # kind="stable" here if a reproducible tie order is required.
    Rank = np.argsort(Dist)
    # Show the distance matrix that was actually ranked.
    print("dist:\n", Dist)
    print("rank:\n", Rank)

    AP = 0.0
    for it in range(n_query):
        print("---", it, "---")
        gnd = Gnd[it]
        if np.sum(gnd) == 0:
            # No relevant item anywhere in the retrieval set.
            continue
        rank = Rank[it][:k]
        gnd = gnd[rank]
        print("gnd:", gnd)
        if np.sum(gnd) == 0:
            # No relevant item within the top k.
            continue
        # 1-based rank positions of the relevant items (shape [1, n_rel]).
        pos = np.asarray(np.where(gnd == 1.)) + 1.0
        print("pos:", pos)
        # Number of relevant items seen up to each of those positions.
        rel_cnt = np.arange(pos.shape[-1]) + 1.0
        print("rel_cnt:", rel_cnt)
        # Precision at each relevant position, averaged.
        ap = np.mean(rel_cnt / pos)
        AP += ap
        print("ap:", ap)

    mAP = AP / n_query
    return mAP


if __name__ == "__main__":
    # Toy example: 4 query codes and 6 retrieval codes, 4 bits each,
    # with elements in {-1, 1}.
    query_codes = np.array([
        [1, -1, 1, 1],
        [-1, -1, -1, 1],
        [1, 1, -1, 1],
        [1, 1, 1, -1],
    ])
    retrieval_codes = np.array([
        [1, -1, 1, -1],
        [-1, -1, 1, -1],
        [-1, -1, 1, -1],
        [1, 1, -1, -1],
        [-1, 1, -1, -1],
        [1, 1, -1, 1],
    ])
    # Multi-hot label rows, aligned with the code rows above.
    query_labels = np.array([
        [0, 1, 0, 0],
        [1, 1, 0, 0],
        [1, 0, 0, 1],
        [0, 1, 0, 1],
    ])
    retrieval_labels = np.array([
        [1, 0, 0, 1],
        [1, 1, 0, 0],
        [0, 1, 1, 0],
        [0, 0, 1, 0],
        [1, 0, 0, 0],
        [0, 0, 1, 0],
    ])
    # what=1 selects Hamming distance for the ranking.
    score = calc_mAP(
        query_codes, retrieval_codes, query_labels, retrieval_labels, what=1)
    print("mAP test:", score)

输出:

dist:
[[1 2 2 3 4 2]
 [3 2 2 3 2 2]
 [3 4 4 1 2 0]
 [1 2 2 1 2 2]]
rank:
[[0 1 2 5 3 4]
 [1 2 4 5 0 3]
 [5 3 4 0 1 2]
 [0 3 1 2 4 5]]
--- 0 ---
gnd: [0 1 1 0 0 0]
pos: [[2. 3.]]
rel_cnt: [1. 2.]
ap: 0.5833333333333333
--- 1 ---
gnd: [1 1 1 0 1 0]
pos: [[1. 2. 3. 5.]]
rel_cnt: [1. 2. 3. 4.]
ap: 0.95
--- 2 ---
gnd: [0 0 1 1 1 0]
pos: [[3. 4. 5.]]
rel_cnt: [1. 2. 3.]
ap: 0.4777777777777777
--- 3 ---
gnd: [1 0 1 1 0 0]
pos: [[1. 3. 4.]]
rel_cnt: [1. 2. 3.]
ap: 0.8055555555555555
mAP test: 0.7041666666666666

in pytorch

import torch


def hamming(A, B=None):
    """Pairwise Hamming distance between two sets of binary codes.

    Args:
        A: tensor of shape [n, bit] with elements in {-1, 1}
        B: tensor of shape [m, bit] with elements in {-1, 1};
            when omitted, `A` is compared against itself

    Returns:
        [n, m] tensor; for +/-1 codes, entry (i, j) is the number of
        positions where the i-th row of `A` and the j-th row of `B` differ.
    """
    other = A if B is None else B
    n_bits = A.size(1)
    # For +/-1 codes the inner product is (#equal bits - #different bits),
    # hence #different bits = (bit - <a, b>) / 2.
    return (n_bits - A.mm(other.t())) / 2


def calc_mAP(qF, rF, qL, rL, what=0, k=-1):
    """Calculate mean average precision (mAP) of a retrieval ranking.

    A retrieval item counts as relevant to a query when their label vectors
    have a positive inner product. Debug information (distance matrix,
    ranking and per-query intermediate values) is printed along the way.

    Args:
        qF: query feature/hash matrix, one row per query
        rF: retrieval feature/hash matrix, one row per retrieval item
        qL: query label matrix
        rL: retrieval label matrix
        what: distance used for ranking, one of
            {0 / "cosine", 1 / "hamming", 2 / "euclidean"}
        k: mAP@k, default `-1` means mAP@ALL

    Returns:
        0-dim tensor: sum of the per-query APs divided by the total number
        of queries. Queries with no relevant item (overall, or within the
        top k) add nothing to the sum but are still counted in the
        denominator.

    Raises:
        ValueError: if `what` is not one of the supported selectors.
    """
    n_query = qF.size(0)
    if k == -1 or k > rF.size(0):
        k = rF.size(0)
    Gnd = (torch.mm(qL, rL.t()) > 0).int()

    # `cos`, `hamming` and `euclidean` are helpers expected at module level.
    if what == 0 or what == "cosine":
        Dist = 1 - cos(qF, rF)
    elif what == 1 or what == "hamming":
        Dist = hamming(qF, rF)
    elif what == 2 or what == "euclidean":
        Dist = euclidean(qF, rF)
    else:
        # A fourth metric (what == 3, `itom_dist`) was sketched but never
        # enabled, so it is rejected like any other unknown selector.
        raise ValueError(
            "what must be 0/'cosine', 1/'hamming' or 2/'euclidean', got %r"
            % (what,))

    # NOTE: torch.argsort does not guarantee a stable order for equal
    # distances by default, and AP depends on how such ties are ordered.
    # Newer PyTorch releases accept `stable=True` for a reproducible order.
    Rank = torch.argsort(Dist)
    # Show the distance matrix that was actually ranked.
    print("dist:\n", Dist)
    print("rank:\n", Rank)

    # Accumulate in a tensor so the return type is the same even when every
    # query is skipped below.
    AP = torch.zeros((), device=Gnd.device)
    for it in range(n_query):
        print("---", it, "---")
        gnd = Gnd[it]
        if gnd.sum() == 0:
            # No relevant item anywhere in the retrieval set.
            continue
        rank = Rank[it][:k]
        gnd = gnd[rank]
        print("gnd:", gnd)
        if gnd.sum() == 0:
            # No relevant item within the top k.
            continue
        # 1-based rank positions of the relevant items.
        pos = gnd.nonzero().flatten().float() + 1.0
        print("pos:", pos)
        # Number of relevant items seen up to each of those positions, built
        # directly on the device of `pos` instead of a global `device`.
        rel_cnt = torch.arange(
            1, pos.numel() + 1, device=pos.device, dtype=pos.dtype)
        print("rel_cnt:", rel_cnt)
        # Precision at each relevant position, averaged.
        ap = (rel_cnt / pos).mean()
        AP += ap
        print("ap:", ap.item())

    mAP = AP / n_query
    return mAP


if __name__ == "__main__":
    # This snippet uses `device` but never defines it; define it here and
    # fall back to the CPU when CUDA is unavailable so the script always runs.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Toy example: 4 query codes and 6 retrieval codes, 4 bits each,
    # with elements in {-1, 1}.
    qB = torch.tensor([[1, -1, 1, 1],
                       [-1, -1, -1, 1],
                       [1, 1, -1, 1],
                       [1, 1, 1, -1]],
                      dtype=torch.float32, device=device)
    rB = torch.tensor([[1, -1, 1, -1],
                       [-1, -1, 1, -1],
                       [-1, -1, 1, -1],
                       [1, 1, -1, -1],
                       [-1, 1, -1, -1],
                       [1, 1, -1, 1]],
                      dtype=torch.float32, device=device)
    # Multi-hot label rows, aligned with the code rows above.
    qL = torch.tensor([[0, 1, 0, 0],
                       [1, 1, 0, 0],
                       [1, 0, 0, 1],
                       [0, 1, 0, 1]],
                      dtype=torch.float32, device=device)
    rL = torch.tensor([[1, 0, 0, 1],
                       [1, 1, 0, 0],
                       [0, 1, 1, 0],
                       [0, 0, 1, 0],
                       [1, 0, 0, 0],
                       [0, 0, 1, 0]],
                      dtype=torch.float32, device=device)
    # what=1 selects Hamming distance for the ranking.
    print("mAP test:", calc_mAP(qB, rB, qL, rL, what=1).item())

输出:

dist:
tensor([[1., 2., 2., 3., 4., 2.],
        [3., 2., 2., 3., 2., 2.],
        [3., 4., 4., 1., 2., 0.],
        [1., 2., 2., 1., 2., 2.]], device='cuda:0')
rank:
tensor([[0, 2, 1, 5, 3, 4],
        [4, 5, 1, 2, 3, 0],
        [5, 3, 4, 0, 2, 1],
        [0, 3, 4, 5, 2, 1]], device='cuda:0')
--- 0 ---
gnd: tensor([0, 1, 1, 0, 0, 0], device='cuda:0', dtype=torch.int32)
pos: tensor([2., 3.], device='cuda:0')
rel_cnt: tensor([1., 2.], device='cuda:0')
ap: 0.5833333730697632
--- 1 ---
gnd: tensor([1, 0, 1, 1, 0, 1], device='cuda:0', dtype=torch.int32)
pos: tensor([1., 3., 4., 6.], device='cuda:0')
rel_cnt: tensor([1., 2., 3., 4.], device='cuda:0')
ap: 0.7708333730697632
--- 2 ---
gnd: tensor([0, 0, 1, 1, 0, 1], device='cuda:0', dtype=torch.int32)
pos: tensor([3., 4., 6.], device='cuda:0')
rel_cnt: tensor([1., 2., 3.], device='cuda:0')
ap: 0.444444477558136
--- 3 ---
gnd: tensor([1, 0, 0, 0, 1, 1], device='cuda:0', dtype=torch.int32)
pos: tensor([1., 5., 6.], device='cuda:0')
rel_cnt: tensor([1., 2., 3.], device='cuda:0')
ap: 0.6333333253860474
mAP test: 0.6079860925674438

令人窒息…

你可能感兴趣的:(机器学习)