# Two independent batches of 100 random 128-dimensional vectors
# (a is drawn first, then b).
a, b = (torch.randn(100, 128) for _ in range(2))
# Called directly on two equally shaped batches, cosine_similarity only pairs
# a[i] with b[i], so the result holds one score per row.
torch.cosine_similarity(a, b, dim=-1).shape
>>> torch.Size([100])
上面的写法只会逐行计算 a[i] 与 b[i] 的相似度,结果的形状是 [N];而我们希望得到的是 a 的每一行与 b 的每一行两两之间的相似度,也就是一个 [N, N] 的矩阵。
方法一:循环
def get_att_dis(target, behaviored):
    """Return the cosine similarity of every ``target`` row with every ``behaviored`` row.

    Loop-based variant: each ``target[i]`` is flattened to shape ``(1, D)``
    and compared against all rows of ``behaviored`` at once, and the per-row
    results are stacked.

    Args:
        target: Tensor whose first dimension indexes the query vectors.
        behaviored: Tensor compared against each query; expected to have
            shape ``(M, D)`` so that ``dim=1`` is the feature axis.

    Returns:
        Tensor of shape ``(N, M)`` where entry ``[i, j]`` is the cosine
        similarity between ``target[i]`` and ``behaviored[j]``. When
        ``target`` has no rows, an empty ``(0, M)`` tensor is returned.
    """
    # torch.stack rejects an empty list, so handle "no queries" explicitly.
    if target.size(0) == 0:
        return target.new_empty((0, behaviored.size(0)))

    # Cosine similarity between one query row and every row of `behaviored`.
    # dim=1 is the function's default; it is spelled out here for clarity.
    attention_distribution = [
        torch.cosine_similarity(row.reshape(1, -1), behaviored, dim=1)
        for row in target
    ]
    return torch.stack(attention_distribution)
方法二:广播
# Insert a singleton axis into each batch (a at position 1, b at position 0)
# so the two broadcast against each other; reducing over the last axis then
# scores every row of a against every row of b in a single call.
s = torch.cosine_similarity(
    torch.unsqueeze(a, 1),
    torch.unsqueeze(b, 0),
    dim=-1,
)
s.shape
>>> torch.Size([100, 100])