The similarity computation methods in this article are excerpted from: BiMPM
1 Weighted cosine similarity
This method differs from the standard cosine similarity in that it applies a weight to the two input vectors; the weight can be trained to adjust the vectors and produce a more accurate similarity.
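For reference, the matching function this code implements can be written roughly as follows (my paraphrase of BiMPM's multi-perspective cosine matching, not copied verbatim from the paper; $W_k$ is the $k$-th row of the trainable weight matrix and $\circ$ is element-wise multiplication):

$$ m_k = \cos\big(W_k \circ v_1,\; W_k \circ v_2\big), \qquad W \in \mathbb{R}^{l \times d},\ k = 1, \dots, l $$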
import torch
import torch.nn as nn
import torch.nn.functional as F  # only needed if the commented-out memory trick below is used


class CosineSimilarityWithW(nn.Module):
    def __init__(self):
        super(CosineSimilarityWithW, self).__init__()
        self.l = 1
        self.hidden_size = 30
        # the original source code creates eight weight matrices this way, so I copied it over directly; changing it to 2 also works
        for i in range(1, 9):
            setattr(self, f'mp_w{i}',
                    nn.Parameter(torch.rand(self.l, self.hidden_size)))
        self.reset_parameters()

    def reset_parameters(self):
        for i in range(1, 9):
            w = getattr(self, f'mp_w{i}')
            nn.init.kaiming_normal_(w)

    def mp_matching_func(self, v1, v2, w):
        """
        :param v1: (batch, seq_len, hidden_size)
        :param v2: (batch, seq_len, hidden_size) or (batch, hidden_size)
        :param w: (l, hidden_size)
        :return: (batch, seq_len, l)
        """
        seq_len = v1.size(1)
        # Trick for large memory requirement
        """
        if len(v2.size()) == 2:
            v2 = torch.stack([v2] * seq_len, dim=1)
        m = []
        for i in range(self.l):
            # v1: (batch, seq_len, hidden_size)
            # v2: (batch, seq_len, hidden_size)
            # w: (1, 1, hidden_size)
            # -> (batch, seq_len)
            m.append(F.cosine_similarity(w[i].view(1, 1, -1) * v1, w[i].view(1, 1, -1) * v2, dim=2))
        # list of (batch, seq_len) -> (batch, seq_len, l)
        m = torch.stack(m, dim=2)
        """
        # (1, 1, hidden_size, l)
        w = w.transpose(1, 0).unsqueeze(0).unsqueeze(0)
        # (batch, seq_len, hidden_size, l)
        v1 = w * torch.stack([v1] * self.l, dim=3)
        print(v1.size())
        if len(v2.size()) == 3:
            v2 = w * torch.stack([v2] * self.l, dim=3)
        else:
            v2 = w * torch.stack([torch.stack([v2] * seq_len, dim=1)] * self.l, dim=3)
        m = torch.cosine_similarity(v1, v2, dim=2)
        return m
# randomly generate input tensors
tensor_1 = torch.randn((5, 6, 30))
print(tensor_1.size())
tensor_2 = torch.randn((5, 4, 30))
hidden = 30
# instantiate the weighted cosine module
cos = CosineSimilarityWithW()
# declare a bidirectional LSTM
lstm = nn.LSTM(30, hidden, bidirectional=True, batch_first=True)
tensor_1, _ = lstm(tensor_1)
tensor_2, _ = lstm(tensor_2)
# use torch.split to separate the bidirectional LSTM output into single directions: fw is forward, bw is backward
tensor_1_fw, tensor_1_bw = torch.split(tensor_1, hidden, dim=-1)
tensor_2_fw, tensor_2_bw = torch.split(tensor_2, hidden, dim=-1)
print(tensor_1_fw.size())
print(tensor_2_fw.size())
# compute the matching in each direction; the second argument takes the last time step of the other LSTM output (the first time step for the backward direction)
mv_p_full_fw = cos.mp_matching_func(tensor_1_fw, tensor_2_fw[:, -1, :], cos.mp_w1)
mv_p_full_bw = cos.mp_matching_func(tensor_1_bw, tensor_2_bw[:, 0, :], cos.mp_w2)
mv_h_full_fw = cos.mp_matching_func(tensor_2_fw, tensor_1_fw[:, -1, :], cos.mp_w1)
mv_h_full_bw = cos.mp_matching_func(tensor_2_bw, tensor_1_bw[:, 0, :], cos.mp_w2)
print(mv_p_full_fw.size())
print(mv_h_full_bw.size())
The output is as follows (I haven't evaluated the actual effect yet; since the inputs are random, the printed values themselves aren't meaningful, so only the sizes are printed):
torch.Size([5, 6, 30])
torch.Size([5, 6, 30])
torch.Size([5, 4, 30])
torch.Size([5, 6, 30, 1])
torch.Size([5, 6, 30, 1])
torch.Size([5, 4, 30, 1])
torch.Size([5, 4, 30, 1])
torch.Size([5, 6, 1])
torch.Size([5, 4, 1])
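As a small sanity check of my own (not part of the original BiMPM code): with l = 1 and an all-ones weight, mp_matching_func should reduce to the ordinary cosine similarity along the hidden dimension.
import torch

# my own check, not from the original post: an all-ones weight makes the
# weighted cosine equal to the plain cosine similarity
cos_check = CosineSimilarityWithW()
v1 = torch.randn(5, 6, 30)
v2 = torch.randn(5, 30)
ones_w = torch.ones(1, 30)                                            # (l=1, hidden_size)
m = cos_check.mp_matching_func(v1, v2, ones_w)                        # (5, 6, 1)
plain = torch.cosine_similarity(v1, v2.unsqueeze(1).expand_as(v1), dim=2)  # (5, 6)
print(torch.allclose(m.squeeze(-1), plain, atol=1e-6))                # expected: True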
2 Vector-extrema computation based on cosine similarity
Taken from the paper and adapted to PyTorch.
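As far as I can tell from the code below, the score it returns can be summarized as (my own notation, not taken from the paper):

$$ \text{score} = \frac{1}{n_2} \sum_{j=1}^{n_2} \max_{1 \le i \le n_1} \cos\big(e^{(1)}_i,\ e^{(2)}_j\big) $$

that is, for every vector in the second set, take its best cosine match in the first set, then average.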
import torch
import torch.nn as nn
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity


class VectorExtrema(nn.Module):
    def forward(self, emb_1, emb_2):
        print("1size", emb_1.size())
        print("2size", emb_2.size())
        emb_1, emb_2 = emb_1.numpy(), emb_2.numpy()
        # pairwise cosine similarities between all rows: (n1, n2)
        cos_similarity = cosine_similarity(emb_1, emb_2)
        print("cos_similarity", cos_similarity)
        # keep the best match in emb_1 for every vector of emb_2, then average
        cos_similarity = np.max(cos_similarity, axis=0).mean()
        print("after( max mean): ", cos_similarity)
        return torch.from_numpy(np.array(cos_similarity))


ve = VectorExtrema()
# randomly generate two small embedding matrices
emb_1 = torch.randn(size=(2, 3))
emb_2 = torch.randn(size=(2, 3))
print(emb_1)
print(emb_2)
v = ve(emb_1, emb_2)
print("common cos:", torch.cosine_similarity(emb_1, emb_2))
tensor([[ 0.3011, 0.0058, 0.5957],
[ 0.6495, -0.0936, -0.4076]])
tensor([[ 1.9169, 1.3727, 1.9278],
[-0.0489, 0.8407, -2.1308]])
1size torch.Size([2, 3])
2size torch.Size([2, 3])
cos_similarity [[ 0.85275286 -0.83639467]
[ 0.14057721 0.42835093]]
after( max mean): 0.6405519
common cos: tensor([0.8528, 0.4284])
In effect, instead of summing up the values that cosine_similarity computed above, we pick out the maximum among them (per column) and then take the mean. sklearn's cosine_similarity itself only does the work up to that point, i.e. it produces the pairwise similarity matrix; the aggregation is done separately.
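For comparison, the same computation can also be done in pure PyTorch; the following is a minimal sketch of my own (not from the original post): normalize the rows, build the pairwise cosine matrix with a matrix product, then apply the same max-then-mean reduction.
import torch
import torch.nn.functional as F

def vector_extrema_torch(emb_1, emb_2):
    # (n1, d) @ (d, n2) -> (n1, n2) pairwise cosine similarities
    sim = F.normalize(emb_1, dim=1) @ F.normalize(emb_2, dim=1).t()
    # best match in emb_1 for every vector of emb_2, then the average
    return sim.max(dim=0).values.mean()

emb_1 = torch.randn(2, 3)
emb_2 = torch.randn(2, 3)
print(vector_extrema_torch(emb_1, emb_2))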