# The code is as follows:
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity, paired_distances
def get_cosine_sk(v1, v2):
    """Compute pairwise cosine similarity and cosine distance via scikit-learn.

    Every row of ``v1`` is compared with every row of ``v2``.

    :param v1: m x k array
    :param v2: n x k array
    :return: tuple ``(simi, dist)`` of two m x n arrays, where ``simi[i, j]``
        is the cosine similarity between ``v1[i]`` and ``v2[j]`` and
        ``dist[i, j] = 1 - simi[i, j]`` is the matching cosine distance
    """
    simi = cosine_similarity(v1, v2)
    # Derive the distance from the similarity matrix instead of calling
    # paired_distances(): that helper only compares row i of v1 with row i
    # of v2, so it raises ValueError when m != n and otherwise returns a
    # length-m vector rather than the documented m x n matrix.
    dist = 1 - simi
    return simi, dist
def batch_cosine(x, y):
    """Compute the pairwise cosine similarity between the rows of two arrays.

    :param x: m x k array (or nested sequence convertible to one)
    :param y: n x k array (or nested sequence convertible to one)
    :return: m x n array whose entry ``[i, j]`` is the cosine similarity
        between ``x[i]`` and ``y[j]``; a row with zero norm yields a
        similarity of 0 against every other row
    """
    # Accept plain nested lists as well as ndarrays.
    x = np.asarray(x)
    y = np.asarray(y)

    # Euclidean norm of every row.
    x_norm = np.sum(x ** 2, axis=1) ** 0.5
    y_norm = np.sum(y ** 2, axis=1) ** 0.5

    # An all-zero row would otherwise be divided by 0 and fill the result
    # with NaN. Dividing by 1 keeps the row at zero, so its similarity is 0,
    # which is the same convention scikit-learn's cosine_similarity uses.
    x_norm[x_norm == 0] = 1
    y_norm[y_norm == 0] = 1

    x_unit = x / x_norm[:, np.newaxis]
    y_unit = y / y_norm[:, np.newaxis]

    # Dot products of unit-length rows are the cosine similarities.
    return np.dot(x_unit, y_unit.transpose())