关于余弦相似度的公式,这里不再赘述,直接看如何使用 sklearn 自带的工具计算余弦相似度,具体代码如下:
计算向量和矩阵之间的余弦相似度时,其计算结果是该向量与矩阵中每一行分别计算所得结果的集合。注意:为了方便查看,最终结果是相似度(而非距离),值越大表示越相似。
#%%
from sklearn.metrics.pairwise import cosine_similarity

# "Matrix": a DataFrame stands in for a 4x2 matrix, one row vector per row.
a = pd.DataFrame(
    [[1, 2], [3, 4], [5, 6], [7, 8]],
    columns=['x', 'y'],
    dtype=float,
)
# "Vector": a single-row DataFrame with the same columns as `a`.
b = pd.DataFrame([[1, 2]], columns=['x', 'y'], dtype=float)

# Cosine similarity of every row of `a` against the single row of `b`;
# the DataFrames are passed to scikit-learn directly.
r = cosine_similarity(a, b)
#%%
from sklearn.metrics.pairwise import cosine_similarity

a = pd.DataFrame(
    [[1, 2], [3, 4], [5, 6], [7, 8]],
    columns=['x', 'y'],
    dtype=float,
)
b = pd.DataFrame([[1, 2]], columns=['x', 'y'], dtype=float)

# Stack `a` and `b` vertically, keeping only their shared columns
# (join='inner'). ignore_index=True renumbers the rows of the result.
# NOTE: `df` is not used by the similarity call in this cell.
df = pd.concat([a, b], axis=0, join='inner', ignore_index=True)

# Convert both DataFrames to plain NumPy arrays, then compute the cosine
# similarity of every row of `a` against the single row of `b`.
r = cosine_similarity(a.to_numpy(), b.to_numpy())
array([[1.        ],
       [0.98386991],
       [0.97341717],
       [0.96761727]])
from sklearn.metrics.pairwise import cosine_similarity

# 4x2 "matrix" and a second 2x2 "matrix", both held as DataFrames.
a = pd.DataFrame(
    [[1, 2], [3, 4], [5, 6], [7, 8]],
    columns=['x', 'y'],
    dtype=float,
)
b = pd.DataFrame(
    [[1, 2], [3, 4]],
    columns=['x', 'y'],
    dtype=float,
)

# Stack `a` and `b` vertically, keeping only their shared columns
# (join='inner'). ignore_index=True renumbers the rows of the result.
# NOTE: `df` is not used by the similarity call in this cell.
df = pd.concat([a, b], axis=0, join='inner', ignore_index=True)

# Pairwise cosine similarity between each row of `a` and each row of `b`;
# the DataFrames are passed to scikit-learn directly.
r = cosine_similarity(a, b)
from sklearn.metrics.pairwise import cosine_similarity

# 4x2 "matrix" and a second 2x2 "matrix", both held as DataFrames.
a = pd.DataFrame(
    [[1, 2], [3, 4], [5, 6], [7, 8]],
    columns=['x', 'y'],
    dtype=float,
)
b = pd.DataFrame(
    [[1, 2], [3, 4]],
    columns=['x', 'y'],
    dtype=float,
)

# Stack `a` and `b` vertically, keeping only their shared columns
# (join='inner'). ignore_index=True renumbers the rows of the result.
# NOTE: `df` is not used by the similarity call in this cell.
df = pd.concat([a, b], axis=0, join='inner', ignore_index=True)

# Convert both DataFrames to plain NumPy arrays, then compute the pairwise
# cosine similarity between each row of `a` and each row of `b`.
r = cosine_similarity(a.to_numpy(), b.to_numpy())
array([[1.        , 0.98386991],
       [0.98386991, 1.        ],
       [0.97341717, 0.99868766],
       [0.96761727, 0.99716412]])