使用gensim加载预训练的词向量

使用 gensim 加载预训练的词向量，并采用谷歌 self-attention 中的缩放点积（q·k/√d，未做 softmax）计算不同词之间的相关性

import gensim
import numpy as np
from nltk import word_tokenize

# Example sentence whose tokens are scored against each other.
s = 'Concurrent therapy with ORENCIA and TNF antagonists is not recommended'
token = word_tokenize(s)
print(token)

# Pre-trained embeddings stored in the binary word2vec format.
model = gensim.models.KeyedVectors.load_word2vec_format(r'PubMed-and-PMC-w2v.bin', binary=True)

# One embedding row per token; out-of-vocabulary tokens get a zero vector so
# that row k of `vec` always corresponds to token[k].
# `word in model` replaces `word in model.vocab`: the `vocab` attribute was
# removed in gensim 4, while membership on KeyedVectors works in 3.x and 4.x.
vec = np.asarray([
    model[word] if word in model else np.zeros(model.vector_size)
    for word in token
])

# Scaled dot-product score, q.k / sqrt(d). The dimension d is read from the
# model rather than hard-coded so the scaling stays correct for other
# embedding files.
scale = np.sqrt(model.vector_size)

# Token positions (0-based) used as queries: indices 3, 5 and 6 of `token`.
query_indices = (3, 5, 6)

# For every token vector (the "key"), print its score against each query
# token, in the order given by `query_indices`.
for key in vec:
    for query_index in query_indices:
        print(np.dot(vec[query_index], key) / scale)

你可能感兴趣的:(python,NLP)