Embedding sentences with flair.embeddings


import numpy as np
from gensim.models import KeyedVectors
from sklearn.cluster import KMeans

from flair.data import Sentence
from flair.embeddings import WordEmbeddings, DocumentPoolEmbeddings


# Convert the fastText .vec file (word2vec text format) into gensim's native
# format, which is what flair's WordEmbeddings expects for a custom embedding file.
path = '/Users/ltx/Documents/RF_work/nlp-gym-240119/word2vec/cc.zh.300.vec'

vectors = KeyedVectors.load_word2vec_format(path, binary=False)
vectors.save('/Users/ltx/Documents/RF_work/nlp-gym-240119/word2vec/fasttext_gensim')

# I remember this call pattern now: I had already stored these vectors with save() before.
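
# Optional sanity check: the saved file should round-trip through gensim's native
# loader (KeyedVectors.load is the counterpart of save()); the query word is just
# an illustrative choice.
kv = KeyedVectors.load('/Users/ltx/Documents/RF_work/nlp-gym-240119/word2vec/fasttext_gensim')
print(kv.most_similar('毛驴', topn=3))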

# embeddings = WordEmbeddings('/Users/ltx/Documents/RF_work/nlp-gym-240119/word2vec/fasttext_embedding_cn/fasttext_cn')
"""
sentence = Sentence('我 有 一 个 小 毛毛 驴')  # text passed into Sentence must already be tokenized (whitespace-separated)
embeddings.embed(sentence)
for token in sentence:
    print(token)
    print(token.embedding[0:10])
# Everything here worked correctly; a real relief.
"""


embeddings = WordEmbeddings('/Users/ltx/Documents/RF_work/nlp-gym-240119/word2vec/fasttext_gensim')
# DocumentPoolEmbeddings takes a *list* of token embeddings and pools them
# (mean pooling by default) into a single sentence-level vector.
doc_embeddings = DocumentPoolEmbeddings([embeddings])

input_text = '我有一个小毛驴'

# No Chinese tokenizer is used here: the string is split into single characters,
# so each character becomes one flair token.
sent = Sentence(list(input_text))
print(f'sent:{sent}')
doc_embeddings.embed(sent)

# Detach the per-token vectors from the autograd graph and move them to the CPU.
_current_token_embeddings = [token.embedding.cpu().detach() for token in sent]

print(_current_token_embeddings)  # Hard-won success.
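
# DocumentPoolEmbeddings also attaches the pooled vector to the Sentence object
# itself, not only to its tokens. A minimal follow-up sketch (same setup as above;
# get_embedding() is flair's accessor for that vector):
sentence_vector = sent.get_embedding().cpu().detach()
print(sentence_vector.shape)  # 300 dimensions: mean pooling keeps cc.zh.300's width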

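# The KMeans and numpy imports at the top suggest the eventual goal is clustering
# sentence vectors. A minimal sketch under that assumption; the example texts and
# n_clusters=2 are invented purely for illustration.
texts = ['我有一个小毛驴', '我从来也不骑', '今天天气很好']
doc_vectors = []
for text in texts:
    s = Sentence(list(text))  # character-level tokens, as above
    doc_embeddings.embed(s)
    doc_vectors.append(s.get_embedding().cpu().detach().numpy())

kmeans = KMeans(n_clusters=2, n_init=10, random_state=0)
labels = kmeans.fit_predict(np.vstack(doc_vectors))
print(labels)  # cluster id for each sentence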