1,切分词语的步骤
import jieba
# Load the custom keyword dictionary so jieba takes those entries into
# account when segmenting.
jieba.load_userdict(config.userdict)

# Keyword corpus: one list of tokens per input sentence.
# NOTE: this list was originally created as `corpara_sen` but appended to as
# `corpora_sen`, which raised NameError on the first iteration. `corpora_sen`
# is now the single list; `corpara_sen` is kept as an alias to the same
# object so code using either spelling keeps working.
corpora_sen = []
corpara_sen = corpora_sen
for line in sentences:
    seg_cut = jieba.cut(line.strip())  # segment the stripped line into words
    words = " ".join(seg_cut)  # join the tokens with single spaces
    # Re-splitting on whitespace discards empty / whitespace-only tokens.
    seg_split = words.split()
    # Drop every token that appears in the stop-word collection.
    seg = [word for word in seg_split if word not in stopwords]
    corpora_sen.append(seg)
2,字典不能分的词