import jieba
import pandas as pd

# Load the custom user dictionary so jieba keeps domain-specific terms intact
jieba.load_userdict(r'\百度分词词库.txt')

# Read the stop-word list; quoting=3 (csv.QUOTE_NONE) leaves quote characters untouched
stops_word_path = r'\stopwords_all.txt'
stopwords = pd.read_table(stops_word_path, encoding='utf-8', quoting=3)['words'].tolist()
stopwords.append('\n')
stopwords.append(' ')
# Count word frequencies: segment each line with jieba and skip stop words
dic = dict()
file_path = r'C:\Users\Shy0418\Desktop\text.txt'
with open(file_path, encoding='utf-8', mode='r') as file_read:
    lines = file_read.readlines()

for line in lines:
    for word in jieba.cut(line):
        if word not in stopwords:
            dic[word] = dic.get(word, 0) + 1
dic
# Sort by count (descending), breaking ties by the word itself
word_freq = sorted(dic.items(), key=lambda kv: (kv[1], kv[0]), reverse=True)
word_freq
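# A minimal follow-up sketch (not part of the original script): print the ten
# most frequent words for a quick sanity check. The slice size 10 is an
# arbitrary choice, and word_freq is assumed to be the sorted list from above.
for word, count in word_freq[:10]:
    print(word, count)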