Python词频小工具,可以直接调用

1.先定义FreqWords()函数

from collections import Counter
import jieba

#计算词频
def FreqWords(txt, n_top=None, stopwords=None):
    """Count word frequencies in text segmented with ``jieba.cut``.

    Args:
        txt: Text to segment and count.
        n_top: When truthy, only the ``n_top`` most common words are
            returned. ``None`` (the default) or ``0`` returns the full counter.
        stopwords: Optional words to leave out of the count. May be an
            iterable of words, or a single string of whitespace-separated
            words (for example the raw contents of a stop-word file).

    Returns:
        A list of ``(word, count)`` pairs, most frequent first, when
        ``n_top`` is truthy; otherwise a ``collections.Counter`` mapping
        each word to its count.
    """
    words = jieba.cut(txt)

    if stopwords:
        # A plain string would make ``w not in stopwords`` a substring test,
        # silently dropping any token that appears inside a longer stop word.
        # Split it into individual words first.
        if isinstance(stopwords, str):
            stopwords = stopwords.split()
        # Build the lookup set once: O(1) membership per token, and safe for
        # one-shot iterables that a repeated ``in`` test would exhaust.
        excluded = frozenset(stopwords)
        words = (w for w in words if w not in excluded)

    freq = Counter(words)

    if n_top:
        return freq.most_common(n_top)
    return freq
    
if __name__=='__main__':
    # Interactive entry point: read a text file and a stop-word file, then
    # print the 10 most frequent words.
    file_path = input('请输入文本文件路径:')
    with open(file_path, encoding='utf-8') as f:
        txt = f.read()
    stop_path = input('请输入停用词文件路径:')
    with open(stop_path, encoding='utf-8') as stop:
        # Split the file into individual words. Passing the raw string would
        # turn the stop-word check into a substring match against the whole
        # file contents.
        stopwords = set(stop.read().split())
    # The stop-word file is no longer needed once its words are loaded.
    result = FreqWords(txt, 10, stopwords)
    print(result)

2.将上面的代码保存为FreqWords.py后,在其他脚本中直接导入调用即可

import FreqWords

# Example call: count the words of a short sentence, keeping at most the
# 10 most common ones and applying no stop-word filtering.
# Assumes the definition above is importable as the module ``FreqWords``.
top_words = FreqWords.FreqWords('啦啦啦,今天天气很好', n_top=10, stopwords=None)
print(top_words)

你可能感兴趣的:(文本,python,自然语言处理)