统计文本单词个数

使用标准库 collections.Counter

from collections import Counter
# Accumulate counts of whitespace-separated tokens across the whole file.
c = Counter()
with open('1.txt', 'r', encoding='utf-8') as f:
    # Iterate the file object directly so lines are streamed one at a time
    # instead of first materializing every line in a list.
    for line in f:
        # Counter.update counts the elements of an iterable, so no
        # temporary per-line Counter is needed.
        c.update(line.split())


print(c)

自定义实现(未完善)

# Hand-rolled word count over a sample text, without collections.Counter.
import re  # needed by re.findall below; the snippet did not import it

word_dict = dict()
s = '''python is a magical language.
    python is simple to learn.
    python is 27-year-old.
    When I began to learn python, I was 18 years old.
    '''
# Extract runs of word characters; punctuation and hyphens act as
# separators, so "27-year-old" yields "27", "year" and "old".
word_match = re.findall(r'\b(\w+)\b', s)
for word in word_match:
    # dict.get with a default of 0 covers first-seen and repeated words alike.
    word_dict[word] = word_dict.get(word, 0) + 1

# Sort by count, largest first; sorted() is stable, so words with equal
# counts keep their first-seen order.
result = sorted(word_dict.items(), key=lambda d: d[1], reverse=True)
for each in result:
    print(each)

你可能感兴趣的:(统计文本单词个数)