计算文本中每个单词的使用频率,并从高到低进行排序
from string import punctuation
# Open the input data file 'text.txt' (default read/text mode).
# NOTE(review): no matching close() is visible in this snippet — consider a
# `with open(...)` block wherever the handle is consumed; confirm against callers.
text = open('text.txt')
def count(text):
    """Count case-insensitive word frequencies in an iterable of text lines.

    Each line is split on whitespace, leading/trailing punctuation is
    stripped from every token, and the token is lower-cased before it is
    counted. Tokens that consist solely of punctuation are ignored.

    Args:
        text: Iterable of strings, e.g. an open text file or a list of lines.

    Returns:
        A list of ``(word, occurrences)`` tuples sorted by occurrences in
        descending order; words with equal counts keep first-seen order.
    """
    counts = {}
    for line in text:
        # str.split() with no argument splits on any run of whitespace.
        for token in line.split():
            # Strip surrounding punctuation and normalise case so that
            # "The", "the," and "the." are all counted as the same word.
            word = token.strip(punctuation).lower()
            if not word:
                # Punctuation-only tokens such as "--" strip down to "".
                continue
            counts[word] = counts.get(word, 0) + 1
    # Sort by the count (second tuple element), largest first.
    return sorted(counts.items(), key=lambda pair: pair[1], reverse=True)
输出例子:
[('the', 186),
('of', 120),
('project', 87),
('to', 82),
('ja', 82),
('or', 78),
('you', 73),
('and', 70)]