import re
from string import punctuation
from collections import Counter

# Text file whose word frequencies are reported.
WORD_FILE = 'D://onelife.txt'

# Matches one ASCII punctuation character or one of the CJK title marks.
# re.escape keeps ']', '\', '^' and '-' literal inside the character class;
# without it the backslash is consumed as an escape and never filtered.
PUNCTUATION_RE = re.compile('[{}]'.format(re.escape(punctuation + '《》')))


def count_words(lines):
    """Count how often each word occurs across all of *lines*.

    Each line is lower-cased, punctuation is replaced by spaces, and the
    result is split on whitespace. Counts accumulate over every line, so
    the result describes the whole input rather than a single line.

    Args:
        lines: iterable of strings (an open text file works).

    Returns:
        A ``collections.Counter`` mapping word -> number of occurrences.
    """
    counts = Counter()
    for line in lines:
        counts.update(PUNCTUATION_RE.sub(' ', line.lower()).split())
    return counts


def rank_by_count(counts):
    """Return every ``(word, count)`` pair, highest count first.

    The pairs are sorted ascending and then reversed, so entries with equal
    counts appear in reverse insertion order. All entries are returned,
    including the one with the lowest count.
    """
    return sorted(counts.items(), key=lambda pair: pair[1])[::-1]


def print_word_frequencies(path=WORD_FILE):
    """Print ``word : count`` for every word in the file at *path*.

    Raises:
        OSError: if the file cannot be opened.
    """
    with open(path, encoding='utf-8') as f1:
        counts = count_words(f1)
    for word, count in rank_by_count(counts):
        print(word, ':', count)


# Run the report only when executed as a script.
if __name__ == '__main__':
    print_word_frequencies()
# Second script: countries_zh.csv
# islice is used to skip the CSV header row.
from itertools import islice

# CSV file to report on; its first row is a header.
COUNTRIES_FILE = 'D://countries_zh.csv'


def parse_country_rows(lines):
    """Build a mapping from the first four CSV columns to the fifth.

    The first line is skipped as a header. For every following non-blank
    line, the first four comma-separated fields are re-joined with ','
    to form the key and the fifth field is converted to ``int`` as the
    value. A later row with the same key overwrites an earlier one.

    Args:
        lines: iterable of strings (an open text file works).

    Returns:
        dict mapping ``'c0,c1,c2,c3'`` -> int value of column 4.

    Raises:
        IndexError: if a non-blank row has fewer than five fields.
        ValueError: if the fifth field is not an integer.
    """
    totals = {}
    for line in islice(lines, 1, None):
        if not line.strip():
            # Blank line, e.g. a trailing newline at end of file.
            continue
        fields = line.rstrip('\n').split(',')
        totals[','.join(fields[:4])] = int(fields[4])
    return totals


def rank_countries(totals):
    """Return every ``(key, value)`` pair, highest value first.

    The pairs are sorted ascending and then reversed, so entries with equal
    values appear in reverse insertion order. All entries are returned,
    including the one with the lowest value.
    """
    return sorted(totals.items(), key=lambda pair: pair[1])[::-1]


def print_country_ranking(path=COUNTRIES_FILE):
    """Print ``key , value`` for every data row of the CSV at *path*.

    Raises:
        OSError: if the file cannot be opened.
    """
    with open(path, encoding='utf-8') as f1:
        totals = parse_country_rows(f1)
    for key, value in rank_countries(totals):
        print(key, ',', value)


# Run the report only when executed as a script.
if __name__ == '__main__':
    print_country_ranking()