统计文章里的词频并降序输出

python基础


资料
onelife.txt

import re
from string import punctuation
def count_words(lines):
    """Count how often each word occurs across all of *lines*.

    Each line is lower-cased, every ASCII punctuation character (plus the
    CJK book-title marks) is replaced by a space, and the remainder is split
    on whitespace.

    Args:
        lines: Iterable of text lines (an open text file works).

    Returns:
        Dict mapping each word to its number of occurrences. Empty input
        yields an empty dict.
    """
    # Escape the punctuation so characters such as ']' , '\\' , '^' and '-'
    # are taken literally inside the character class.
    separators = re.compile('[{}]'.format(re.escape(punctuation + '《》')))
    # One dict for the whole input: creating it per line would discard the
    # counts of every line except the last.
    counts = {}
    for line in lines:
        for word in separators.sub(' ', line.lower()).split():
            counts[word] = counts.get(word, 0) + 1
    return counts


def sort_by_count_desc(counts):
    """Return ``(word, count)`` pairs ordered from most to least frequent.

    Words with equal counts keep their first-seen order.
    """
    return sorted(counts.items(), key=lambda pair: pair[1], reverse=True)


def print_word_frequencies(path='D://onelife.txt'):
    """Read the text file at *path* and print ``word : count`` per line.

    Output is in descending frequency order and includes every word.
    """
    with open(path, encoding='utf-8') as f1:
        counts = count_words(f1)
    for word, count in sort_by_count_desc(counts):
        print(word, ':', count)


if __name__ == '__main__':
    print_word_frequencies()

统计文章里的词频并降序输出_第1张图片

countries_zh.csv

# 引入 islice，用于跳过文件首行
from itertools import islice
def parse_country_rows(lines):
    """Parse CSV-style lines into a ``{key: value}`` dict.

    The first line is treated as a header and skipped. For every other
    non-blank line, the first four comma-separated fields (re-joined with
    commas) form the key and the fifth field, converted to ``int``, is the
    value. A later line with the same key overwrites an earlier one.

    Args:
        lines: Iterable of text lines (an open text file works).

    Returns:
        Dict mapping the joined first four fields to the integer fifth field.

    Raises:
        IndexError: If a non-blank data line has fewer than five fields.
        ValueError: If the fifth field is not an integer.
    """
    rows = {}
    for line in islice(lines, 1, None):
        # Skip blank lines (e.g. a trailing empty line) instead of crashing.
        if not line.strip():
            continue
        fields = line.split(',')
        # int() ignores surrounding whitespace, including the line ending.
        rows[','.join(fields[:4])] = int(fields[4])
    return rows


def sort_by_value_desc(rows):
    """Return ``(key, value)`` pairs ordered from largest to smallest value.

    Pairs with equal values keep their first-seen order.
    """
    return sorted(rows.items(), key=lambda pair: pair[1], reverse=True)


def print_countries_by_value(path='D://countries_zh.csv'):
    """Read the CSV file at *path* and print every row, largest value first."""
    with open(path, encoding='utf-8') as f1:
        rows = parse_country_rows(f1)
    for key, value in sort_by_value_desc(rows):
        print(key, ',', value)


if __name__ == '__main__':
    print_countries_by_value()

统计文章里的词频并降序输出_第2张图片

你可能感兴趣的:(python)