需求分析:指定任意英文文本,统计文本中各个单词出现的频率,并选出出现频率最高的10个单词
环境:python 3.6.0
IDE:PyCharm 2017.2.3
module:string
数据类型:字符串,列表,元组,字典
#!/usr/bin/python
# -*- coding: UTF-8 -*-
import string
from collections import Counter

import matplotlib.pyplot as plt
def count_words(lines):
    """Count case-insensitive word frequencies across lines of text.

    Hyphens are treated as word separators, so "twenty-one" is counted as
    the two words "twenty" and "one". Leading and trailing ASCII punctuation
    is stripped from every token, and tokens that consist solely of
    punctuation are skipped instead of being counted as an empty word.

    Args:
        lines: Iterable of strings, e.g. an open text file.

    Returns:
        A Counter mapping each lowercased word to its occurrence count.
    """
    counts = Counter()
    for line in lines:
        # Split hyphenated compounds into their component words.
        for token in line.replace("-", " ").split():
            # Drop surrounding punctuation and fold case so that
            # "The", "the," and "THE" are all the same word.
            word = token.strip(string.punctuation).lower()
            if word:  # a pure-punctuation token strips down to ""
                counts[word] += 1
    return counts


def top_words(counts, n=10):
    """Return the n most frequent words as (freq, word) tuples.

    The result is ordered by descending frequency; words with equal
    frequency are ordered by descending word, because the (freq, word)
    tuples are compared as a whole.

    Args:
        counts: Mapping of word -> frequency.
        n: Maximum number of entries to return.

    Returns:
        A list of at most n (freq, word) tuples.
    """
    ranked = sorted(
        ((freq, word) for word, freq in counts.items()),
        reverse=True,
    )
    return ranked[:n]


def main():
    """Print the 10 most frequent words of the novel, one per line."""
    # NOTE(review): no explicit encoding is given, so the platform default
    # is used -- confirm the text file's actual encoding.
    with open("《教父》小说英文版.txt") as f:
        words_freq = count_words(f)
    for freq, word in top_words(words_freq):
        print(word, freq)


if __name__ == "__main__":
    main()