# Task: for the input file, report the total number of unique words, the average length of all words in the text, and the top five most commonly used words in the text.
#!/usr/bin/python
# -*- coding: UTF-8 -*-
def getText(path='Rental.txt'):
    """Return the contents of a text file, normalised for word counting.

    The text is lowercased, and every punctuation character and ASCII digit
    listed below is replaced by a single space so that a later
    ``str.split()`` yields bare words.

    Args:
        path: File to read. Defaults to 'Rental.txt' in the current
            working directory.

    Returns:
        The normalised text as a ``str``.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # Open in text mode: binary mode ('rb') rejects the ``encoding`` argument
    # with ValueError. The ``with`` block guarantees the handle is closed.
    with open(path, 'r', encoding='UTF-8') as f:
        txt = f.read().lower()
    # Characters treated as word separators (punctuation and digits).
    separators = '!"#$%&()*+,-./:;<=>?@[\\]^_‘{|}~1234567890'
    # One translate pass instead of one .replace() call per character.
    return txt.translate(str.maketrans(separators, ' ' * len(separators)))
# Word statistics report: unique-word count, average word length, and the
# five most frequent words of the normalised text returned by getText().
from collections import Counter

hamletTxt = getText()
words = hamletTxt.split()
# Frequency of each distinct word.
counts = Counter(words)
# (word, count) pairs, most frequent first; ties keep first-seen order.
items = counts.most_common()

print("the total number of unique words in the text is {}\n".format(len(items)))

# Total characters over all word occurrences, and the number of occurrences.
total_chars = sum(len(word) * count for word, count in items)
total_words = sum(count for _, count in items)
# An empty file has no words; report 0.0 instead of dividing by zero.
average_length = total_chars / total_words if total_words else 0.0
print("The average length of all words in the text is {}\n".format(average_length))

print("the top five most commonly used words in the text ")
# Slicing tolerates texts with fewer than five distinct words.
for word, count in items[:5]:
    print("{0:<10}{1:>5}".format(word, count))