Python求文件中单词的个数,平均长度,出现最多的5个单词

求文件中: the total number of unique words, the average length of all words in the text, and the top five most commonly used words in the text.

#!/usr/bin/python
# -*- coding: UTF-8 -*-
def getText():
    """Read ``Rental.txt`` and return its text normalised for word counting.

    The file is decoded as UTF-8, lower-cased, and every punctuation mark or
    digit in the separator set below is replaced by a single space, so that
    ``str.split()`` on the result yields bare words.

    Returns:
        str: the cleaned, lower-case text.

    Raises:
        OSError: if ``Rental.txt`` cannot be opened.
        UnicodeDecodeError: if the file is not valid UTF-8.
    """
    # Text mode is required here: combining binary mode ('rb') with an
    # ``encoding`` argument makes ``open`` raise ValueError.  The ``with``
    # block also guarantees the handle is closed.
    with open('Rental.txt', 'r', encoding='UTF-8') as f:
        txt = f.read().lower()

    # Characters treated as word separators (punctuation and ASCII digits).
    separators = '!"#$%&()*+,-./:;<=>?@[\\]^_‘{|}~1234567890'
    # One translate pass instead of one ``replace`` call per character.
    return txt.translate(str.maketrans(separators, " " * len(separators)))
# Tokenise the cleaned text on whitespace and tally how often each word occurs.
hamletTxt = getText()
words = hamletTxt.split()
counts = {}
for word in words:
    counts[word] = counts.get(word, 0) + 1

# (word, count) pairs ordered from most to least frequent.  The sort is
# stable, so words with equal counts keep their first-seen order.
items = sorted(counts.items(), key=lambda x: x[1], reverse=True)

# Dictionary keys are unique, so the size of ``counts`` is the number of
# distinct words.
print("the total number of unique words in the {}\n".format(len(counts)))

# Accumulate total characters and total word occurrences for the average.
# ``total_chars`` is used instead of ``sum`` to avoid shadowing the builtin.
total_chars = 0
num = 0
for word, count in items:
    total_chars += len(word) * count
    num += count

# An empty file contains no words; report 0.0 rather than dividing by zero.
average_length = total_chars / num if num else 0.0
print("The average length of all words in the text is {}\n".format(average_length))

print("the top five most commonly used words in the text ")
# Slicing tolerates texts with fewer than five distinct words.
for word, count in items[:5]:
    print("{0:<10}{1:>5}".format(word, count))

 

你可能感兴趣的:(Python)