Python3学习(4):用Python编写程序,统计某个文本文件中的字符数、行数和单词数,并按出现频率排序列出前10个单词

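### This program computes and prints statistics about the contents of a file: it opens the file read-only and
# prints how many characters, lines and words it contains, then lists the 10 most frequent words, ordered by
# number of occurrences. (Intended file types: mainly .txt, .xls, .doc.) -- written by LiSongbo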
# Characters that normalize() keeps verbatim: the lowercase ASCII letters, the
# plain space, and the two in-word punctuation marks (hyphen and apostrophe).
Words = set("abcdefghijklmnopqrstuvwxyz -'")


def normalize(Rocky):  # written by LiSongbo
    """Return a lowercased copy of ``Rocky`` reduced to the characters in ``Words``.

    Whitespace that is not a plain space (newline, tab, ...) is converted to a
    single space instead of being dropped; otherwise the last word of one line
    would be glued to the first word of the next line. Every other character
    that is not in ``Words`` (digits, punctuation, non-ASCII letters) is removed.
    """
    kept = []
    for ch in Rocky.lower():
        if ch in Words:
            kept.append(ch)
        elif ch.isspace():
            # Keep the word boundary that the whitespace character marked.
            kept.append(' ')
    # Join once at the end rather than growing a string with += in the loop.
    return ''.join(kept)

def make_frequent_dict(Lee):  # written by LiSongbo
    """Map every word of ``Lee`` to the number of times it occurs.

    The text is first cleaned with normalize() and then split on whitespace;
    the resulting tokens become the keys of the returned dictionary and the
    values are their occurrence counts.
    """
    counts = {}
    for token in normalize(Lee).split():
        # A token seen for the first time starts from 0 before the increment.
        counts[token] = counts.get(token, 0) + 1
    return counts

def file_count(fname):  # written by LiSongbo
    """Print statistics for the given file.

    Prints the number of characters, lines and words in the file, followed by
    up to ten of the most frequent words with their counts, highest count
    first. Words with equal counts are listed in reverse alphabetical order
    because the (count, word) pairs are sorted in descending order.

    fname -- path of the file; it is opened read-only in text mode.

    Nothing is returned. Errors raised by open() or read() are not caught here.
    """
    # The context manager closes the file even if read() raises.
    with open(fname, 'r') as source:
        Rocky_Lee = source.read()

    num_chars = len(Rocky_Lee)
    num_lines = Rocky_Lee.count('\n')
    if Rocky_Lee and not Rocky_Lee.endswith('\n'):
        # A final line without a trailing newline is still a line.
        num_lines += 1

    d = make_frequent_dict(Rocky_Lee)
    num_words = sum(d.values())  # total number of words in the text

    # Count comes first in each pair so sorting ranks by frequency.
    ranked = sorted(((count, word) for word, count in d.items()), reverse=True)

    print("This file '%s' has: " % fname)
    print('   %s characters' % num_chars)
    print('   %s lines' % num_lines)
    print('   %s words' % num_words)
    print('\nThe top 10 most frequent words are: ')
    for i, (count, word) in enumerate(ranked[:10], start=1):
        print('%2s. %4s %s' % (i, count, word))

def main(fname='test1.txt'):  # written by LiSongbo
    """Print the statistics report for ``fname``.

    fname -- path of the file to analyse; defaults to 'test1.txt', so calling
             main() without arguments behaves as before.
    """
    file_count(fname)


if __name__ == '__main__':
    import sys

    # An optional first command-line argument overrides the default file name;
    # the slice is empty when no argument was given, so main() uses its default.
    main(*sys.argv[1:2])

# 运行结果
This file 'test1.txt' has: 
   84709 characters
   276 lines
   14547 words


The top 10 most frequent words are: 
 1. 1124 the
 2.  648 of
 3.  474 and
 4.  333 a
 5.  328 to
 6.  318 in
 7.  224 i
 8.  161 was
 9.  140 which
10.  126 it


Process finished with exit code 0

你可能感兴趣的:(Python学习笔记)