Python 统计英文词频

Python 统计英文词频(以 1986–2017 年考研英语真题为例)

运行截图:
(图 1:Python 统计英文词频的运行结果截图)
代码:

import re
# Exclude simple words, question numbers, etc.
# Stored as a set: membership tests are constant-time and the duplicate
# entries the former list carried ('1', '2', 'a', '(10') disappear.
excludes = {
    # Common function words and exam boilerplate.
    'the', 'of', 'to', 'and', 'in', 'a', 'is', 'were', 'was', 'you',
    'I', 'he', 'his', 'there', 'those', 'she', 'her', 'their',
    'that', 'them', 'or', 'for', 'as',
    'are', 'on', 'it', 'be', 'with', 'by', 'have', 'from', 'not', 'they',
    'more', 'but', 'an', 'at', 'we', 'has', 'can', 'this', 'your', 'which',
    'will', 'one', 'should', 'all', 'than', 'what',
    'people', 'if', 'been', 'its', 'new', 'our', 'would', 'part', 'may',
    'some', 'i', 'who', 'answer', 'when', 'most', 'so', 'section', 'no',
    'into', 'do', 'only', 'each', 'other', 'following', 'had', 'such',
    'much', 'out', 'up', 'these', 'even', 'how', 'use', 'because', 'time',
    # Answer-option letters ('a' is listed above) and question numbers.
    'b', 'c', 'd',
    '1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13',
    '14', '15', '20', '22', '23', '24', '25', '26', '27', '29',
    # Hyphens are not stripped by gettext(), so these can occur as tokens.
    '-', '--',
    # Raw tokens as they appear in the unprocessed text. gettext() strips
    # brackets, parentheses, underscores, '.', ',' and ':' before the text
    # is split, so these spellings cannot match a token; they are kept so
    # no original entry is lost.
    '[a]', '[b]', '[c]', '[d]', '[d].', '[b],', '[a],',
    'points)', '(10', '(15', '________', '________.',
    'directions:', 'it.', 'however,',
    # Punctuation-free forms of the raw tokens above that were not already
    # listed, so the words they stand for are actually excluded.
    'points', 'directions', 'however',
}

def gettext(path="1986年到2017年考研英语2真题.txt"):
    """Read the corpus file and return its text prepared for word counting.

    The text is lower-cased and every punctuation character in the set below
    is replaced by a single space, so that ``str.split()`` on the result
    yields one token per word.

    Args:
        path: File to read. Defaults to the exam corpus used by this script.

    Returns:
        The lower-cased text with the listed punctuation turned into spaces.

    Raises:
        OSError: If the file cannot be opened.
    """
    # Same punctuation characters as before, without the blanks: replacing
    # spaces (and punctuation) with "" glued neighbouring words together.
    # Hyphens and apostrophes are not in this set, so they stay in the token.
    punctuation = '!"#$%&()*+,_./:;<=>?@[\\]^`{|}~'
    to_space = str.maketrans(punctuation, " " * len(punctuation))

    # The platform default encoding is used, as in the original open() call.
    with open(path, "r") as corpus:
        txt = corpus.read()

    return txt.lower().translate(to_space)

# Count how often each non-excluded word occurs in the corpus and print the
# most frequent ones.
Txt = gettext()
words = Txt.split()

# word -> number of occurrences, skipping everything listed in `excludes`.
counts = {}
for word in words:
    if word not in excludes:
        counts[word] = counts.get(word, 0) + 1

# (word, count) pairs, most frequent first. The sort is stable, so words with
# equal counts keep the order in which they first appeared in the text.
countslist = sorted(counts.items(), key=lambda x: x[1], reverse=True)

# Slicing instead of indexing 0..9 avoids an IndexError when the corpus
# yields fewer than ten distinct words.
for word, count in countslist[:10]:
    print("{0:<10}{1:>5}".format(word, count))
    

你可能感兴趣的:(python,python)