期末综合大作业:词频统计

# Word-frequency statistics for a plain-text file.
#
# Pipeline: read the text -> remove punctuation -> split into words ->
# count each word -> sort by frequency -> print the most frequent words ->
# append the full "count word" table to an output file.
from collections import Counter

SOURCE_PATH = 'Les Miserables悲惨世界.txt'
OUTPUT_PATH = 'fText.txt'
PUNCTUATION = '?.,:"\''  # characters deleted from the text before splitting
TOP_N = 20               # how many of the most frequent words are printed


def _read_text(path):
    """Return the entire content of the UTF-8 text file at *path*."""
    # The context manager closes the handle even if read() raises.
    with open(path, mode='r', encoding='utf-8') as source:
        return source.read()


def _strip_punctuation(text, chars=PUNCTUATION):
    """Return *text* with every character listed in *chars* removed."""
    # One translate() pass instead of one replace() call per character.
    return text.translate(str.maketrans('', '', chars))


def _split_words(text):
    """Return the words of *text* in order of appearance.

    Splitting happens on any run of whitespace, so newlines and tabs separate
    words too and consecutive blanks never yield empty-string "words".
    """
    return text.split()


def _count_words(words):
    """Return a dict mapping each distinct word to its number of occurrences.

    Keys appear in order of first occurrence. The counting is a single pass
    over *words* rather than one list.count() scan per distinct word.
    """
    return dict(Counter(words))


def _write_counts(pairs, path):
    """Append one "<count> <word>" line per (word, count) pair to *path*."""
    # NOTE(review): mode 'a' is kept from the original script, so running the
    # script again appends a second copy of the table - confirm that appending
    # (rather than overwriting with 'w') is really intended.
    with open(path, mode='a', encoding='utf-8') as target:
        target.writelines(
            '{} {}\n'.format(count, word) for word, count in pairs
        )


def main(source_path=SOURCE_PATH, output_path=OUTPUT_PATH, top_n=TOP_N):
    """Run the whole word-frequency pipeline and return the sorted table.

    Parameters:
        source_path: UTF-8 text file to analyse.
        output_path: file that receives the "<count> <word>" lines (appended).
        top_n: number of most frequent words printed at the end.

    Returns:
        A list of (word, count) tuples sorted by count, highest first. Words
        with equal counts keep their order of first appearance in the text.

    Raises:
        OSError: if the source file cannot be read or the output file cannot
            be written.
    """
    # 1: read the whole text as a single string
    fText = _read_text(source_path)
    print(fText)

    # 2: remove the punctuation characters
    fText = _strip_punctuation(fText)
    print(fText)

    # 3: the sequence of words as they appear in the text
    fList = _split_words(fText)
    print(fList)

    # 4: the set of distinct words, then the occurrence count of each word
    fSet = set(fList)
    print(fSet)

    fDict = _count_words(fList)
    print(fDict)
    for word, count in fDict.items():
        print(word, count)

    # 5: (word, count) pairs, sorted by count in descending order
    wordCountList = list(fDict.items())
    print(wordCountList)
    wordCountList.sort(key=lambda pair: pair[1], reverse=True)
    print(wordCountList)

    # 6: the most frequent words; slicing copes with fewer than top_n entries
    for entry in wordCountList[:top_n]:
        print(entry)

    # 7: save the complete table
    _write_counts(wordCountList, output_path)
    return wordCountList


if __name__ == '__main__':
    main()

 

你可能感兴趣的:(期末综合大作业:词频统计)