期末综合大作业:词频统计

# Step 1: read the whole source text into memory and echo it.
# The context manager guarantees the file handle is closed even if
# read() raises (e.g. on a decoding error).
with open('The Young Salesman.txt', mode='r', encoding='utf-8') as small:
    smallText = small.read()
print(smallText)

# Step 2: replace punctuation with spaces so it does not stay glued to words.
# Each mark is its own list element: the original ':'';' was missing a comma,
# which silently concatenated into the single string ':;' and left lone
# colons and semicolons in the text.
replaceList = ['?', '!', ',', '.', '"', ':', ';']
for c in replaceList:
    smallText = smallText.replace(c, ' ')
print(smallText)

# Step 3: break the cleaned text into a list of words.
# split() with no argument splits on any run of whitespace (spaces,
# newlines, tabs) and never yields empty strings; split(' ') produced ''
# tokens for consecutive spaces and kept words joined across line breaks.
smallList = smallText.split()
print(smallList)

# Step 4: collect the distinct words and count how often each one occurs.
smallSet = set(smallList)
print(smallSet)

# Tally in a single pass over the word list. Calling smallList.count() once
# per distinct word rescans the whole list each time (quadratic work).
# Keys end up in first-occurrence order rather than set iteration order.
smallDict = {}
for word in smallList:
    smallDict[word] = smallDict.get(word, 0) + 1
print(smallDict)
for word, count in smallDict.items():
    print(word, count)

# Step 5: turn the word -> count mapping into (word, count) pairs and order
# them from most frequent to least frequent.
wordCountList = list(smallDict.items())
print(wordCountList)
wordCountList = sorted(wordCountList, key=lambda pair: pair[1], reverse=True)
print(wordCountList)

# Step 6: show the 20 most frequent words.
# Slicing stops at the end of the list, so a text with fewer than 20
# distinct words prints what is available instead of raising IndexError.
for entry in wordCountList[:20]:
    print(entry)

# Step 7: save the results, one "<count> <word>" line per word.
# The context manager closes the file even if a write fails.
# NOTE(review): mode 'a' appends, so every rerun adds another full copy of
# the results to smallCount.txt; switch to 'w' if the file should be
# replaced on each run.
with open('smallCount.txt', mode='a', encoding='utf-8') as smallCountFile:
    for word, count in wordCountList:
        smallCountFile.write(f'{count} {word}\n')

期末综合大作业:词频统计_第1张图片

你可能感兴趣的:(期末综合大作业:词频统计)