# Word-frequency count: tally every whitespace-separated token in '1.txt'
# and print the (word, count) pairs, most frequent first.
from collections import Counter

wordlist = []  # every distinct word, in order of first appearance
counter = Counter()  # word -> number of occurrences (Counter is a dict subclass)

# The with-block closes the file even if reading raises.
with open('1.txt', 'r') as fread:
    for oneline in fread:
        # split() with no argument ignores the trailing newline and returns
        # an empty list for blank lines, so neither needs a special case.
        for w in oneline.split():
            if w not in counter:  # O(1) lookup instead of scanning wordlist
                wordlist.append(w)
            counter[w] += 1

# most_common() sorts by count, descending; words with equal counts keep
# the order in which they were first encountered.
counter_list = counter.most_common()
print(counter_list)
# To read Excel files, install the xlrd package first:
#     pip install xlrd
# Word-frequency count
# wordlist = [] #一个列表,存放所有的词
# counter = {} #一个字典,key为词,value为计数
# fread = open('1.txt','r')
# while True:
# oneline = fread.readline()
# if not oneline:
# break
# oneline = oneline.rstrip() #去掉这一行的换行符
# onelinelist = oneline.split() #按照空格拆分这一行,这一行变成一个列表,包含所有词
# if len(onelinelist) ==0:
# continue
# for w in onelinelist: #列表的每一个元素,即每一个词
# if not w in wordlist:
# wordlist.append(w) #如果这个词不在wordlist中,就加入wordlist
# if not w in counter:
# counter[w] = 1 #如果这个词不在字典中,就加入字典,value为1
# else:
# counter[w] +=1 #如果已存在,就在相应的value+1
# counter_list = sorted(counter.items(),key=lambda x: x[1], reverse = True) #按照降序排序
# print(counter_list)
#
# d = {}
# a = "\魅力汉语第1册读写李红艳55\10\10.1.txt-计划与未来"
# x=a.split("-",1)
# print(x)
# d[x[0]]=x[1]
# print(d)
#
# import xlrd
# data = xlrd.open_workbook('1.xlsx')
# table = data.sheets()[0]
# a = table.cell(0,0).value
# print(a)
# a = table.cell(0,1).value
# print(a)
# a = table.cell(1,0).value
# print(a)
# a = table.cell(1,1).value
# print(a)
import xlrd
# Look up the category of one article: load the title -> category mapping
# from the text file, then resolve the title stored in the spreadsheet.
# NOTE(review): xlrd 2.0 and later read only legacy .xls files; opening this
# .xlsx workbook needs xlrd < 2.0 -- confirm the installed version.
data = xlrd.open_workbook("我的数据.xlsx")
table = data.sheets()[0]  # first worksheet

leibie = []  # distinct categories, in order of first appearance
wenzhang = {}  # article title -> category

# Each line is expected to look like "<title>-<category>".
# The with-block closes the file even if parsing raises.
with open("标题-类别(3392文本的二级话题).txt", "r", encoding='utf-8') as fread:
    for line in fread:
        # Split on the first "-" only, so hyphens in the category survive.
        parts = line.rstrip().split("-", 1)
        if len(parts) < 2:
            # Blank or separator-less line: skip it rather than fail with
            # an IndexError when reading the category.
            continue
        title, category = parts
        if category not in leibie:
            leibie.append(category)
        wenzhang[title] = category
print(leibie)

name = table.cell(1, 1).value  # cell at row index 1, column index 1
print(name)
print(wenzhang[name])  # raises KeyError if the title is not in the text file