Python 统计词频

# Count how often each whitespace-separated word occurs in 1.txt and print
# the (word, count) pairs sorted by count, highest first.
wordlist = []  # every distinct word, in order of first appearance
counter = {}   # word -> number of occurrences

# The context manager closes the file even if reading fails part-way.
# NOTE(review): no encoding is given, so the platform default is used;
# confirm that matches how 1.txt was saved.
with open('1.txt', 'r') as fread:
    for oneline in fread:
        # split() with no argument splits on any whitespace run and drops
        # the trailing newline, so blank lines simply yield no words.
        for w in oneline.split():
            if w not in counter:
                # counter and wordlist always hold the same set of words, so
                # the O(1) dict lookup replaces a linear scan of wordlist.
                wordlist.append(w)
                counter[w] = 1
            else:
                counter[w] += 1

# sorted() is stable, so words with equal counts keep first-seen order.
counter_list = sorted(counter.items(), key=lambda x: x[1], reverse=True)

print(counter_list)

为了支持 Excel 读取,安装 xlrd(注意:xlrd 2.0 及以上版本只支持 .xls 格式,读取 .xlsx 文件需要安装 1.2.0 版本,即 pip install xlrd==1.2.0)

pip install xlrd

统计词频

# wordlist = []  #一个列表,存放所有的词
# counter = {}   #一个字典,key为词,value为计数

# fread = open('1.txt','r')

# while True:
# 	oneline = fread.readline()
# 	if not oneline:
# 		break
# 	oneline = oneline.rstrip() #去掉这一行的换行符
# 	onelinelist = oneline.split() #按照空格拆分这一行,这一行变成一个列表,包含所有词
# 	if len(onelinelist) ==0:
# 		continue
# 	for w in onelinelist:  #列表的每一个元素,即每一个词
# 		if not w in wordlist:
# 			wordlist.append(w)   #如果这个词不在wordlist中,就加入wordlist
# 		if not w in counter:
# 				counter[w] = 1   #如果这个词不在字典中,就加入字典,value为1
# 		else:
# 				counter[w] +=1   #如果已存在,就在相应的value+1
# counter_list = sorted(counter.items(),key=lambda x: x[1], reverse = True) #按照降序排序

# print(counter_list)
# 
# d = {}
# a = "\魅力汉语第1册读写李红艳55\10\10.1.txt-计划与未来"
# x=a.split("-",1)
# print(x)
# d[x[0]]=x[1]
# print(d)
# 
# import xlrd

# data = xlrd.open_workbook('1.xlsx')
# table = data.sheets()[0]
# a = table.cell(0,0).value

# print(a)

# a = table.cell(0,1).value

# print(a)

# a = table.cell(1,0).value

# print(a)

# a = table.cell(1,1).value

# print(a)

import xlrd
# Look up the category of the title stored in one spreadsheet cell, using a
# "<title>-<category>" mapping read from a text file.
# NOTE(review): xlrd 2.0+ can only open .xls files; reading this .xlsx
# workbook needs xlrd 1.2.0 (or a different reader) - confirm the version.
data = xlrd.open_workbook("我的数据.xlsx")
table = data.sheets()[0]  # first sheet of the workbook

leibie = []    # distinct categories, in order of first appearance
wenzhang = {}  # title -> category

# The context manager closes the mapping file even if parsing fails.
with open("标题-类别(3392文本的二级话题).txt", "r", encoding='utf-8') as fread:
    for line in fread:
        # Split on the first "-" only, so a category may itself contain "-".
        title, sep, category = line.rstrip().partition("-")
        if not sep:
            # Blank or malformed line without a "-": skip it rather than
            # fail with IndexError on the missing category part.
            continue
        if category not in leibie:
            leibie.append(category)
        # A title that appears more than once keeps its last category.
        wenzhang[title] = category

print(leibie)
name = table.cell(1, 1).value  # cell at row index 1, column index 1
print(name)
# Raises KeyError if the title from the sheet is not in the mapping file.
print(wenzhang[name])

你可能感兴趣的:(Python)