一、注意事项
- 代码改编自mooc上嵩天老师的Python课程;
- 需要pip安装用于中文词频统计的jieba库;
- 代码简单,注释详细,就不过多解释代码了,虽然注释凌乱;
- 调试过程中,修改代码后,部分无关紧要的注释没有更改;
- 唯一需要注意的是,需要先创建一个.txt文件,存放需要统计的文本。然后在main函数里找到'filename'变量,将其值修改为该文本文件的存放路径;
- 完成第5步(即上一条:修改filename路径)后,代码可直接运行。
- 英文的词频统计就更简单了:如果不涉及中文,只需要用str.split()按英文单词之间的空格分词即可,连jieba库都不需要用到。
二、代码
import turtle
import jieba
# Maximum number of top-frequency words reported and charted.
count = 19

# Filled in by main(): frequencies and their matching words, highest first.
data = []
words = []

# Chart scaling: horizontal spacing per bar slot and vertical units per
# occurrence, both in turtle coordinates.
xScale = 30
yScale = 1

# Stop words removed from the token list before counting.
banC = [
    '的', '而', '是', '好', '着', '了', '又', '在', '一个',
    '是的', '可以', '说', '有', '如', '之', '于', '以', '也',
    '这', '这样', '\u3000', '或', '和', '新', '等', '为', '要',
]
def drawLine(t, x1, y1, x2, y2):
    """Draw a straight segment from (x1, y1) to (x2, y2).

    Args:
        t: Turtle-like pen providing penup(), pendown() and goto().
        x1, y1: Start point of the segment.
        x2, y2: End point of the segment.
    """
    # Move to the start with the pen lifted so no stray line is drawn.
    t.penup()
    t.goto(x1, y1)
    t.pendown()
    t.goto(x2, y2)
def drawText(t, x, y, text):
    """Write ``text`` at position (x, y).

    Args:
        t: Turtle-like pen providing penup(), pendown(), goto() and write().
        x, y: Position the pen is moved to before writing.
        text: Value passed to ``t.write``.
    """
    # Travel to the target with the pen lifted so no line is drawn on the way.
    t.penup()
    t.goto(x, y)
    t.pendown()
    t.write(text)
def drawGraph(t):
    """Draw the axes, the word and frequency labels, and then the bars.

    Reads the module-level ``words``, ``data``, ``count``, ``xScale`` and
    ``yScale``.  At most ``count`` entries are labelled; when fewer words are
    available only those are labelled, rather than indexing past the end of
    the lists.

    Args:
        t: Turtle-like pen used for all drawing.
    """
    drawLine(t, 0, 0, 580, 0)  # x axis
    drawLine(t, 0, 380, 0, 0)  # y axis

    shown = zip(words[:count], data[:count])
    for slot, (word, freq) in enumerate(shown, start=1):
        labelX = slot * xScale - 4
        # Word under the axis, its frequency just above where the bar ends.
        drawText(t, labelX, -20, word)
        drawText(t, labelX, freq * yScale + 10, freq)

    drawBar(t)
def drawRectangle(t, x, y):
    """Draw the outline of one bar.

    The bar is 10 units wide, centred on ``x * xScale``, and rises from the
    x axis to ``y * yScale``.

    Args:
        t: Turtle-like pen used for drawing.
        x: Bar slot number (scaled by the module-level ``xScale``).
        y: Bar value (scaled by the module-level ``yScale``).
    """
    centre = x * xScale
    top = y * yScale
    left, right = centre - 5, centre + 5

    # Left edge, top edge, right edge, then close along the base.
    drawLine(t, left, 0, left, top)
    drawLine(t, left, top, right, top)
    drawLine(t, right, top, right, 0)
    drawLine(t, right, 0, left, 0)
def drawBar(t):
    """Draw one bar per entry of the module-level ``data`` list.

    At most ``count`` bars are drawn; if ``data`` holds fewer values, only
    those are drawn instead of indexing past the end of the list.

    Args:
        t: Turtle-like pen used for drawing.
    """
    # Slots are numbered from 1 so the first bar does not sit on the y axis.
    for slot, value in enumerate(data[:count], start=1):
        drawRectangle(t, slot, value)
def processLine(line, wordCounts):
    """Tokenize one line of text and add its word counts to ``wordCounts``.

    Punctuation is stripped with replacePunctuations, the remainder is
    segmented with ``jieba.lcut``, and stop words are dropped with
    replaceWords before counting.

    Args:
        line: One line of input text.
        wordCounts: Dict mapping word -> number of occurrences; updated in
            place.
    """
    line = replacePunctuations(line)
    # jieba segmentation is used here in place of line.split(), which only
    # separates words on whitespace.
    tokens = replaceWords(jieba.lcut(line))
    for word in tokens:
        wordCounts[word] = wordCounts.get(word, 0) + 1
# Characters removed by replacePunctuations: ASCII and full-width punctuation
# plus the space and newline characters.
_PUNCTUATION = "~@#$%^&*()_-+=<>?/,.:;{}[]|\',。/《》?;‘:“【】{}()——+-=*&……%¥#@!:/、|~!。..`~·~、——-() ’”“:~· \n"
_PUNCTUATION_TABLE = str.maketrans("", "", _PUNCTUATION)


def replacePunctuations(line):
    """Return ``line`` with every punctuation character removed.

    Characters are deleted outright (not replaced by a space), so the text
    on either side of a removed character becomes adjacent.

    Args:
        line: Text to clean.

    Returns:
        A new string without any character listed in ``_PUNCTUATION``.
    """
    # One translate pass replaces a str.replace call per matching character.
    return line.translate(_PUNCTUATION_TABLE)
def replaceWords(words):
    """Return a new list with every stop word from ``banC`` removed.

    The input list is not modified and the order of the remaining words is
    preserved.

    Args:
        words: List of tokens.

    Returns:
        A new list containing the tokens that are not in ``banC``.
    """
    # A single filtering pass; repeated list.remove() calls rescan the list
    # for every banned token.
    return [word for word in words if word not in banC]
def main():
    """Count word frequencies in the input file and chart the top words.

    Reads the text file line by line, accumulates counts via processLine,
    prints the most frequent words (word, tab, count), appends them to the
    module-level ``words`` / ``data`` lists, and then draws the bar chart
    with turtle.  At most ``count`` words are reported; a text with fewer
    distinct words reports only the words it has.
    """
    # Raw string so the backslashes in the Windows path are not parsed as
    # escape sequences.  Edit this to point at the text to analyse.
    filename = r'D:\Python的文件\Code\中文词频统计并画表.txt'

    wordCounts = {}
    # NOTE(review): the file is decoded with the platform default encoding;
    # pass encoding=... here if the text is stored in a different one.
    with open(filename, "r") as infile:
        for line in infile:
            processLine(line.lower(), wordCounts)

    # [frequency, word] pairs, highest frequency first; equal frequencies
    # are ordered by word, descending.
    ranked = sorted(
        ([freq, word] for word, freq in wordCounts.items()), reverse=True
    )
    # Slicing caps the report at `count` entries and cannot wrap around to
    # negative indices when there are fewer distinct words than that.
    for freq, word in ranked[:count]:
        print(word + "\t" + str(freq))
        data.append(freq)
        words.append(word)

    turtle.title('词频结果柱状图')
    turtle.setup(1920, 1060, 0, 0)
    t = turtle.Turtle()
    t.hideturtle()
    t.width(3)
    drawGraph(t)
# Script entry point: count the word frequencies and draw the bar chart.
main()
三、运行结果
![Python使用turtle库+jieba库完成简易中文词频统计,附代码_第1张图片](http://img.e-com-net.com/image/info8/0a61db969c914490a2b7334915480b08.jpg)
![Python使用turtle库+jieba库完成简易中文词频统计,附代码_第2张图片](http://img.e-com-net.com/image/info8/e55a6b0303c84a169e760c02ca3de40a.jpg)