用Python做中文分词和绘制词云图

用Python做中文分词和绘制词云图

Python窗体布局(以下各代码片段均为 VGUIAPP 类的方法,完整代码见文末)

def __init__(self):
    """Build the fixed-size word-cloud window and all of its input widgets.

    The layout uses 11 widgets: 4 labels, 4 entries, 2 file-browse buttons
    and 1 draw button. The entries are backed by ``StringVar`` objects
    (``vt_txtfile``, ``vt_txtpic``, ``vt_txtnum``, ``vt_txtfontsize``) that
    the button handlers read and write.
    """
    self.root = Tk()
    self.root.wm_title('绘制词云')
    self.root.resizable(0, 0)  # forbid resizing the window
    self.frame = Frame(self.root)
    self.frame.pack()

    # Every widget is created first and placed in a second statement:
    # grid() returns None, so chaining it onto the constructor would store
    # None in the attribute instead of the widget.

    # row 0: text file selection
    self.lab_txt = Label(self.frame, text="选择词云文本文件")
    self.lab_txt.grid(row=0, column=0)
    self.vt_txtfile = StringVar()
    self.txt_file = Entry(self.frame, textvariable=self.vt_txtfile,
                          justify='center')
    self.txt_file.grid(row=0, column=1)
    self.but_browse1 = Button(self.frame, text="浏览...",
                              command=self.ev_but_browse1)
    self.but_browse1.grid(row=0, column=3)

    # row 1: mask (shape) image selection
    self.lab_res = Label(self.frame, text="选择词云形状图形")
    self.lab_res.grid(row=1, column=0)
    self.vt_txtpic = StringVar()
    self.txt_pic = Entry(self.frame, textvariable=self.vt_txtpic,
                         justify='center')
    self.txt_pic.grid(row=1, column=1)
    self.but_browse2 = Button(self.frame, text="浏览...",
                              command=self.ev_but_browse2)
    self.but_browse2.grid(row=1, column=3)

    # row 2: maximum number of words shown in the cloud
    self.lab_num = Label(self.frame, text="设置词云显示字数")
    self.lab_num.grid(row=2, column=0)
    self.vt_txtnum = StringVar()
    self.txt_num = Entry(self.frame, textvariable=self.vt_txtnum,
                         justify='center')
    self.txt_num.grid(row=2, column=1, columnspan=2)

    # row 3: maximum font size used in the cloud
    self.lab_fontsize = Label(self.frame, text="设置词云字体大小")
    self.lab_fontsize.grid(row=3, column=0)
    self.vt_txtfontsize = StringVar()
    self.txt_fontsize = Entry(self.frame, textvariable=self.vt_txtfontsize,
                              justify='center')
    self.txt_fontsize.grid(row=3, column=1, columnspan=2)

    # row 4: draw button
    self.but_draw = Button(self.frame, text="绘制词云",
                           command=self.ev_but_draw)
    self.but_draw.grid(row=4, columnspan=3)

    # Default values shown in the two numeric entries.
    self.vt_txtnum.set(100)
    self.vt_txtfontsize.set(150)

文本文件浏览按钮事件

def ev_but_browse1(self):
    """Let the user pick the text file and store its full path in ``vt_txtfile``.

    The dialog opens in the current working directory. If the dialog is
    dismissed without a selection, the previously chosen path is kept.
    """
    # os.getcwd() is already an absolute path, so no expanduser() is needed.
    fname = filedialog.askopenfilename(title=u"选择文件",
                                       initialdir=os.getcwd())
    # A cancelled dialog yields an empty value; do not overwrite the entry.
    if fname:
        self.vt_txtfile.set(fname)

词云形状图片文件浏览按钮事件

def ev_but_browse2(self):
    """Let the user pick the mask image and store its full path in ``vt_txtpic``.

    The dialog opens in the current working directory. If the dialog is
    dismissed without a selection, the previously chosen path is kept.
    """
    # os.getcwd() is already an absolute path, so no expanduser() is needed.
    fname = filedialog.askopenfilename(title=u"选择文件",
                                       initialdir=os.getcwd())
    # A cancelled dialog yields an empty value; do not overwrite the entry.
    if fname:
        self.vt_txtpic.set(fname)

词云绘制按钮事件

def ev_but_draw(self):
    """Segment the selected text with jieba and show it as a word cloud.

    Reads the text-file path, mask-image path, maximum word count and
    maximum font size from the form. When a path is missing or a number is
    not a positive integer, a message box is shown and nothing is drawn.
    Otherwise the word cloud is rendered in a matplotlib window.
    """
    tx = self.vt_txtfile.get()
    pic = self.vt_txtpic.get()
    if not tx or not pic:
        messagebox.showwarning('绘制词云', '请先选择词云文本文件和词云形状图形')
        return

    try:
        num = int(self.vt_txtnum.get())
        font_size = int(self.vt_txtfontsize.get())
    except ValueError:
        num = font_size = 0
    if num <= 0 or font_size <= 0:
        messagebox.showerror('绘制词云', '显示字数和字体大小必须是正整数')
        return

    # Join onto the Fonts directory itself; taking dirname() of it would
    # drop the "Fonts" component and point at C:/Windows/simhei.ttf.
    path_of_font = os.path.join("C:/Windows/Fonts", "simhei.ttf")

    # Read the text from the local file system. UTF-8 (with or without BOM)
    # is tried first; on failure fall back to the platform default encoding.
    try:
        with open(tx, encoding="utf-8-sig") as text_file:
            text_from_file_with_path = text_file.read()
    except UnicodeDecodeError:
        with open(tx) as text_file:
            text_from_file_with_path = text_file.read()

    # cut_all=True selects jieba's full mode; WordCloud expects the tokens
    # separated by spaces.
    wl_space_split = " ".join(
        jieba.cut(text_from_file_with_path, cut_all=True))

    # Load the background (mask) image and release the file handle at once.
    with Image.open(pic) as mask_image:
        cloud_mask = np.array(mask_image)

    my_wordcloud = WordCloud(max_words=num, font_path=path_of_font,
                             max_font_size=font_size, mask=cloud_mask,
                             margin=10,
                             random_state=1).generate(wl_space_split)
    default_colors = my_wordcloud.to_array()

    title = "词云"
    # Make matplotlib render the Chinese title and the minus sign correctly.
    plt.rcParams['font.sans-serif'] = ['SimHei']
    plt.rcParams['axes.unicode_minus'] = False
    plt.title(title, fontsize=30, color='g')
    plt.imshow(default_colors, interpolation="bilinear")
    plt.axis("off")
    plt.show()

运行效果图

用Python做中文分词和绘制词云图_第1张图片
用Python做中文分词和绘制词云图_第2张图片

完整代码

from tkinter import *
from tkinter import messagebox
from tkinter import filedialog
import os

import matplotlib.pyplot as plt
from wordcloud import WordCloud
import jieba
from PIL import Image
import numpy as np
import random

class VGUIAPP:
    """Small Tkinter window that draws a word cloud from a text file.

    The user picks a text file and a mask image, sets the maximum number of
    words and the maximum font size, and presses the draw button. The text
    is segmented with jieba and rendered by WordCloud in a matplotlib window.
    """

    # SimHei font file handed to WordCloud. The path is joined onto the Fonts
    # directory itself; taking dirname() of it would drop the "Fonts" part.
    FONT_PATH = os.path.join("C:/Windows/Fonts", "simhei.ttf")
    # Values pre-filled in the two numeric entries.
    DEFAULT_MAX_WORDS = 100
    DEFAULT_MAX_FONT_SIZE = 150

    def __init__(self):
        """Build the fixed-size window and all of its input widgets."""
        self.root = Tk()
        self.root.wm_title('绘制词云')
        self.root.resizable(0, 0)  # forbid resizing the window
        self.frame = Frame(self.root)
        self.frame.pack()

        # Every widget is created first and placed in a second statement:
        # grid() returns None, so chaining it onto the constructor would
        # store None in the attribute instead of the widget.

        # row 0: text file selection
        self.lab_txt = Label(self.frame, text="选择词云文本文件")
        self.lab_txt.grid(row=0, column=0)
        self.vt_txtfile = StringVar()
        self.txt_file = Entry(self.frame, textvariable=self.vt_txtfile,
                              justify='center')
        self.txt_file.grid(row=0, column=1)
        self.but_browse1 = Button(self.frame, text="浏览...",
                                  command=self.ev_but_browse1)
        self.but_browse1.grid(row=0, column=3)

        # row 1: mask (shape) image selection
        self.lab_res = Label(self.frame, text="选择词云形状图形")
        self.lab_res.grid(row=1, column=0)
        self.vt_txtpic = StringVar()
        self.txt_pic = Entry(self.frame, textvariable=self.vt_txtpic,
                             justify='center')
        self.txt_pic.grid(row=1, column=1)
        self.but_browse2 = Button(self.frame, text="浏览...",
                                  command=self.ev_but_browse2)
        self.but_browse2.grid(row=1, column=3)

        # row 2: maximum number of words shown in the cloud
        self.lab_num = Label(self.frame, text="设置词云显示字数")
        self.lab_num.grid(row=2, column=0)
        self.vt_txtnum = StringVar()
        self.txt_num = Entry(self.frame, textvariable=self.vt_txtnum,
                             justify='center')
        self.txt_num.grid(row=2, column=1, columnspan=2)

        # row 3: maximum font size used in the cloud
        self.lab_fontsize = Label(self.frame, text="设置词云字体大小")
        self.lab_fontsize.grid(row=3, column=0)
        self.vt_txtfontsize = StringVar()
        self.txt_fontsize = Entry(self.frame,
                                  textvariable=self.vt_txtfontsize,
                                  justify='center')
        self.txt_fontsize.grid(row=3, column=1, columnspan=2)

        # row 4: draw button
        self.but_draw = Button(self.frame, text="绘制词云",
                               command=self.ev_but_draw)
        self.but_draw.grid(row=4, columnspan=3)

        self.vt_txtnum.set(self.DEFAULT_MAX_WORDS)
        self.vt_txtfontsize.set(self.DEFAULT_MAX_FONT_SIZE)

    def _browse_into(self, target):
        """Open a file dialog in the working directory and store the choice.

        ``target`` is the variable object whose ``set()`` receives the chosen
        path. A cancelled dialog yields an empty value, in which case
        ``target`` keeps its previous content.
        """
        fname = filedialog.askopenfilename(title=u"选择文件",
                                           initialdir=os.getcwd())
        if fname:
            target.set(fname)

    def ev_but_browse1(self):
        """Let the user pick the text file; its path goes to ``vt_txtfile``."""
        self._browse_into(self.vt_txtfile)

    def ev_but_browse2(self):
        """Let the user pick the mask image; its path goes to ``vt_txtpic``."""
        self._browse_into(self.vt_txtpic)

    @staticmethod
    def _parse_positive_int(raw):
        """Return ``raw`` as a positive int, or None when it is not one."""
        try:
            value = int(raw)
        except (TypeError, ValueError):
            return None
        return value if value > 0 else None

    @staticmethod
    def _read_text(path):
        """Return the whole content of the text file at ``path``.

        UTF-8 (with or without BOM) is tried first; if the bytes are not
        valid UTF-8 the file is re-read with the platform default encoding.
        The file is always closed before returning.
        """
        try:
            with open(path, encoding="utf-8-sig") as text_file:
                return text_file.read()
        except UnicodeDecodeError:
            with open(path) as text_file:
                return text_file.read()

    def ev_but_draw(self):
        """Segment the selected text with jieba and show it as a word cloud.

        When a path is missing or a number is not a positive integer, a
        message box is shown and nothing is drawn.
        """
        tx = self.vt_txtfile.get()
        pic = self.vt_txtpic.get()
        if not tx or not pic:
            messagebox.showwarning('绘制词云', '请先选择词云文本文件和词云形状图形')
            return

        num = self._parse_positive_int(self.vt_txtnum.get())
        font_size = self._parse_positive_int(self.vt_txtfontsize.get())
        if num is None or font_size is None:
            messagebox.showerror('绘制词云', '显示字数和字体大小必须是正整数')
            return

        # cut_all=True selects jieba's full mode; WordCloud expects the
        # tokens separated by spaces.
        text_from_file_with_path = self._read_text(tx)
        wl_space_split = " ".join(
            jieba.cut(text_from_file_with_path, cut_all=True))

        # Load the background (mask) image and release the file handle.
        with Image.open(pic) as mask_image:
            cloud_mask = np.array(mask_image)

        my_wordcloud = WordCloud(max_words=num, font_path=self.FONT_PATH,
                                 max_font_size=font_size, mask=cloud_mask,
                                 margin=10,
                                 random_state=1).generate(wl_space_split)
        default_colors = my_wordcloud.to_array()

        title = "词云"
        # Make matplotlib render the Chinese title and the minus sign.
        plt.rcParams['font.sans-serif'] = ['SimHei']
        plt.rcParams['axes.unicode_minus'] = False
        plt.title(title, fontsize=30, color='g')
        plt.imshow(default_colors, interpolation="bilinear")
        plt.axis("off")
        plt.show()

if __name__ == "__main__":
    # Create the window and enter Tk's event loop only when the file is run
    # as a script, so importing the module does not open the GUI.
    mapp_v = VGUIAPP()
    mapp_v.root.mainloop()

致谢

感谢王树义老师、dorami321等人的博客,感谢所有在网上开源和不吝赐教的无私奉献者。

主要参考博客网址

https://www.jianshu.com/p/207db98655b1
https://www.jianshu.com/p/af6201d2f0c6
https://blog.csdn.net/weixin_37986926/article/details/79004761

你可能感兴趣的:(python,编程)