matplotlib与pyecharts:词云可视化

随便下载一个txt小说,保存为 /home/soft/wordcloud.txt(下面的代码按 GBK 编码读取该文件)。

代码如下:

#coding:utf-8
import io
from collections import Counter

import jieba
import matplotlib.pyplot as plt
# NOTE(review): scipy.misc.imread was removed in SciPy 1.2; this import only
# works with older SciPy releases.
from scipy.misc import imread
from wordcloud import WordCloud,ImageColorGenerator,STOPWORDS
# Custom words to register with jieba's dictionary before segmenting.
my_word_list = ['楚天箫']
def add_word(list):
    """Register every entry of ``list`` with jieba via ``jieba.add_word``.

    Args:
        list: Iterable of words to add. The name shadows the builtin; it is
            kept unchanged so existing callers are unaffected.

    Returns:
        None.
    """
    for entry in list:
        jieba.add_word(entry)
add_word(my_word_list)
# Alternatively, load custom words from a user dictionary file:
#jieba.load_userdict("/home/soft/new.txt")

# Stop words. They could also be listed inline, e.g.
#stopwords=[u'自己',u'他们',u'我们',u'不是',u'只是']
# io.open decodes while reading (on Python 2 and 3 alike) and the `with`
# block closes the file handle afterwards.
with io.open('/home/soft/stop.txt', 'r', encoding='utf-8') as stop_file:
    # Keep the stop words as a set of whole words. Testing membership against
    # the raw file contents would be a substring search, which also discards
    # any token that merely occurs inside a longer stop word.
    # Assumes the stop-word file is whitespace-separated (typically one word
    # per line) -- TODO confirm against the actual file.
    stopwords = set(stop_file.read().split())

# Novel text to visualise; the file is read as GBK.
with io.open('/home/soft/wordcloud.txt', 'r', encoding='gbk') as text_file:
    f = text_file.read()
def jiebaclearText(text):
    """Segment ``text`` with jieba and drop the stop words.

    The text is split with ``jieba.cut(text, cut_all=False)``; every token for
    which ``token in stopwords`` holds (``stopwords`` is the module-level
    value) is discarded.

    Args:
        text: The text to segment.

    Returns:
        The remaining tokens joined by single spaces.
    """
    tokens = jieba.cut(text, cut_all=False)
    kept = [token for token in tokens if token not in stopwords]
    return ' '.join(kept)
# Segment the novel text and strip the stop words.
cut_text = jiebaclearText(f)
# Debug aid: print cut_text[0:200]

# Background image, passed to WordCloud as the mask.
color_mask = imread("/home/soft/star.jpg")

cloud = WordCloud(
    # A font must be given explicitly, otherwise the text renders garbled.
    font_path='/usr/share/fonts/wqy-zenhei/wqy-zenhei.ttc',
    background_color="white",
    width=1000,
    height=860,
    margin=2,  # spacing around each word
    mask=color_mask,
    max_words=100,
    max_font_size=100
)

# Build the word cloud from the segmented text and save it to disk.
word_cloud = cloud.generate(cut_text)
word_cloud.to_file("cloud.jpg")
# Debug aid: c = Counter(cut_text).most_common(100); print c

# Display the image without axes.
plt.imshow(word_cloud)
plt.axis("off")
plt.show()
运行结果如下

matplotlib与pyecharts:词云可视化_第1张图片

matplotlib与pyecharts:词云可视化_第2张图片

你可能感兴趣的:(matplotlib与pyecharts:词云可视化)