wordcloud词云分析及词频统计绘图

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Date    : 2017-10-18 17:52:25
# @Author  : awakeljw 
# @Link    : http://blog.csdn.net/awakeljw/
# @Version : $Id$


from wordcloud import WordCloud
import jieba
import PIL
import matplotlib.pyplot as plt
import numpy as np
from collections import Counter
import matplotlib
from pylab import mpl
# Global matplotlib configuration, applied once at import time.
# Set the default sans-serif font; presumably 'SentyTang' is an installed font
# with Chinese glyphs so the Chinese labels below render - TODO confirm.
mpl.rcParams['font.sans-serif'] = ['SentyTang']
# Disable the Unicode minus sign on axes (presumably because the chosen font
# lacks that glyph - TODO confirm).
mpl.rcParams['axes.unicode_minus'] = False  
# Use a tick-label size of 14 on both axes.
matplotlib.rc('xtick', labelsize=14)
matplotlib.rc('ytick', labelsize=14)
def wordcloudplot(txt, font_path='字体文件', mask_path='图片文件',
                  output_path='输出文件'):
    """Render *txt* as a word cloud, save it to disk and display it.

    Args:
        txt: Text handed to ``WordCloud.generate``.
        font_path: Path of the font file used to draw the words. The default
            is a placeholder that must be replaced with a real font path.
        mask_path: Path of the image file used as the word cloud mask. The
            default is a placeholder that must be replaced.
        output_path: Path the rendered image is written to. The default is a
            placeholder that must be replaced.
    """
    # ``import PIL`` alone does not load the ``Image`` submodule, so import it
    # explicitly instead of relying on another package having loaded it.
    from PIL import Image

    # Read the mask inside a ``with`` block so the image file is closed as
    # soon as its pixels have been copied into the array.
    with Image.open(mask_path) as mask_image:
        alice_mask = np.array(mask_image)

    wordcloud = WordCloud(
        font_path=font_path,
        background_color="white",
        margin=5,
        width=1800,
        height=800,
        mask=alice_mask,
        max_words=2000,
        max_font_size=60,
        random_state=42,  # fixed seed so repeated runs give the same layout
    )
    wordcloud = wordcloud.generate(txt)
    wordcloud.to_file(output_path)

    plt.imshow(wordcloud)
    plt.axis("off")
    plt.show()

# Tokens that main() removes from the 20 most common tokens before plotting:
# punctuation, whitespace and other words the author chose to exclude.
rem = [',','、','。','的','和','\u3000','图','串','“','”',' ','与','是','端','在','中','了','\n']    
def _plot_frequency_bars(label, y):
    """Save a horizontal bar chart of the keyword counts to '输出词频图标'."""
    # Draw on a dedicated figure and close it after saving, so the chart
    # created afterwards cannot end up overlapping these axes.
    fig = plt.figure()
    idx = np.arange(len(y))
    # Bars are explicitly centred on idx, so the tick labels sit at idx too
    # (an extra +0.4 offset is only right for edge-aligned bars).
    plt.barh(idx, y, align='center')
    plt.yticks(idx, label)
    plt.xlabel('出现次数', fontsize=20, labelpad=5)
    plt.ylabel('关键词', fontsize=20, labelpad=5)
    plt.title('涡流发生器对激波串振荡的控制', fontsize=25)
    plt.savefig('输出词频图标')
    plt.close(fig)


def _plot_polar_bars(label, y):
    """Save a bar chart on polar axes of the counts to '输出pie极坐标图'.

    *y* must be non-empty.
    """
    # linspace returns exactly len(y) angles; a float-step arange can return
    # one element too many because of rounding.
    theta = np.linspace(0.0, 2 * np.pi, len(y), endpoint=False)
    width = np.pi / 6
    peak = max(y)

    plt.figure()
    ax = plt.subplot(111, projection='polar')
    bars = ax.bar(theta, y, width=width, bottom=0.0, align='center')
    plt.xticks(theta, label)
    for r, bar in zip(y, bars):
        # Scale by the largest count so the colours span the colormap;
        # dividing by a fixed 10 saturates for every count above 10.
        bar.set_facecolor(plt.cm.viridis(r / peak))
        bar.set_alpha(0.5)
    plt.savefig('输出pie极坐标图')


def main(text_path=r'I:\tensorflow\ciyun\paper.txt', encoding='gb18030'):
    """Segment a text file with jieba and plot its word statistics.

    Produces, in order: a horizontal bar chart and a polar bar chart of the
    most frequent tokens (both saved to disk, the polar one also shown), and
    a word cloud built from every token longer than one character.

    Args:
        text_path: Path of the text file to analyse.
        encoding: Encoding used to decode the file.
    """
    # Close the file deterministically instead of leaking the handle.
    with open(text_path, 'r', encoding=encoding) as source:
        text = source.read()
    words = list(jieba.cut(text))

    # NOTE: the tokens in `rem` are dropped *after* the top-20 cut, so fewer
    # than 20 entries may be charted.
    tongji = Counter(words).most_common(20)
    d = {word: count for word, count in tongji if word not in rem}
    print(d)

    label = list(d)
    y = list(d.values())
    if y:
        # With nothing left to plot the polar chart would divide by zero, so
        # both frequency charts are skipped in that case.
        _plot_frequency_bars(label, y)
        _plot_polar_bars(label, y)
        plt.show()

    # Only tokens longer than one character go into the word cloud.
    txt = ' '.join(word for word in words if len(word) > 1)
    wordcloudplot(txt)  # render the word cloud

# Run the analysis only when this file is executed as a script.
if __name__=='__main__':
    main()

1.安装wordcloud,jieba
wordcloud:从 http://www.lfd.uci.edu/~gohlke/pythonlibs/#wordcloud 下载与本机 Python 版本对应的 whl 文件,然后在 cmd 中输入 pip install I:/wordcloud-1.3.2-cp35-cp35m-win_amd64.whl 安装即可
jieba:pip install jieba
2.修改字体文件
找到 Python 安装位置下的 matplotlib 字体目录:C:\Anaconda3\Lib\site-packages\matplotlib\mpl-data\fonts\ttf,添加自己下载的中文字体,
然后删除个人用户目录下的 .matplotlib 文件夹(字体缓存),使 matplotlib 重新生成字体列表
3.添加字体文件和图片
4.更改代码中的文件和图片路径
5.运行调试
### 结果展示
wordcloud词云分析及词频统计绘图_第1张图片
wordcloud词云分析及词频统计绘图_第2张图片
wordcloud词云分析及词频统计绘图_第3张图片

你可能感兴趣的:(python,wordcloud)