# Jieba分词词频统计绘制词云图Wordcloud套图(2018-11-03)

import jieba

from wordcloud import WordCloud

import matplotlib.pyplot as plt

# AI2017.txt comes from http://www.gov.cn/zhengce/content/2017-07/20/content_5211996.htm
from collections import Counter

# Tokens must occur at least this often and be at least this long to be kept.
MIN_COUNT = 5
MIN_LENGTH = 2

# NOTE(review): no explicit encoding is passed to open(), so the platform
# default is used -- confirm AI2017.txt and stopword.txt are saved in it.
with open('AI2017.txt', 'r') as f:
    renmin = f.read()

# Register the custom dictionary before segmenting the text.
jieba.load_userdict("AIDict.txt")

seg_list = jieba.cut(renmin, cut_all=False)  # segment the text into tokens

# Count how many times each token occurs.
word_counts = Counter(seg_list)

# Load the stop words as a set of whole words. Testing membership against
# the raw file content (one big string) would be a substring test and would
# also drop any token that merely appears inside a longer stop word.
# Assumes the stop words are separated by whitespace/newlines -- TODO confirm.
with open('stopword.txt', 'r') as ft:
    stopword = set(ft.read().split())

# Keep only tokens that are frequent enough, long enough, not stop words,
# and that do not contain the character '一'.
tf = {
    seg: count
    for seg, count in word_counts.items()
    if count >= MIN_COUNT
    and len(seg) >= MIN_LENGTH
    and seg not in stopword
    and '一' not in seg
}

#print(tf)

#统计词频后绘制词云图

from wordcloud import WordCloud,ImageColorGenerator

import matplotlib.pyplot as plt

from PIL import Image

import numpy as np

from scipy.misc import imread

def _show(image):
    """Display *image* with matplotlib, hiding the axes."""
    plt.imshow(image)
    plt.axis('off')
    plt.show()


# The mask image is loaded as an array; it is used both as the WordCloud mask
# and, further down, as the source image for ImageColorGenerator.
mask_img = np.array(Image.open("heart.png"))

# Font file handed to WordCloud (presumably chosen because it contains the
# Chinese glyphs needed for the tokens -- confirm on the target machine).
font = r'c:\Windows\Fonts\simfang.ttf'

# Build the cloud directly from the token -> count mapping in ``tf``.
wc = WordCloud(
    background_color="white",
    mask=mask_img,
    collocations=False,
    font_path=font,
    max_font_size=200,
    width=1600,
    height=500,
    margin=0,
)
wc.generate_from_frequencies(tf)

# First figure: the cloud with WordCloud's own colouring.
_show(wc)

# Second figure: recolour the words using colours taken from the mask image.
image_colors = ImageColorGenerator(mask_img)
_show(wc.recolor(color_func=image_colors))

# Write the cloud to disk (done after the recolor call above).
wc.to_file('AI.jpg')

# 你可能感兴趣的:(Jieba分词词频统计绘制词云图Wordcloud套图(2018-11-03))