[python]生成词云

三个步骤:分割——统计词频——绘制词云

# coding: utf-8
'''
Generate a word cloud: segment the text, count word frequencies, draw the cloud.
'''
import numpy as np
import pandas as pd
from wordcloud import WordCloud, ImageColorGenerator
import matplotlib.pyplot as plt
from PIL import Image
import jieba

#分割语料
def wordcut(filename, tofilename, stopwords_path="stopwords.txt"):
    """Segment a text file with jieba and write a word-frequency table as CSV.

    Every line of ``filename`` is tokenised with ``jieba.cut``. Empty tokens
    and tokens found in the stop-word list are discarded. Tokens of length 1
    are kept in the per-line token lists but excluded from the frequency
    count. The frequency table has the columns ``name`` (word) and ``val``
    (count) and is written, including the DataFrame index, to ``tofilename``
    encoded as UTF-8 with BOM.

    Args:
        filename: Path of the text to analyse; decoded as ``utf-8-sig``.
        tofilename: Path of the CSV frequency table; overwritten if present.
        stopwords_path: Path of the stop-word list, one word per line,
            decoded as UTF-8. Defaults to ``"stopwords.txt"`` relative to
            the current working directory.
    """
    # Context managers guarantee that both input files are closed again.
    with open(filename, encoding="utf-8-sig") as src:
        lines = src.read().split('\n')
    # A text ending in a newline produces one trailing empty string. Drop
    # only that artefact, so a final line without a newline is still used.
    if lines and lines[-1] == '':
        lines.pop()

    with open(stopwords_path, encoding="utf-8") as sw:
        # A set is enough here: the stop words are only used for membership tests.
        stopwords = {line.rstrip() for line in sw}

    # One list of kept tokens per input line.
    s_content = [
        [w for w in jieba.cut(line) if w != '' and w not in stopwords]
        for line in lines
    ]
    s_contents = pd.DataFrame({'s.clean': s_content})
    print(s_contents.head())  # quick preview of the segmented lines

    # Count word frequencies, ignoring single-character tokens.
    wc_dict = {}
    for tokens in s_content:
        for word in tokens:
            if len(word) > 1:
                wc_dict[word] = wc_dict.get(word, 0) + 1

    # Rows keep the order in which each word was first encountered.
    df = pd.DataFrame({'name': list(wc_dict), 'val': list(wc_dict.values())})
    df.to_csv(tofilename, mode='w', encoding='utf_8_sig')  # write the frequency table

#根据词频绘制词云
def draw_cloud(read_name, mask_path='beijing.jpg', font_path='simkai.ttf',
               out_path='wc-3.png'):
    """Render a word cloud from a word-frequency CSV, save it and display it.

    Args:
        read_name: Path of a CSV file with a ``name`` column (words) and a
            ``val`` column (frequencies).
        mask_path: Image file handed to ``WordCloud`` as ``mask``.
            Defaults to ``'beijing.jpg'``.
        font_path: Font file used to draw the words. Defaults to
            ``'simkai.ttf'`` (presumably chosen because it contains CJK
            glyphs -- confirm if the font is swapped).
        out_path: File the rendered word cloud is written to. Defaults to
            ``'wc-3.png'``.
    """
    # Convert to an array inside the ``with`` so the pixel data is loaded
    # before the image file is closed.
    with Image.open(mask_path) as image:
        graph = np.array(image)

    # Arguments: font file, background colour, maximum number of words,
    # mask image, and prefer_horizontal=1 to favour horizontal placement.
    wc = WordCloud(font_path=font_path, background_color="white", max_words=100, mask=graph, prefer_horizontal=1)

    # utf-8-sig accepts files with or without a BOM. Reading ``name`` as str
    # with the default NA markers disabled keeps words such as "NA" or
    # "null" as text instead of turning them into NaN.
    fp = pd.read_csv(read_name, encoding='utf-8-sig', dtype={'name': str},
                     keep_default_na=False)
    dic = dict(zip(fp['name'], fp['val']))
    wc.generate_from_frequencies(dic)

    # Save before showing: plt.show() may block until the window is closed,
    # and the output file should not depend on that.
    wc.to_file(out_path)

    plt.imshow(wc)
    plt.axis("off")  # hide the axes
    plt.show()


if __name__ == '__main__':
    # The whole suite uses 4-space indentation; mixing tabs and spaces in
    # one block is rejected by Python 3.
    name1 = 'teacher_chat.csv'  # input text to build the word cloud from
    name2 = 'wcdict.csv'  # word-frequency table passed from wordcut to draw_cloud
    wordcut(name1, name2)
    draw_cloud(name2)

你可能感兴趣的:(python)