# Python绘制红楼梦词云

import jieba
from wordcloud import WordCloud
import imageio
# Image handed to WordCloud as its mask further down.
mask = imageio.imread('china.jpg')

# Load the full text of the novel (UTF-8 encoded).
with open('novel/hongloumeng.txt', encoding='UTF-8') as novel_file:
    data = novel_file.read()

# Segment the text into a list of words with jieba.
word_list = jieba.lcut(data)

# Words that must not appear in the final ranking: common function words plus
# the short forms of character names that get folded into a canonical name.
exclude = {
    "什么", "一个", "我们", "你们", "如今", "说道", "知道", "姑娘",
    "起来", "这里", "出来", "众人", "那里", "自己", "一面", "只见",
    "两个", "没有", "怎么", "不是", "不知", "这个", "听见", "这样",
    "进来", "咱们", "就是", "东西", "告诉", "回来", "只是", "大家",
    "只得", "这些", "他们", "丫头", "不敢", "出去", "所以", "不过",
    "不好", "姐姐", "的话", "一时", "鸳鸯", "心里", "不能", "过来",
    "她们", "如此", "银子", "今日", "二人", "答应", "黛玉", "宝玉",
    "凤姐儿", "贾母",
}





# Frequency table: word -> number of occurrences in the tokenized text.
count = {}

# Tally every token of two or more characters; single-character tokens
# are skipped.
for token in word_list:
    if len(token) > 1:
        count[token] = count.get(token, 0) + 1




# Fold the different names used for the same character into one canonical
# entry. Each canonical name maps to the other keys whose counts are added
# to it. The alias keys themselves are left in the table; the short forms
# listed in `exclude` are removed by the exclusion step.
# NOTE(review): '无字' looks like a placeholder or typo for one of the
# character's other names -- confirm the intended key.
alias_groups = {
    '贾宝玉': ('宝玉', '宝二爷'),
    '林黛玉': ('黛玉', '无字'),
    '凤姐': ('凤姐儿',),
    '老太太': ('贾母',),
}
for canonical, aliases in alias_groups.items():
    # .get(..., 0) instead of indexing: a name the tokenizer never produced
    # must count as zero rather than abort the script with KeyError.
    merged = count.get(canonical, 0) + sum(count.get(alias, 0) for alias in aliases)
    # Do not create zero-count entries for characters that never occur.
    if merged:
        count[canonical] = merged

# Remove the excluded words from the frequency table.
for word in exclude:
    # pop() with a default: an excluded word that never occurred in the text
    # is simply skipped, where `del count[word]` would raise KeyError.
    count.pop(word, None)


# Rank the (word, frequency) pairs from most to least frequent.
items = sorted(count.items(), key=lambda entry: entry[1], reverse=True)




# Print the top-ranked entries and repeat each name once per occurrence, so
# that the text built from this list reflects the frequencies.
top_n = 20
role_list = []
# Slicing tolerates a ranking with fewer than `top_n` entries, and the loop
# variable is named `freq` so it does not rebind the `count` dictionary.
for role, freq in items[:top_n]:
    print(role, freq)
    role_list.extend([role] * freq)


# One space-separated string in which every name appears as many times as
# it was counted.
text = " ".join(role_list)
print(text)

# Configure the cloud, build it from the text, then write the image file.
cloud = WordCloud(
    background_color='white',
    mask=mask,
    font_path='msyh.ttc',
    collocations=False,
)
cloud.generate(text)
cloud.to_file('红楼梦.png')
# Python绘制红楼梦词云_第1张图片
# 红楼梦

# 你可能感兴趣的:(Python绘制红楼梦词云)