词云图
代码1:
# 词云图
# 最简单的
from wordcloud import WordCloud
import jieba
import matplotlib.pyplot as plt
# Load the text and do some light clean-up: strip newlines and spaces.
datafile = '/Users/cherish/Documents/学习3_Python/_Practice/PracticalProject_basic/basic projects/_01_Drawing/matplotlib/file/以梦为马.txt'
# Windows alternative:
# datafile = 'D:\\python\\__self projects__\\basic projects\\_01_Drawing\\matplotlib\\file\\以梦为马.txt'
# The file is read with the platform default encoding; `with` makes sure the
# handle is closed as soon as the content has been read.
with open(datafile) as text_fh:
    text = text_fh.read()
text = text.replace('\n', '').replace(' ', '')
text_cut = jieba.lcut(text)  # segment the text; the result is a list of words
text_cut = ' '.join(text_cut)  # join the words into one space-separated string

# Load the stop words, one per line.
# Windows alternative:
# 'D:\\python\\__self projects__\\basic projects\\_01_Drawing\\matplotlib\\file\\stop_words.txt'
with open(
        '/Users/cherish/Documents/学习3_Python/_Practice/PracticalProject_basic/basic projects/_01_Drawing/matplotlib/file/stop_words.txt'
) as stop_fh:
    stop_words = stop_fh.read().split('\n')

# Build the word cloud with wordcloud.
# NOTE(review): the font path below is a Windows location while the data
# paths above are macOS paths -- confirm which platform this is meant to run
# on and point font_path at a font that exists there.
word_cloud = WordCloud(
    font_path='C:\\Windows\\Fonts\\simkai.ttf',  # font used to draw the words
    background_color='white',  # background colour of the image
    stopwords=stop_words)  # words to leave out
word_cloud.generate(text_cut)

# Show the result with matplotlib.
plt.subplots(figsize=(12, 8))
plt.imshow(word_cloud)
plt.axis('off')
plt.show()
代码2:
# Word cloud with a custom shape: an image is used as the mask (here 黑猫.png)
from PIL import Image
import matplotlib.pyplot as plt
from wordcloud import WordCloud
import jieba
import numpy as np
# Load the text and clean it up: strip newlines and spaces, then segment it.
# Files are read with the platform default encoding; `with` closes each handle
# as soon as its content has been read.
with open(
        'D:\\python\\__self projects__\\basic projects\\_01_Drawing\\matplotlib\\file\\此生未完成.txt'
) as text_fh:
    text = text_fh.read()
text = text.replace('\n', '').replace(' ', '')
text_cut = jieba.lcut(text)
text_cut = ' '.join(text_cut)

# Load the stop words, one per line.
with open(
        'D:\\python\\__self projects__\\basic projects\\_01_Drawing\\matplotlib\\file\\stop_words.txt'
) as stop_fh:
    stop_words = stop_fh.read().split('\n')

# Load the background image and convert it to an array while the file is
# still open; the array is what WordCloud receives as its mask.
with Image.open(
        'D:\\python\\__self projects__\\basic projects\\_01_Drawing\\matplotlib\\file\\黑猫.png'
) as background:
    graph = np.array(background)

# Build the word cloud.
word_cloud = WordCloud(
    font_path='C:\\Windows\\Fonts\\simkai.ttf',  # font file used to draw the words
    background_color='white',
    max_words=1000,  # at most 1000 words make up the cloud
    max_font_size=100,  # largest font size used for a word
    # NOTE: per the wordcloud documentation, width/height are ignored when a
    # mask is supplied -- the mask's own shape is used instead.
    width=1500,
    height=1500,
    mask=graph,  # shape of the word cloud
    stopwords=stop_words)
word_cloud.generate(text_cut)

# Show the result with matplotlib.
plt.subplots(figsize=(12, 8))
plt.imshow(word_cloud)
plt.axis('off')
plt.show()
代码3:
# 词云图:wordcloud,增加从图片获取颜色色系
import matplotlib.pyplot as plt
import jieba
import numpy as np
from wordcloud import WordCloud
from wordcloud import ImageColorGenerator # 从图片中提取调色方案
from PIL import Image
# Input text to turn into a word cloud.
text_file = 'D:\\python\\__self projects__\\basic projects\\_01_Drawing\\matplotlib\\file\\Jane Eyre.txt'
# Stop-word list, one word per line.
stopword_file = 'D:\\python\\__self projects__\\basic projects\\_01_Drawing\\matplotlib\\file\\stop_words.txt'
# Image that supplies both the mask shape and the colour scheme.
img_file = 'D:\\python\\__self projects__\\basic projects\\_01_Drawing\\matplotlib\\file\\美国地图_星条旗色.png'
# Font file (needed for Chinese text).
font_file = 'D:\\python\\__self projects__\\basic projects\\_01_Drawing\\matplotlib\\file\\msyhl.ttc'
def get_text(text_file):
    """Read a text file and return its content as space-separated jieba tokens.

    Newlines and spaces are removed from the raw text before it is segmented
    with ``jieba.lcut``; the resulting words are joined with single spaces.

    Args:
        text_file: Path of the text file to read (platform default encoding).

    Returns:
        One string containing the segmented words separated by spaces.
    """
    # Use a context manager so the file handle is closed right after reading.
    with open(text_file) as fh:
        text = fh.read()
    # NOTE(review): stripping every space also fuses the words of
    # space-delimited (e.g. English) text -- confirm this is intended for the
    # input being used.
    text = text.replace('\n', '').replace(' ', '')
    return ' '.join(jieba.lcut(text))
def get_stopw(stopword_file):
    """Read the stop-word file and return its lines as a list.

    The content is split on ``'\\n'`` only, so a trailing newline in the file
    yields a final empty string and blank lines are kept as empty strings.

    Args:
        stopword_file: Path of the stop-word file (platform default encoding).

    Returns:
        List of strings, one entry per line of the file.
    """
    # Use a context manager so the file handle is closed right after reading.
    with open(stopword_file) as fh:
        return fh.read().split('\n')
def get_img(img_file):
    """Load the background image and prepare the mask and colour generator.

    Side effect: sets the module-level ``usa_mask`` to an
    ``ImageColorGenerator`` built from the image, which ``show`` later uses as
    the recolouring function.

    Args:
        img_file: Path of the image file.

    Returns:
        The image as a NumPy array, for use as the word-cloud mask.
    """
    global usa_mask
    # Convert to an array while the file is open, then let the context
    # manager close the image file instead of leaving it open.
    with Image.open(img_file) as background:
        pixels = np.array(background)
    usa_mask = ImageColorGenerator(pixels)
    # Hand back an independent copy so the caller's mask and the colour
    # generator do not share one buffer.
    return pixels.copy()
def make_wordcloud(words, stopwds, graph, font_file):
    """Create a WordCloud with this script's fixed settings and fill it from `words`.

    Args:
        words: Text to generate the cloud from.
        stopwds: Stop-word list. Currently unused: the ``stopwords`` option is
            left disabled below.
        graph: Mask array that gives the cloud its shape.
        font_file: Font path. Currently unused: the ``font_path`` option is
            left disabled below (a font must be specified for Chinese text).

    Returns:
        Whatever ``WordCloud.generate(words)`` returns.
    """
    # Options kept disabled, for reference:
    #   font_path=font_file, random_state=50, min_font_size=4,
    #   contour_width=1, contour_color='steelblue', stopwords=stopwds
    settings = dict(
        mask=graph,                # background image used as the mask
        background_color='white',  # background colour
        height=400,
        width=800,
        scale=20,                  # scaling factor of 20
        max_words=1000,            # at most 1000 words
        max_font_size=80,          # largest font size is 80
        relative_scaling=0.3,      # how strongly font size follows word frequency
        prefer_horizontal=0.2,     # tendency towards horizontal placement
    )
    cloud = WordCloud(**settings)
    # Alternative: cloud.generate_from_text(words)
    return cloud.generate(words)
def show(wordcloud):
    """Display the word cloud, recoloured with the module-level ``usa_mask``.

    Args:
        wordcloud: Object providing ``recolor(color_func=...)`` whose result
            can be passed to ``plt.imshow``.
    """
    plt.figure(figsize=(8, 4))
    # Recolour with the colour function that get_img() stored in usa_mask.
    recolored = wordcloud.recolor(color_func=usa_mask)
    plt.imshow(recolored, alpha=1)
    plt.axis('off')
    plt.show()
def save_it(wordcloud, name):
    """Save the word cloud as ``<name>.png`` in the current directory.

    The cloud is drawn onto a fresh figure rather than relying on pyplot's
    current figure, which may already have been shown and closed (for example
    after ``plt.show()``), in which case saving it would produce a blank image.

    Args:
        wordcloud: The word cloud to draw; it is rendered as passed in.
        name: File name without extension.
    """
    filename = '{}.png'.format(name)
    fig = plt.figure(figsize=(8, 4))
    plt.imshow(wordcloud)
    plt.axis('off')
    # `quality` is not passed: it only ever applied to JPEG output and newer
    # Matplotlib releases reject it.
    fig.savefig(filename, dpi=600, bbox_inches='tight')
    # Release the figure so repeated saves do not accumulate open figures.
    plt.close(fig)
if __name__ == "__main__":
    # Prepare the inputs: segmented text, stop words and the mask image.
    # get_img() also sets the colour generator that show() relies on.
    words = get_text(text_file)
    stopwds = get_stopw(stopword_file)
    graph = get_img(img_file)

    # Build the cloud, display it, then write it to disk.
    wordcloud = make_wordcloud(
        words=words,
        stopwds=stopwds,
        graph=graph,
        font_file=font_file,
    )
    show(wordcloud)
    save_it(wordcloud, name='美国国旗')