我们在网购时,一般都会参考物品的评价。物品的评价一般按好评、中评和差评进行分类。如果只是逐条浏览评论文字,还不够直观;若能增加评论词语的图形化展示(如词云),将更利于消费者选购。
import re
import jieba
import wordcloud
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
# Load the raw book reviews.  No encoding is passed to open(), so the file
# is decoded with the platform's default text encoding.
with open('d:/bookComments.txt', 'r') as fp:
    bookComments = fp.read()

# Character class of the punctuation marks to strip from the reviews.
# A raw string is used so that `\(` reaches the regex engine unchanged
# instead of being parsed as an (invalid) Python string escape, which newer
# interpreters warn about.  The compiled pattern is the same either way.
r = r'[。,.!?&!;?*:\()()《》]+'
Comments_data = re.sub(r, '', bookComments)

# One entry per line of the cleaned text.  If the text ends with a newline,
# split('\n') yields a final empty entry, which is included in the count.
Comments = Comments_data.split('\n')
print("一共有{}条书评;".format(len(Comments)))

# Immutable snapshot of the cleaned reviews, consumed by the filtering step.
CommentsTuple = tuple(Comments)
def rule(x):
    """Return True when review *x* has enough distinct characters to keep.

    The ratio len(set(x)) / 20 must exceed 0.6, i.e. the review must contain
    more than 12 distinct characters.  Empty, very short or highly
    repetitive reviews therefore fail the test.
    """
    return len(set(x)) / 20 > 0.6


# Lazily select the reviews that pass the rule; the iterator is consumed
# exactly once by the write below.
result = filter(rule, CommentsTuple)

# Write the surviving reviews, one per line.  The output file is opened in
# 'w' mode, so whatever it held before is replaced by the filtered reviews.
with open('d:/bookCommentsNew.txt', 'w') as fp:
    fp.writelines(item + '\n' for item in result)
# Segment the *cleaned* review text (punctuation already stripped into
# Comments_data) rather than the raw file contents, so punctuation marks
# are not counted as words.  cut_all=False selects jieba's non-full
# (accurate) segmentation mode.
Comments_list_exact = jieba.cut(Comments_data, cut_all=False)

# Word -> number of occurrences; used as the frequency table for the
# word cloud.
d = dict()
for key in Comments_list_exact:
    # The cleaned text still contains the line breaks that separate the
    # reviews; skip tokens that are only whitespace so they do not take up
    # a slot in the frequency table.
    if not key.strip():
        continue
    d[key] = d.get(key, 0) + 1
print(d)
# Load the shape image as a pixel array.  The same array is used twice:
# as the mask handed to WordCloud and as the input to ImageColorGenerator.
mask = np.array(Image.open('d:/png/beijing.png'))

# Options passed to the WordCloud constructor, gathered in one place.
cloud_options = dict(
    font_path='C:/Windows/Fonts/simhei.ttf',
    width=500,
    height=400,
    mask=mask,
    max_words=200,
    max_font_size=100,
    background_color='white',
    font_step=3,
    random_state=True,
    prefer_horizontal=0.9,
)
wc = wordcloud.WordCloud(**cloud_options)

# Lay out the cloud from the word -> count table built earlier.
wc.generate_from_frequencies(d)

# Recolour the words with a colour function derived from the mask image.
image_colors = wordcloud.ImageColorGenerator(mask)
wc.recolor(color_func=image_colors)

# Show the finished cloud without axes.
plt.imshow(wc)
plt.axis('off')
plt.show()
①首先下载清华源(清华大学 PyPI 镜像)
③点击“+”,选择需要下载的插件