# 爬取淘宝评论并生成词云 (Scrape Taobao product reviews and generate a word cloud)

import requests
import json
from lxml import etree
import re
import jieba
import matplotlib.pyplot as plt
from wordcloud import WordCloud,ImageColorGenerator
import numpy as np
import PIL.Image as Image

def _fetchRatePage(item_id, page):
    """Fetch one page of the rating feed for *item_id* and return the decoded JSON."""
    page_url = ('https://rate.taobao.com/feedRateList.htm?auctionNumId='
                + item_id + '&currentPageNum=' + str(page))
    # A timeout keeps the script from hanging forever on a stalled connection.
    res = requests.get(page_url, timeout=10)
    # The body is expected to be JSON wrapped in parentheses; peel off the
    # surrounding whitespace and "()" before decoding.
    return json.loads(res.text.strip().strip('()'))


def getCommodityComments(url):
    """Download all review texts for the product referenced by *url*.

    Args:
        url: A product link containing an ``id=<digits>`` query parameter.

    Returns:
        A list with the ``content`` string of every review that was fetched,
        in the order the rating feed returned them.

    Raises:
        ValueError: If no numeric ``id=`` parameter can be found in *url*.

    Each review is also printed to stdout as it is collected.
    """
    # Anchor on "?", "&" or start-of-string so parameters that merely end in
    # "id=" are not mistaken for the item id, and accept any digit count
    # instead of slicing at fixed offsets.
    match = re.search(r'(?:^|[?&])id=(\d+)', url)
    if match is None:
        raise ValueError('no numeric "id=" parameter found in URL: ' + url)
    item_id = match.group(1)

    page = 1
    page_data = _fetchRatePage(item_id, page)
    total = page_data['total']
    print('该商品共有评论'+str(total)+'条,具体如下: loading...')

    comments = []
    while len(comments) < total:
        if page > 1:
            page_data = _fetchRatePage(item_id, page)
        batch = page_data.get('comments') or []
        if not batch:
            # The feed stopped returning reviews before reaching the reported
            # total; stop instead of requesting pages forever.
            break
        for entry in batch:
            comments.append(entry['content'])
            print(entry['content'])
        page += 1
    return comments

# Ask the user for a product link and download all of its reviews.
goods = input('宝贝链接:')
comments = getCommodityComments(goods)

# Matches tokens starting with "1f" followed by digits (presumably emoji code
# points left over from emoji markup -- TODO confirm) and stray markup
# characters. Written as a raw string so "\d" / "\w" are not treated as string
# escapes, and compiled once rather than on every loop iteration.
rep = re.compile(r'1f\d+\w*|[<>/=]')

# Drop the two fixed placeholder review texts and the markup words, then
# remove whatever the pattern above matches.
siglist = []
for i in comments:
    signature = (i.strip()
                 .replace('评价方未及时做出评价,系统默认好评!', '')
                 .replace('此用户没有填写评价。', '')
                 .replace('span', '')
                 .replace('class', '')
                 .replace('emoji', ''))
    signature = rep.sub('', signature)
    siglist.append(signature)
text = ''.join(siglist)

# Segment the text with jieba in full mode and join the tokens with spaces,
# the form handed to WordCloud.generate() further down.
wordlist = jieba.cut(text, cut_all=True)
word_space_split = " ".join(wordlist)

# Load the image used both as the word-cloud mask and as the colour source.
# The context manager closes the image file once the pixels are copied out.
with Image.open("1.jpg") as mask_image:
    coloring = np.array(mask_image)

my_wordcloud = WordCloud(
    background_color='white',
    width=2400,
    height=2400,
    max_words=2000,
    mask=coloring,
    max_font_size=60,
    random_state=42,
    scale=2,
    font_path="simfang.ttf",
).generate(word_space_split)

# Recolour the words from the mask image, then draw the cloud once.
image_colors = ImageColorGenerator(coloring)
plt.imshow(my_wordcloud.recolor(color_func=image_colors))
plt.axis('off')
# The original referenced plt.show without calling it, so nothing was shown.
plt.show()

# Save the cloud under a user-chosen file name.
name = input('图片名称:')
my_wordcloud.to_file('%s.png' % name)

# 你可能感兴趣的:(爬虫,Python)