Python + requests 京东商品信息爬取

def get_commodity(keyword, page):
    """Fetch one JD.com search-result page and extract the listed goods.

    Args:
        keyword: Search term; converted with ``str()`` and URL-encoded by
            ``requests`` when the query string is built.
        page: Page number placed in the ``page`` query parameter.

    Returns:
        A list of dicts, one per product found, with the keys
        ``goods_title``, ``goods_price``, ``goods_url``, ``goods_comment``
        and ``goods_img``. The list is empty when nothing matches the
        selectors.

    Raises:
        requests.RequestException: If the HTTP request fails or times out.
    """
    # Let requests build the query string so that keywords containing
    # characters such as '&', '#' or '=' cannot corrupt the URL.
    query = {
        'keyword': str(keyword),
        'enc': 'utf-8',
        'qrst': '1',
        'rt': '1',
        'stop': '1',
        'vt': '2',
        'page': str(page),
        'click': '0',
    }
    # A timeout keeps an unresponsive server from blocking the caller forever.
    response = requests.get('https://search.jd.com/Search', params=query, timeout=10)
    response.encoding = 'utf8'
    # One-second pause after every request (presumably to throttle the crawl).
    time.sleep(1)
    soup = BeautifulSoup(response.text, "lxml")

    goods_prices = soup.select(".gl-i-wrap strong i")    # price
    goods_titles = soup.select(".p-name a em")           # product name
    goods_urls = soup.select(".p-name a")                # product link
    goods_comments = soup.select(".p-commit a")          # number of comments
    goods_imgs = soup.select('.p-img img[width="220"]')  # thumbnail image

    commodities = []
    # The five selections are paired purely by position and zip() stops at
    # the shortest one.
    # NOTE(review): if a product is missing one of these elements the pairing
    # could shift -- confirm against the live markup.
    for goods_title, goods_price, goods_url, goods_comment, goods_img in zip(
            goods_titles, goods_prices, goods_urls, goods_comments, goods_imgs):
        # The image address is stored in either 'src' or 'data-lazy-img';
        # use the latter only when 'src' is absent.
        img_src = goods_img.get('src')
        if img_src is None:
            img_src = goods_img.get('data-lazy-img')
        commodities.append({
            'goods_title': goods_title.get_text(),
            'goods_price': goods_price.get_text(),
            'goods_url': goods_url.get('href'),
            'goods_comment': goods_comment.get_text(),
            'goods_img': img_src,
        })
    return commodities
开心,第一次写爬虫,如此笨的我,竟然想到了用三目运算把用不同属性(src 或 data-lazy-img)定义的图片地址给获取了,加油!

你可能感兴趣的:(Python)