def get_commodity(keyword, page):
    """Fetch one JD search-results page and extract the products listed on it.

    Args:
        keyword: Search term. It is converted with ``str()`` and URL-encoded
            by requests, so characters such as ``&`` or ``#`` cannot corrupt
            the query string.
        page: Value sent as the ``page`` query parameter.

    Returns:
        A list of dicts, one per product, with the keys ``goods_title``,
        ``goods_price``, ``goods_url``, ``goods_comment`` and ``goods_img``.
        The list is empty when no selector matches anything.

    Raises:
        requests.exceptions.RequestException: If the request fails or does
            not complete within the timeout.
    """
    search_url = 'https://search.jd.com/Search'
    # Same parameters, in the same order, as the original hand-built URL;
    # passing them via ``params`` lets requests do the escaping.
    query = {
        'keyword': str(keyword),
        'enc': 'utf-8',
        'qrst': '1',
        'rt': '1',
        'stop': '1',
        'vt': '2',
        'page': str(page),
        'click': '0',
    }
    # A timeout keeps a stalled connection from blocking the caller forever.
    jd_data = requests.get(search_url, params=query, timeout=10)
    jd_data.encoding = 'utf8'
    # Pause for a second after each request before continuing.
    time.sleep(1)
    soup = BeautifulSoup(jd_data.text, "lxml")

    # Price
    goods_prices = soup.select(".gl-i-wrap strong i")
    # Name
    goods_titles = soup.select(".p-name a em")
    # Product link
    goods_urls = soup.select(".p-name a")
    # Number of comments
    goods_comments = soup.select(".p-commit a")
    # Image
    goods_imgs = soup.select('.p-img img[width="220"]')
    # Debug output kept from the original implementation.
    print(goods_imgs)

    results = []
    # NOTE: zip() stops at the shortest list, so if one selector matches fewer
    # elements than the others, trailing products are dropped and fields may
    # pair up with the wrong product.
    for goods_title, goods_price, goods_url, goods_comment, goods_img in zip(
            goods_titles, goods_prices, goods_urls, goods_comments, goods_imgs):
        # Some <img> tags carry the address in ``src`` and others in
        # ``data-lazy-img``; fall back to the latter only when ``src`` is absent.
        img_src = goods_img.get('src')
        if img_src is None:
            img_src = goods_img.get('data-lazy-img')
        results.append({
            'goods_title': goods_title.get_text(),
            'goods_price': goods_price.get_text(),
            'goods_url': goods_url.get('href'),
            'goods_comment': goods_comment.get_text(),
            'goods_img': img_src,
        })
    return results
开心，第一次写爬虫！如此笨的我，竟然想到了用三目运算符来兼容图片地址存放在不同属性（src 或 data-lazy-img）里的情况，加油！