Amazon

import requests
from bs4 import BeautifulSoup

# Amazon search-results URL for the keyword "mouse".
url = 'https://www.amazon.com/s/ref=nb_sb_noss?url=search-alias%3Daps&field-keywords=mouse'

# Without a timeout requests waits indefinitely on a stalled connection, and
# without the status check an HTTP error page would be parsed as if it were
# the result list.
wb_data = requests.get(url, timeout=10)
wb_data.raise_for_status()

soup = BeautifulSoup(wb_data.text, 'lxml')

# Every element matched here is treated as one search result.
ex = soup.select('ul#s-results-list-atf > li.s-result-item.celwidget')

# Reference selector kept from the original script (presumably copied from the
# browser's developer tools for a link inside the first result - TODO confirm):
#   #result_0 > div > div > div > div.a-fixed-left-grid-col.a-col-right > div.a-row.a-spacing-small > div:nth-child(1) > a

# Only the result at index 2 is inspected; this raises IndexError when the
# page yields fewer than three matches.
item = ex[2]
title = item.select('div.a-row.a-spacing-none > a')[0]['title']
price = item.select('span.a-color-base.sx-zero-spacing')[0]['aria-label']
review = item.select('div.a-row.a-spacing-mini > a.a-size-small.a-link-normal.a-text-normal')[0].get_text()
# The second 'a-icon-alt' match is used; only the text before the first space is kept.
star = item.select('span.a-icon-alt')[1].get_text().split(' ')[0]
imageUrl = item.select('div.a-row > div > a.a-link-normal.a-text-normal > img')[0]['src']
link = item.select('div.a-row.a-spacing-none > a')[0]['href']

print(title, price, link, star, review, imageUrl)
# coding:utf-8
import os
from bs4 import BeautifulSoup
import requests

# Parse a locally saved copy of the page instead of fetching it. The file is
# read as bytes and handed to BeautifulSoup; the ``with`` block makes sure the
# handle is closed as soon as the content has been read.
with open(r'F:\mouse.htm', 'rb') as data:
    data_ = data.read()
soup = BeautifulSoup(data_, 'lxml')

# Every element matched here is treated as one search result.
ex = soup.select('ul#s-results-list-atf > li.s-result-item.celwidget')


def _value_or_none(extract):
    """Return ``extract()``, or the string ``'None'`` if it raises.

    Only ordinary errors (``Exception`` subclasses) are treated as "value
    missing"; KeyboardInterrupt and SystemExit still propagate so the script
    can be interrupted.
    """
    try:
        return extract()
    except Exception:
        return 'None'


def getInfo(P):
    """Extract the fields of one search-result element and download its image.

    Each field is looked up independently with a CSS selector. A field that
    cannot be found (no match, missing attribute, ...) is stored as the
    string ``'None'`` - a placeholder string, not the ``None`` object.

    Args:
        P: An object with a BeautifulSoup-style ``select(css)`` method;
            presumably one ``<li>`` result element - confirm against caller.

    Returns:
        A dict with the keys ``Title``, ``Price``, ``Review``, ``Star``,
        ``ImageUrl`` and ``Link``. The image URL is also printed.

    Side effects:
        Calls ``down()`` on the image URL. If the download fails, ``ImageUrl``
        is recorded as ``'None'`` as well.
    """
    def _fetch_image():
        srcset = P.select('div.a-row > div > a.a-link-normal.a-text-normal > img')[0]['srcset']
        # A srcset is a comma-separated list of "<url> <descriptor>" entries.
        # Take the URL of the first entry by splitting on whitespace; splitting
        # on the text '1x' would truncate any URL that itself contains "1x".
        image_url = srcset.split(',')[0].split()[0]
        down(image_url)
        return image_url

    title = _value_or_none(
        lambda: P.select('h2.a-size-medium.s-inline.s-access-title.a-text-normal')[0]['data-attribute'])
    price = _value_or_none(
        lambda: P.select('span.a-color-base.sx-zero-spacing')[0]['aria-label'])
    review = _value_or_none(
        lambda: P.select('div.a-row.a-spacing-mini > a.a-size-small.a-link-normal.a-text-normal')[0].get_text())
    # The second 'a-icon-alt' match is used; only the text before the first space is kept.
    star = _value_or_none(
        lambda: P.select('span.a-icon-alt')[1].get_text().split(' ')[0])
    imageUrl = _value_or_none(_fetch_image)
    link = _value_or_none(
        lambda: P.select('div.a-row.a-spacing-none > a')[0]['href'])

    record = dict(Title=title, Price=price, Review=review, Star=star, ImageUrl=imageUrl, Link=link)
    print(record['ImageUrl'])
    return record


def down(url, timeout=10):
    """Download the image at *url* into ``./img`` and print where it was saved.

    The file name is the part of *url* that follows
    ``https://images-na.ssl-images-amazon.com/images/I/`` up to its first dot,
    with ``.jpg`` appended.

    Args:
        url: Image URL containing the prefix above.
        timeout: Seconds to wait for the server before giving up.

    Raises:
        IndexError: If *url* does not contain the expected prefix.
        requests.RequestException: On a network failure, a timeout, or an
            HTTP error status.
        OSError: If the directory or the file cannot be written.
    """
    # Derive the destination first, so a URL that cannot be named fails before
    # any network traffic or file-system change happens.
    Img = url.split('https://images-na.ssl-images-amazon.com/images/I/')[1].split('.')[0]
    target = './img/{}.jpg'.format(Img)

    r = requests.get(url, timeout=timeout)
    # Do not store an error page under a .jpg name.
    r.raise_for_status()

    # The output directory may not exist yet on a first run.
    os.makedirs('./img', exist_ok=True)
    with open(target, 'wb') as fs:
        fs.write(r.content)
    print('%s => %s' % (url, target))


# Run the extraction over every result element collected in ``ex``.
for result_item in ex:
    getInfo(result_item)

你可能感兴趣的:(Amazon)