获取亚马逊列表页商品链接及进入详情页获取名字(为了测试出机器人页面)

```

import json
import os
import time
from urllib.parse import urljoin

import OpenSSL
import requests
from lxml import etree

# --- Crawl configuration and shared state ---------------------------------

# Root URL of the site being crawled.
s = 'https://www.amazon.com'

# 1-based number of the next listing page to request.
i = 1

# Product-page links collected from the listing pages.
a_list = []

# Running counter used to number the image files saved to ./img1/.
cont = 120

# Earlier Best-Sellers targets, kept for reference:
# url = 'https://www.amazon.com/Best-Sellers-Appstore-Android-Customization/zgbs/mobile-apps/9408481011/ref=zg_bs_nav_mas_1_mas'
# url1 = 'https://www.amazon.com/Best-Sellers-Appstore-Android-Customization/zgbs/mobile-apps/9408481011/ref=zg_bs_pg_2?_encoding=UTF8&pg=1'

# Browser-like User-Agent sent with the listing-page requests.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
}

# NOTE(review): this mapping is defined but not passed to any request in this
# script, and it only has an 'http' entry while every URL used is https.
proxies = {
    'http': '114.99.7.122:8752',
}

# Phase 1: walk listing pages 1..50 and collect every product link into
# a_list. Any exception ends this phase; links gathered so far are kept and
# the script carries on.
try:
    while i <= 50:
        # Previous target (Best Sellers pages):
        # doc = requests.get("https://www.amazon.com/Best-Sellers-Appstore-Android-Customization/zgbs/mobile-apps/9408481011/ref=zg_bs_pg_2?_encoding=UTF8&pg=%d"%i,headers=headers)
        doc = requests.get(
            "https://www.amazon.com/s/ref=sr_pg_2?fst=as%3Aoff&rh=n%3A10158976011%2Cn%3A1055398&page={0}&bbn=10158976011&ie=UTF8&qid=1533365790".format(i),
            headers=headers,
            # Without a timeout a stalled connection would block forever.
            timeout=30,
        )
        print(doc)
        doc.encoding = 'utf-8'
        resoult = doc.text
        res = etree.HTML(resoult)

        # Previous XPath (Best Sellers layout):
        # a_resoult = res.xpath("//ol/li/span[@class='a-list-item']/div[contains(@class,'a-section')]/span/a/@href")
        a_resoult = res.xpath("//div[@id='mainResults' or @id='centerMinus' or @id='btfResults']//ul//li[contains(@id,'result')]/div[@class='s-item-container']/div[contains(@class,'a-spacing-mini')]/div[contains(@class,'sx-line-clamp-4')]/a/@href")
        i += 1

        for x in a_resoult:
            print(x)
            a_list.append(x)
except Exception as exc:
    # Top-level boundary of a best-effort crawl: report and move on.
    print(exc)

# Phase 2: visit every collected product link. When a page has a product
# title, print it; otherwise treat the page as a possible robot-check page,
# look for its centred image and save it as ./img1/<cont>.jpg.

# open() does not create directories, so make sure the target exists.
os.makedirs('./img1', exist_ok=True)

# Drain a_list as a queue. The original removed items from the list while
# iterating over it, which silently skipped every second link.
while a_list:
    e = a_list.pop(0)

    # if cont<=100:
    try:
        # Scraped hrefs may be site-relative; resolve them against the site
        # root (absolute URLs pass through unchanged).
        # NOTE(review): no User-Agent header is sent here, as in the
        # original; presumably deliberate to provoke the robot page - confirm.
        doc = requests.get(urljoin(s, e), timeout=30)
    except requests.RequestException as exc:
        # One unreachable page should not abort the remaining links.
        print(exc)
        continue

    # doc.encoding = 'utf-8'
    resoult1 = doc.text
    res1 = etree.HTML(resoult1)
    print(resoult1)

    # Previous XPath (price instead of title):
    # a_resoult2 = res1.xpath("//span[@id='actualPriceValue']/strong[@class='priceLarge']/text()")
    a_resoult2 = res1.xpath("//div[@id='title_feature_div']/div[@id='titleSection']/h1[@id='title']/span[@id='productTitle']/text()")

    if a_resoult2:
        print(a_resoult2)
    else:
        cont += 1
        print(e)

        # Reuse the tree parsed above instead of parsing the same HTML again.
        res3 = res1.xpath("//div[contains(@class,'a-text-center')]/img/@src")
        if res3:
            print(res3[0])
            try:
                # Resolve the image src against the URL of the fetched page.
                response = requests.get(urljoin(doc.url, res3[0]), timeout=30)
            except requests.RequestException as exc:
                print(exc)
                continue

            img = response.content
            with open('./img1/{0}.jpg'.format(cont), 'wb') as f:
                f.write(img)

```

你可能感兴趣的:(获取亚马逊列表页商品链接及进入详情页获取名字(为了测试出机器人页面))