Official documentation
docker pull scrapinghub/splash
docker run -it -d -p 8050:8050 --rm scrapinghub/splash
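Before pointing it at JD, it is worth confirming that the container actually answers requests. A minimal smoke test, assuming Splash is reachable at localhost:8050 (swap in your own host and port if it runs elsewhere):

import requests

# Ask Splash to render a trivial page; HTTP 200 plus HTML means the service is up
resp = requests.get(
    "http://localhost:8050/render.html",
    params={"url": "https://example.com", "timeout": 10},
)
print(resp.status_code)
print(resp.text[:200])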
Enter the Splash host's IP and port in a browser and request a JD.com search page. For example, entering
http://localhost:8050/render.html?url=https://search.jd.com/Search?keyword=%E5%B0%8F%E7%B1%B310&enc=utf-8&suggest=1.def.0.V08--38s0&wq=%E5%B0%8F%E7%B1%B3&pvid=c18d37ab55764cc4ac71e124bc496035
shows the fully rendered results page.
curl "http://codekiller.top:8050/render.html?url=https://search.jd.com/Search?keyword=%E5%B0%8F%E7%B1%B310&enc=utf-8&suggest=1.def.0.V08--38s0&wq=%E5%B0%8F%E7%B1%B3&pvid=c18d37ab55764cc4ac71e124bc496035" -o 小米.html
Open the HTML file.
Operation: extract all the prices.
from lxml import etree

# Read the Splash-rendered page saved by the curl command above
with open('C:\\Users\\MyPC\\小米.html', "r", encoding="UTF-8") as file:
    text = file.read()

# Every price on the JD results page sits in <div class="p-price"><strong><i>
selector = etree.HTML(text)
prices = selector.xpath("//div[@class='p-price']/strong/i/text()")
print(prices)
from urllib.parse import quote
from lxml import etree
import requests

# Pretend to be a normal desktop browser
ua = ("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
      "(KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36")
headers = {"User-Agent": ua}

# Build the JD search URL for the keyword
keyword = "小米"
params = dict(
    keyword=keyword,
    enc="utf-8",
    wq=keyword,
    pvid="57486a4adb40455dbba829de75133672",
)
query_string = "&".join("%s=%s" % (k, v) for k, v in params.items())
jd_url = "https://search.jd.com/Search?" + query_string

# Route the request through Splash; quote() escapes the inner URL so its
# query string is not mixed up with render.html's own parameters
url = "http://codekiller.top:8050/render.html?url=" + quote(jd_url)
r = requests.get(url, headers=headers)

# Extract names and prices from the rendered HTML and print them in pairs
selector = etree.HTML(r.text)
price_list = selector.xpath("//div[@class='p-price']/strong/i/text()")
name_list = selector.xpath("//div[contains(@class,'p-name')]/a/em/text()")
for name, price in zip(name_list, price_list):
    print(name, price)
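One caveat: zip stops at the shorter list, so if a single listing has no visible price the names and prices silently slip out of alignment. A more defensive sketch, continuing with the selector from the script above, pairs them per product node instead; it assumes JD's result items still carry the gl-item class, which is worth checking against the saved HTML:

# Walk each product <li> and read its own name and price, so a missing
# field affects only that item instead of shifting the whole pairing
items = selector.xpath("//li[contains(@class,'gl-item')]")
for item in items:
    name = "".join(item.xpath(".//div[contains(@class,'p-name')]/a/em//text()")).strip()
    price = "".join(item.xpath(".//div[@class='p-price']/strong/i/text()")).strip()
    print(name, price or "N/A")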