把京东商品名称及价格存入 CSV 文件。这里只逐页爬取每页静态加载的30个商品;动态加载部分对应的参数为 s=30,87,141,206,n=2,4,6,8。
可以在下面再写一个函数,把结果直接存到文本里,用到的就是这个参数:
把图片往右拖,可以看到 Network 里 XHR 请求的链接规则:
代码:
import requests
from urllib.parse import urlencode
from lxml import etree
import csv
def request(kw, page, s, timeout=10):
    """Fetch one JD search-result page and return the HTTP response.

    Builds ``https://search.jd.com/Search?<query>`` from the arguments and
    issues a GET request with a fixed set of browser-like headers.

    Args:
        kw: Search keyword; sent as both the ``keyword`` and ``wq`` parameters.
        page: Value of the ``page`` query parameter.
        s: Value of the ``s`` query parameter (presumably the offset of the
            first item on the page -- TODO confirm against the site).
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the crawl indefinitely.

    Returns:
        The ``requests.Response`` object with ``encoding`` forced to
        ``'utf-8'`` so that ``response.text`` is decoded as UTF-8.

    Raises:
        requests.exceptions.RequestException: On connection errors or when
            the timeout expires.
    """
    # NOTE(review): the cookie is a hard-coded browser session value and the
    # 'authority'/'method'/'path'/'scheme' entries look like HTTP/2
    # pseudo-headers copied from browser dev tools -- confirm whether the
    # server actually needs any of them.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko",
        "Cookie":"__jdu=965081754; shshshfpa=d8651c76-9914-ed87-bb05-6f3d29a46061-1543231749; shshshfpb=0a7cbd16444b16711e44638105fd14f758419bbc053620b7f5bfbd9064; qrsc=3; __jdc=122270672; __jdv=122270672|direct|-|none|-|1547172752163; PCSYCityID=698; xtest=8541.cf6b6759; ipLoc-djd=1-72-2799-0; rkv=V0800; user-key=9f422950-49ab-45ca-b5a9-eb6be105167d; cn=0; shshshfp=8324eeb76ab14ec95fcfee30162c5b01; __jda=122270672.965081754.1543231746.1547261070.1547271971.11; 3AB9D23F7A4B3C9B=ZYOJUA2PVZ4SVMVJ5XVNWWSIHARIPPGP6NLOLYB3TMOOO3G3BCLQGESLJZPYSHQMFOYFKF7L4CCWBDB3DZJPM7ILPI",
        'authority': 'search.jd.com',
        'method': 'GET',
        'path': '/s_new.php?keyword=%E6%89%8B%E6%9C%BA&enc=utf-8&qrst=1&rt=1&stop=1&vt=2&wq=%E6%89%8B%E6%9C%BA&cid2=653&cid3=655&page=4&s=84&scrolling=y&log_id=1529828108.22071&tpl=3_M&show_items=7651927,7367120,7056868,7419252,6001239,5934182,4554969,3893501,7421462,6577495,26480543553,7345757,4483120,6176077,6932795,7336429,5963066,5283387,25722468892,7425622,4768461',
        'scheme': 'https',
        'x-requested-with': 'XMLHttpRequest',
        'referer': 'https://search.jd.com/Search?keyword=%E6%89%8B%E6%9C%BA&enc=utf-8&qrst=1&rt=1&stop=1&vt=2&wq=%E6%89%8B%E6%9C%BA&cid2=653&cid3=655&page=3&s=58&click=0',
    }
    params = {
        'keyword': kw,
        'enc': 'utf-8',
        'wq': kw,
        'qrst': '1',
        'rt': '1',
        'stop': '1',
        'vt': '2',
        'page': page,
        's': s,
        'click':'0',
    }
    url = 'https://search.jd.com/Search?' + urlencode(params)
    response = requests.get(url, headers=headers, timeout=timeout)
    # Force UTF-8 so response.text does not depend on charset guessing.
    response.encoding = 'utf-8'
    return response
def write(jspn):
    """Append the name and price of each product in a result page to JD_jd.csv.

    Every ``<li>`` whose class contains ``gl-item`` produces one CSV row
    ``[name, price]``. The price is read from the text of ``strong/i`` and
    falls back to the ``data-price`` attribute of ``strong`` when that text
    is absent. Items for which no name element or no price can be found are
    skipped instead of aborting the whole page with an ``IndexError``.

    Args:
        jspn: Response-like object whose ``text`` attribute holds the HTML
            of a search-result page (as returned by ``request``).
    """
    html_obj = etree.HTML(jspn.text)
    if html_obj is None:
        # Nothing parseable in the response body, so there is nothing to write.
        return
    items = html_obj.xpath('//li[contains(@class,"gl-item")]')
    with open('JD_jd.csv', 'a', newline='', encoding='utf-8-sig') as f:
        writer = csv.writer(f, dialect='excel')
        for item in items:
            prices = item.xpath('div/div[@class="p-price"]/strong/i/text()')
            if not prices:
                prices = item.xpath('div/div[@class="p-price"]/strong/@data-price')
            name_nodes = item.xpath('div/div[@class="p-name p-name-type-2"]/a/em')
            if not name_nodes or not prices:
                # Item does not match the expected layout; skip it.
                continue
            # Join every text fragment of the <em>: taking only the first
            # direct text node truncates names that contain nested markup.
            name = ''.join(name_nodes[0].itertext())
            writer.writerow([name, prices[0]])
def main(keyword, pages=5):
    """Crawl consecutive result pages for *keyword* and append them to the CSV.

    Requests the odd-numbered pages 1, 3, 5, ... and passes each response to
    ``write``. The ``page`` and ``s`` values are derived directly from the
    iteration index; accumulating them with ``+=`` made the step grow every
    iteration (pages 1, 3, 7, 13, 21) and skipped pages.

    Args:
        keyword: Search keyword forwarded to ``request``.
        pages: Number of result pages to fetch (defaults to 5).
    """
    for i in range(pages):
        page = 1 + 2 * i
        # NOTE(review): the step of 53 for ``s`` is kept from the original
        # code; verify it against the offsets the site actually uses.
        s = 1 + 53 * i
        write(request(keyword, page, s))
if __name__ == "__main__":
    # Script entry point: prompt for the search keyword and pass it to main().
    main(input("要搜索的东西"))