爬取京东商品及价格

把京东商品的名称及价格存入 CSV 文件。这里只逐页爬取每页静态加载的 30 个商品;动态加载部分的参数规律是 s=30, 87, 141, 206,对应 n=2, 4, 6, 8。

可以在下面再写个函数,把动态加载的部分也直接存到文本里,用的就是这个参数:

把图片往右拖,可以看到 Network 面板里 XHR 请求的链接规则:

爬取京东商品及价格_第1张图片

代码:

import requests
from urllib.parse import urlencode
from lxml import etree
import csv

def request(kw, page, s, timeout=10):
    """Fetch one JD search result page for a keyword.

    Args:
        kw: Search keyword; sent as both the ``keyword`` and ``wq`` query
            parameters.
        page: Value of the ``page`` query parameter.
        s: Value of the ``s`` query parameter.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block indefinitely on a stalled
            connection.

    Returns:
        The ``requests`` response object, with its ``encoding`` forced to
        ``'utf-8'`` so that ``response.text`` is decoded as UTF-8.

    Raises:
        requests.exceptions.RequestException: On connection errors or when
            the timeout expires.
    """
    # NOTE(review): the Cookie, 'path' and 'referer' values are hard-coded
    # captures from one browser session (they embed a fixed keyword and page)
    # -- confirm they are still accepted by the server.
    # NOTE(review): 'authority', 'method', 'path' and 'scheme' look like
    # HTTP/2 pseudo-headers copied from browser dev tools; here they are sent
    # as ordinary headers -- confirm whether they are needed at all.
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko",
               "Cookie":"__jdu=965081754; shshshfpa=d8651c76-9914-ed87-bb05-6f3d29a46061-1543231749; shshshfpb=0a7cbd16444b16711e44638105fd14f758419bbc053620b7f5bfbd9064; qrsc=3; __jdc=122270672; __jdv=122270672|direct|-|none|-|1547172752163; PCSYCityID=698; xtest=8541.cf6b6759; ipLoc-djd=1-72-2799-0; rkv=V0800; user-key=9f422950-49ab-45ca-b5a9-eb6be105167d; cn=0; shshshfp=8324eeb76ab14ec95fcfee30162c5b01; __jda=122270672.965081754.1543231746.1547261070.1547271971.11; 3AB9D23F7A4B3C9B=ZYOJUA2PVZ4SVMVJ5XVNWWSIHARIPPGP6NLOLYB3TMOOO3G3BCLQGESLJZPYSHQMFOYFKF7L4CCWBDB3DZJPM7ILPI",
               'authority': 'search.jd.com',
               'method': 'GET',
               'path': '/s_new.php?keyword=%E6%89%8B%E6%9C%BA&enc=utf-8&qrst=1&rt=1&stop=1&vt=2&wq=%E6%89%8B%E6%9C%BA&cid2=653&cid3=655&page=4&s=84&scrolling=y&log_id=1529828108.22071&tpl=3_M&show_items=7651927,7367120,7056868,7419252,6001239,5934182,4554969,3893501,7421462,6577495,26480543553,7345757,4483120,6176077,6932795,7336429,5963066,5283387,25722468892,7425622,4768461',
               'scheme': 'https',
               'x-requested-with': 'XMLHttpRequest',
               'referer': 'https://search.jd.com/Search?keyword=%E6%89%8B%E6%9C%BA&enc=utf-8&qrst=1&rt=1&stop=1&vt=2&wq=%E6%89%8B%E6%9C%BA&cid2=653&cid3=655&page=3&s=58&click=0',
               }
    params = {
        'keyword': kw,
        'enc': 'utf-8',
        'wq': kw,
        'qrst': '1',
        'rt': '1',
        'stop': '1',
        'vt': '2',
        'page': page,
        's': s,
        'click':'0',
    }
    # urlencode percent-encodes the (possibly non-ASCII) keyword for the URL.
    url = 'https://search.jd.com/Search?' + urlencode(params)
    response = requests.get(url, headers=headers, timeout=timeout)
    # Force UTF-8 so response.text does not depend on requests' charset guess.
    response.encoding = 'utf-8'

    return response

def write(jspn):
    """Append the product names and prices found in a result page to JD_jd.csv.

    Every ``<li>`` whose class contains ``gl-item`` is treated as one product.
    For each product one CSV row ``[name, price]`` is appended. Products for
    which no name or no price can be located are skipped instead of aborting
    the whole run with an ``IndexError``.

    Args:
        jspn: Response-like object whose ``text`` attribute holds the HTML of
            a search result page.
    """
    html_obj = etree.HTML(jspn.text)
    if html_obj is None:
        # Guard in case the parser produced no tree (e.g. an empty body).
        return
    items = html_obj.xpath('//li[contains(@class,"gl-item")]')

    # 'utf-8-sig' so that Excel recognises the file as UTF-8; newline='' is
    # what the csv module requires to control line endings itself.
    with open('JD_jd.csv', 'a', newline='', encoding='utf-8-sig') as f:
        writer = csv.writer(f, dialect='excel')
        for item in items:
            # NOTE(review): text() selects only the <em>'s direct text nodes,
            # and only the first one is written; if the name contains nested
            # markup the rest is dropped -- confirm whether that is intended.
            p_name = item.xpath('div/div[@class="p-name p-name-type-2"]/a/em/text()')
            p_price = item.xpath('div/div[@class="p-price"]/strong/i/text()')

            if not p_price:
                # Fall back to the data-price attribute when <i> has no text.
                p_price = item.xpath('div/div[@class="p-price"]/strong/@data-price')

            if not p_name or not p_price:
                # Nothing usable for this item; skip it rather than crash.
                continue

            writer.writerow([p_name[0], p_price[0]])

def main(keyword, pages=5):
    """Crawl consecutive search result pages for *keyword* and save them.

    Each iteration fetches one page with ``request`` and appends its products
    to the CSV file with ``write``.

    The request parameters advance linearly: ``page`` takes the values
    1, 3, 5, ... and ``s`` the values 1, 54, 107, ... The previous code added
    ``2 * i`` / ``53 * i`` onto a running total, which produced
    page = 1, 3, 7, 13, 21 and therefore skipped result pages.

    Args:
        keyword: Search keyword passed through to ``request``.
        pages: Number of result pages to fetch (defaults to 5, the count
            that used to be hard-coded).
    """
    for i in range(pages):
        page = 1 + 2 * i
        # The step of 53 is carried over from the original code.
        # NOTE(review): confirm it matches the site's real item offset.
        s = 1 + 53 * i
        write(request(keyword, page, s))
if __name__ == "__main__":
    # Script entry point: prompt for the search term on stdin, then crawl.
    main(input("要搜索的东西"))

 

你可能感兴趣的:(爬虫)