Using Selenium: Scraping Product Listings from JD.com
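
The script below drives Chrome through Selenium: it opens a JD.com search for a user-supplied keyword, scrolls each results page to trigger lazy loading, parses each product's name, price, and shop with pyquery, appends the rows to a CSV file, and then jumps to the next page, for up to 100 pages.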

from selenium import webdriver
from selenium.webdriver import ChromeOptions
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By

import time
import csv
from pyquery import PyQuery as pq

# 1. Build the browser object
option = ChromeOptions()
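# The next two options hide the "Chrome is being controlled by automated
# software" banner and drop Chrome's automation extension, which makes the
# session look less like a bot to simple anti-crawling checks.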
option.add_experimental_option('excludeSwitches', ['enable-automation'])
option.add_experimental_option('useAutomationExtension', False)
option.add_argument('--headless')
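# Optional addition: headless windows default to a small viewport, so give it
# an explicit size (maximize_window() is a no-op under --headless).
option.add_argument('--window-size=1920,1080')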

browser = webdriver.Chrome(options=option)

wait = WebDriverWait(browser, 10)
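# Each wait.until() call below polls the page for up to 10 seconds before
# raising a TimeoutException.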


# 2. Open the search page
browser.maximize_window()  # no-op under --headless; the window size set above applies
keyword = input("Enter the product keyword to search for: ")
url = "https://search.jd.com/Search?keyword=" + keyword
browser.get(url)
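# Chrome percent-encodes a non-ASCII keyword in the URL itself; if you reuse
# this URL outside the browser, quote it with urllib.parse.quote() first.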


filename = keyword + '.csv'
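# Optional addition (not in the original script): write a header row once
# before scraping, so the CSV columns are self-describing.
with open(filename, 'w', newline='', encoding='utf-8-sig') as f:
    csv.writer(f).writerow(['name', 'price', 'shop'])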
# 3. Loop over 100 result pages, parsing each one
for page_num in range(1, 101):
    print("Fetching page " + str(page_num))
    # 3.1 Scroll to the bottom so lazy-loaded items render
    browser.execute_script('window.scrollTo(0, document.body.scrollHeight)')
    time.sleep(3)

    # 3.2 Parse the product list on the current page
    doc = pq(browser.page_source)
    lis = doc.find('div#J_goodsList li').items()
    # Open the CSV once per page and append every row from that page;
    # utf-8-sig keeps Chinese text readable when the file is opened in Excel.
    with open(filename, 'a', newline='', encoding='utf-8-sig') as f:
        writer = csv.writer(f)
        for li in lis:
            goods_name = li.find('.p-name').text().replace('\n', '')
            goods_price = li.find('.p-price').text()
            goods_shop = li.find('.curr-shop.hd-shopname').text()
            writer.writerow([goods_name, goods_price, goods_shop])

    # 3.3 Jump to the next page via the page-number input box
    if page_num < 100:
        input_page = wait.until(EC.presence_of_element_located(
            (By.XPATH, '//*[@id="J_bottomPage"]/span[2]/input')))
        submit = wait.until(EC.element_to_be_clickable(
            (By.XPATH, '//*[@id="J_bottomPage"]/span[2]/a')))
        input_page.clear()
        # The browser is currently on page_num, so request page_num + 1
        input_page.send_keys(str(page_num + 1))
        submit.click()

browser.quit()
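
A fixed time.sleep(3) plus a blind click can race the page: the click may fire before the pager is ready, and parsing may start before the next page has finished loading. A more robust pattern is to wait until the pager itself confirms the jump. The sketch below reuses the wait, EC, and By objects defined above and is an assumption-laden variant, not part of the original script: it presumes JD highlights the current page number in an element matching #J_bottomPage .p-num a.curr, a selector you should verify against the live page.

def go_to_page(target_page):
    """Jump to target_page and block until the pager reflects the switch."""
    box = wait.until(EC.presence_of_element_located(
        (By.XPATH, '//*[@id="J_bottomPage"]/span[2]/input')))
    go = wait.until(EC.element_to_be_clickable(
        (By.XPATH, '//*[@id="J_bottomPage"]/span[2]/a')))
    box.clear()
    box.send_keys(str(target_page))
    go.click()
    # Poll until the highlighted page number equals the page we asked for
    # (the CSS selector is an assumption -- check it still matches the site).
    wait.until(EC.text_to_be_present_in_element(
        (By.CSS_SELECTOR, '#J_bottomPage .p-num a.curr'), str(target_page)))

Calling go_to_page(page_num + 1) in place of the clear/send_keys/click block in step 3.3 makes the loop advance only after the page has actually changed.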
