Day 7: Replacing the Original Downloader with selenium + webdriver

This note swaps Scrapy's default downloader for selenium + webdriver, so pages that rely on JavaScript rendering can be fetched through a real browser.

jd.py

# -*- coding: utf-8 -*-
import scrapy


class JdSpider(scrapy.Spider):
    name = 'jd'
    allowed_domains = ['jd.com', 'vip.com']
    start_urls = ['http://www.vip.com/']

    def parse(self, response):
        # response is the HtmlResponse built by the downloader middleware
        # from the selenium-rendered page source
        print('_________________________')
        print(response.text)
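
Because the downloader middleware below hands back a fully rendered HtmlResponse, parse() can use ordinary Scrapy selectors on it. A minimal sketch, assuming hypothetical .product-item / .title / .price CSS classes on the target page:

    def parse(self, response):
        # These selectors are placeholders; inspect the real page and
        # substitute the actual class names before relying on them.
        for item in response.css('.product-item'):
            yield {
                'title': item.css('.title::text').get(),
                'price': item.css('.price::text').get(),
            }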

middlewares.py

from selenium import webdriver
from scrapy.http import HtmlResponse
from time import sleep


class SevenJingdongDownloaderMiddleware:

    def process_request(self, request, spider):
        # Called whenever the downloader is scheduled to fetch a request;
        # this is where the default downloader is replaced with selenium + webdriver.
        driver = webdriver.Chrome()
        driver.get(request.url)
        sleep(3)  # give the page time to run its JavaScript
        body = driver.page_source   # rendered HTML as a string
        url = driver.current_url
        print("Downloader middleware is fetching the page")
        print("Currently visiting: " + url)
        driver.quit()  # release the browser before handing back the response
        # Returning an HtmlResponse here short-circuits Scrapy's built-in downloader
        return HtmlResponse(url=url, body=body, encoding='utf-8', request=request)
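
Opening and quitting a new Chrome window for every request is slow. A common refinement is to start one headless driver when the crawl begins and close it when the spider closes. This is only a sketch, not part of the original project: the class name SeleniumDownloaderMiddleware is hypothetical, and it would be registered in DOWNLOADER_MIDDLEWARES in place of the class above.

from time import sleep

from scrapy import signals
from scrapy.http import HtmlResponse
from selenium import webdriver
from selenium.webdriver.chrome.options import Options


class SeleniumDownloaderMiddleware:

    def __init__(self):
        options = Options()
        options.add_argument('--headless')  # run Chrome without opening a window
        self.driver = webdriver.Chrome(options=options)

    @classmethod
    def from_crawler(cls, crawler):
        middleware = cls()
        # Close the browser exactly once, when the spider finishes
        crawler.signals.connect(middleware.spider_closed, signal=signals.spider_closed)
        return middleware

    def spider_closed(self, spider):
        self.driver.quit()

    def process_request(self, request, spider):
        self.driver.get(request.url)
        sleep(3)  # crude wait; an explicit WebDriverWait on a known element is more robust
        return HtmlResponse(url=self.driver.current_url,
                            body=self.driver.page_source,
                            encoding='utf-8',
                            request=request)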

settings.py (around line 55)

DOWNLOADER_MIDDLEWARES = {
    # Enable the custom downloader middleware
    'seven_jingdong.middlewares.SevenJingdongDownloaderMiddleware': 543,
    # Disable the built-in UserAgentMiddleware; the browser now sends its own headers
    'scrapy.downloadermiddlewares.useragent.UserAgentMiddleware': None,
}
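
With the middleware enabled, the crawl can be started from the project root (the project name seven_jingdong is taken from the settings entry above); Chrome should launch and the rendered HTML should be printed by parse():

scrapy crawl jd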
