scrapy

在 middlewares.py 文件中编写随机 User-Agent 中间件类(编写后需在 settings.py 的 DOWNLOADER_MIDDLEWARES 中启用)

from fake_useragent import UserAgent
class RandomUserAgentMiddleware(object):
    """Downloader middleware that gives each request a random User-Agent.

    The User-Agent string is taken from ``fake_useragent.UserAgent().random``
    every time a request passes through ``process_request``.
    """

    def __init__(self, crawler):
        """Create the middleware and its User-Agent source.

        Args:
            crawler: The crawler handed over by ``from_crawler``. It is
                accepted to keep the constructor signature stable but is
                not read here.
        """
        super(RandomUserAgentMiddleware, self).__init__()
        self.ua = UserAgent()

    @classmethod
    def from_crawler(cls, crawler):
        """Build the middleware instance for *crawler*."""
        return cls(crawler)

    def process_request(self, request, spider):
        """Set a random ``User-Agent`` header on *request* if it has none.

        The hook must be named ``process_request`` (singular): that is the
        method name Scrapy looks up on downloader middlewares. Under the
        previous name, ``process_requests``, the hook was never called.

        Args:
            request: The outgoing request; its ``headers`` are updated in
                place.
            spider: The spider that issued the request (unused).

        Returns:
            None, so that the request continues through the remaining
            middlewares.
        """
        # setdefault keeps a User-Agent that was already set on the request.
        # NOTE(review): if Scrapy's built-in UserAgentMiddleware runs before
        # this one, the header will already be present and no random value
        # is applied -- confirm the middleware ordering in settings.
        request.headers.setdefault("User-Agent", self.ua.random)

    # Backward-compatible alias for code that called the old, misspelled name.
    process_requests = process_request

为每个 spider 单独设置私有配置

class MySpider(scrapy.Spider):
    """Example spider that carries its own per-spider settings.

    Values in ``custom_settings`` have higher priority than ``settings.py``:
    a key set here overrides the project-wide setting of the same name.
    """

    name = 'myspider'
    custom_settings = {'SOME_SETTING': 'some value'}

你可能感兴趣的:(scrapy)