'''
Scrapy custom downloader middleware:
dynamically set a random User-Agent for each request.
'''
import random
class RandomUserAgent:
    def __init__(self, agents):
        self.agents = agents

    @classmethod
    def from_crawler(cls, crawler):
        # Load the USER_AGENTS list from the project settings
        return cls(crawler.settings.getlist('USER_AGENTS'))

    def process_request(self, request, spider):
        # Set a randomly chosen User-Agent header in process_request
        request.headers.setdefault('User-Agent', random.choice(self.agents))
'''
Dynamically set a proxy IP for each request.
'''
class RandomProxy:
    def __init__(self, iplist):
        self.iplist = iplist

    @classmethod
    def from_crawler(cls, crawler):
        # Load the IPLIST of proxy addresses from the project settings
        return cls(crawler.settings.getlist('IPLIST'))

    def process_request(self, request, spider):
        # Route each request through a randomly chosen proxy
        proxy = random.choice(self.iplist)
        request.meta['proxy'] = proxy
# Define USER_AGENTS and IPLIST in settings.py and enable both middlewares there.
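
# A minimal sketch of the settings.py entries this assumes; the module path
# 'myproject.middlewares', the priorities, and the sample values below are
# placeholders, not part of the original source:
#
# USER_AGENTS = [
#     'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
#     'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15',
# ]
#
# IPLIST = [
#     'http://111.111.111.111:8080',
#     'http://222.222.222.222:3128',
# ]
#
# DOWNLOADER_MIDDLEWARES = {
#     'myproject.middlewares.RandomUserAgent': 543,
#     'myproject.middlewares.RandomProxy': 544,
# }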