多个spider用不同的代理(指定城市的ip)

class proxyMiddleware(object):
    """Attach a health-checked HTTP proxy to each request, chosen per spider.

    Every supported spider family has its own proxy pool and its own
    proxy-list API URL, so different spiders can use proxies from different
    sources (per the file title: proxies located in a specific city).

    NOTE(review): ``process_request`` performs blocking ``requests`` calls
    and ``time.sleep``; if this runs inside Scrapy's event loop it stalls
    other requests while a proxy is being fetched or checked -- confirm this
    is acceptable for the deployment.
    """

    # Substring the proxy API puts in its JSON error body while it is rate
    # limiting (the message means "please retry in 2 seconds"), e.g.
    # {"code":111,"data":[],"msg":"请2秒后再试","success":false}
    RATE_LIMIT_MARKER = "请2秒后再试"
    # A pool is topped up whenever it holds fewer addresses than this.
    MIN_POOL_SIZE = 5
    # Seconds to wait before each successive retry of a rate-limited call.
    RETRY_DELAYS = (2, 4, 6)
    # Seconds allowed for the proxy-list API call (previously unbounded).
    FETCH_TIMEOUT = 10
    # Seconds allowed for the health-check request sent through a proxy.
    CHECK_TIMEOUT = 2

    def __init__(self):
        """Create one empty pool and one proxy-list URL per spider family."""
        self.ip_pool_cc = []
        self.get_ip_url_cc = 'http://xxxxx'
        self.ip_pool_cq = []
        self.get_ip_url_cq = 'http://xxxxx'

    def process_request(self, request, spider):
        """Set ``request.meta['proxy']`` for spiders that have a proxy pool.

        Args:
            request: Request object exposing a mutable ``meta`` mapping.
            spider: Spider object exposing a ``name`` string.

        Returns:
            None. Requests from spiders whose name matches no known family
            are left untouched.
        """
        # (substring of spider.name, pool, proxy-list URL, health-check URL);
        # the first matching row wins, mirroring an if/elif chain.
        routes = (
            ("zlwmw_cc", self.ip_pool_cc, self.get_ip_url_cc,
             "http://xxxxxxxxx"),
            ("zlwmw_cq", self.ip_pool_cq, self.get_ip_url_cq,
             "http://xxxxxxxxx"),
        )
        for name_marker, ip_pool, get_ip_url, url_test in routes:
            if name_marker in spider.name:
                pro_addr = self.base_proxy(ip_pool, get_ip_url, url_test)
                request.meta['proxy'] = "http://" + pro_addr
                break

    def base_proxy(self, ip_pool, get_ip_url_0, url_test):
        """Return a proxy address from ``ip_pool`` that passes a health check.

        The pool is topped up from the proxy API whenever it is below
        ``MIN_POOL_SIZE``. A random address is then tested against
        ``url_test``; addresses that fail are removed from the pool and
        another one is tried. This loops until a working proxy is found.

        Args:
            ip_pool: Mutable list of proxy addresses; modified in place.
            get_ip_url_0: URL of the API that returns newline-separated
                proxy addresses.
            url_test: URL requested through the proxy to verify it works.

        Returns:
            The proxy address string (without a scheme prefix). The address
            stays in the pool for later reuse.

        Raises:
            Whatever ``requests.get`` raises while contacting the proxy-list
            API (connection errors, timeouts); these are not swallowed.
        """
        while True:
            self._refill_pool(ip_pool, get_ip_url_0)
            pro_addr = random.choice(ip_pool)
            if self._proxy_is_alive(pro_addr, url_test):
                return pro_addr
            # Dead proxy: drop it so it is not picked again.
            ip_pool.remove(pro_addr)

    def _refill_pool(self, ip_pool, get_ip_url):
        """Fetch proxies until ``ip_pool`` holds at least MIN_POOL_SIZE."""
        while len(ip_pool) < self.MIN_POOL_SIZE:
            fresh = self._parse_proxy_list(self._fetch_proxy_text(get_ip_url))
            if fresh:
                ip_pool.extend(fresh)
            else:
                # Nothing usable came back; pause instead of hammering the
                # API in a tight loop.
                time.sleep(self.RETRY_DELAYS[0])

    def _fetch_proxy_text(self, get_ip_url):
        """Return the proxy API body, retrying with back-off if rate limited."""
        text = requests.get(get_ip_url, timeout=self.FETCH_TIMEOUT).text
        for delay in self.RETRY_DELAYS:
            if self.RATE_LIMIT_MARKER not in text:
                break
            time.sleep(delay)
            text = requests.get(get_ip_url, timeout=self.FETCH_TIMEOUT).text
        return text

    @classmethod
    def _parse_proxy_list(cls, text):
        """Split an API body into a list of non-empty proxy addresses.

        Returns an empty list for rate-limit bodies and for bodies that
        start with ``{``: the only error format known for this API is a JSON
        object, and it must never be stored as a proxy address.
        """
        if cls.RATE_LIMIT_MARKER in text or text.lstrip().startswith("{"):
            return []
        # Keep every non-blank line, including a final line that has no
        # trailing newline.
        return [line.strip() for line in text.splitlines() if line.strip()]

    def _proxy_is_alive(self, pro_addr, url_test):
        """Return True if ``url_test`` answers HTTP 200 through ``pro_addr``."""
        proxies = {
            "http": pro_addr,
        }
        try:
            # The context manager closes the session's connections, so
            # health checks do not accumulate open sockets.
            with requests.session() as session:
                response = session.get(url=url_test, proxies=proxies,
                                       timeout=self.CHECK_TIMEOUT,
                                       verify=False)
        except Exception as e:
            # Best effort: any failure while probing means the proxy is
            # unusable, whatever the cause.
            print(e)
            return False
        return response.status_code == 200

你可能感兴趣的:(多个spider用不同的代理(指定城市的ip))