Python 采集时使用代理身份验证的 3 种情况

1、urllib2:

# Route urllib2 traffic through an HTTP proxy that requires Basic
# authentication. The credentials are embedded in the proxy URL using the
# http://user:password@host form.
proxy_url = "http://%s:%s@%s" % (user, pwd, proxiip)
proxy_support = urllib2.ProxyHandler({"http": proxy_url})

# Install the opener as the default so that later urllib2.urlopen() calls
# go through the proxy handler built above.
opener = urllib2.build_opener(proxy_support)
urllib2.install_opener(opener)

requestx = urllib2.Request(url)
# Keep the response object so the caller can read the body / status from it.
response = urllib2.urlopen(requestx)

2、requests:

openurl = requests.session()

# Pick a random proxy from the Redis set; each member is a JSON document
# whose "ip" field holds the proxy address (presumably host:port -- confirm
# against whatever populates the set).
proxy = rconnection_yz.srandmember(redis_key_proxy)
proxyjson = json.loads(proxy)
proxiip = proxyjson["ip"]

# For a proxy that requires HTTP Basic Auth, embed the credentials in the
# proxy URL using the http://user:password@host/ syntax, e.g.
#     "http": "http://user:password@host:3128/"
# Both entries use the http:// scheme on purpose: the dictionary key selects
# which target URLs are proxied, while the scheme in the value describes how
# the client talks to the proxy itself. An https:// value would request a
# TLS connection to the proxy, which a plain HTTP proxy cannot serve.
proxy_url = 'http://' + user + ':' + pwd + '@' + proxiip + '/'
openurl.proxies = {'http': proxy_url, 'https': proxy_url}

req = openurl.get(url, headers=headers, timeout=15)


3、Scrapy 使用代理身份验证的方式:

def process_request(self, request=None, spider=None):
    """
    Route ``request`` through a randomly chosen, authenticated proxy.

    A proxy entry is drawn at random from one of the Redis sets named in
    ``self.proxykeys``. The entry is parsed as JSON and its ``"ip"`` field
    becomes the proxy address. Credentials from ``self.IP_USER`` and
    ``self.IP_PWD`` are sent as an HTTP Basic ``Proxy-Authorization`` header.

    :param request: the outgoing request; its ``headers`` and ``meta``
        mappings are modified in place.
    :param spider: unused here; kept for the middleware signature.
    :return: always ``None``.

    Any failure (Redis unavailable, empty proxy set, malformed entry) is
    reported on stdout and the request is left without a proxy, so a bad
    proxy pool never aborts the crawl.
    """
    try:
        # Build the Redis client once and reuse it; creating a new client
        # (and connection pool) for every request is needlessly expensive.
        if getattr(self, 'redisclient', None) is None:
            self.redisclient = redis.Redis(self.REDIS_HOST, self.REDIS_PORT)

        proxy_key = random.choice(self.proxykeys)
        proxy = self.redisclient.srandmember(proxy_key)
        if proxy is None:
            # srandmember yields None for an empty/missing set; fail with a
            # clear message instead of an opaque TypeError from json.loads.
            raise ValueError('no proxy available under key %r' % (proxy_key,))
        ip = json.loads(proxy)["ip"]

        proxy_user_pass = "%s:%s" % (self.IP_USER, self.IP_PWD)
        # b64encode, unlike encodestring, does not append a trailing
        # newline, which would otherwise corrupt the header value.
        encoded_user_pass = base64.b64encode(
            proxy_user_pass.encode('utf-8')).decode('ascii')
        request.headers['Proxy-Authorization'] = 'Basic ' + encoded_user_pass
        print(ip)
        request.meta['proxy'] = "http://%s" % ip

    except Exception as ee:
        # Deliberately best-effort: report the problem and let the request
        # continue without a proxy.
        print('------------------------------ %s' % ee)

你可能感兴趣的:(个人汇总,python)