Python Requests 源码解读之 requests.adapters

requests.adapters 是 requests 库中的一个子模块,它包含了一些用于处理 HTTP 请求的适配器类。这些适配器类主要用于处理与 HTTP 请求相关的各种细节,例如超时、重试、连接池管理等。

以下是 requests.adapters 中一些主要的类和功能:
- HTTPAdapter:这个类用于处理与 HTTP 连接相关的操作,例如建立连接、发送请求等。它使用连接池来管理连接,以提高性能和效率。
- 代理支持(HTTPAdapter.proxy_manager_for):用于处理通过代理服务器发送 HTTP 请求的情况。它借助 urllib3 的 ProxyManager(SOCKS 代理则使用 SOCKSProxyManager)来管理代理连接。
在使用 requests 库发送 HTTP 请求时,可以根据需要选择不同的适配器。例如,如果要使用自定义的超时设置或重试机制,可以创建一个自定义的适配器类,继承自 HTTPAdapter 并实现相应的功能。

# -*- coding: utf-8 -*-


"""
This module contains the transport adapters that Requests uses to define
and maintain connections.
"""

import os.path
import socket

from urllib3.poolmanager import PoolManager, proxy_from_url
from urllib3.response import HTTPResponse
from urllib3.util import parse_url
from urllib3.util import Timeout as TimeoutSauce
from urllib3.util.retry import Retry
from urllib3.exceptions import ClosedPoolError
from urllib3.exceptions import ConnectTimeoutError
from urllib3.exceptions import HTTPError as _HTTPError
from urllib3.exceptions import MaxRetryError
from urllib3.exceptions import NewConnectionError
from urllib3.exceptions import ProxyError as _ProxyError
from urllib3.exceptions import ProtocolError
from urllib3.exceptions import ReadTimeoutError
from urllib3.exceptions import SSLError as _SSLError
from urllib3.exceptions import ResponseError
from urllib3.exceptions import LocationValueError

from .models import Response
from .compat import urlparse, basestring
from .utils import (DEFAULT_CA_BUNDLE_PATH, extract_zipped_paths,
                    get_encoding_from_headers, prepend_scheme_if_needed,
                    get_auth_from_url, urldefragauth, select_proxy)
from .structures import CaseInsensitiveDict
from .cookies import extract_cookies_to_jar
from .exceptions import (ConnectionError, ConnectTimeout, ReadTimeout, SSLError,
                         ProxyError, RetryError, InvalidSchema, InvalidProxyURL,
                         InvalidURL)
from .auth import _basic_auth_str

    from urllib3.contrib.socks import SOCKSProxyManager
except ImportError:
    def SOCKSProxyManager(*args, **kwargs):
        raise InvalidSchema("Missing dependencies for SOCKS support.")





#此段代码中的注释非常详尽,说明了各个部分的用途和工作原理。例如,从urllib3库导入的各种异常类被用来处理在网络请求过程中可能出现的各种问题,如连接错误、超时、SSL错误等。同样,requests库也定义了一些自己的异常类,如ConnectionError, ConnectTimeout, ReadTimeout, SSLError, ProxyError, RetryError, InvalidSchema, InvalidProxyURL, 和 InvalidURL等。


class BaseAdapter(object):
    """The Base Transport Adapter.

    Defines the two methods every transport adapter provides, ``send`` and
    ``close``. Both raise ``NotImplementedError`` here; subclasses supply the
    real behaviour.
    """

    def __init__(self):
        super(BaseAdapter, self).__init__()

    def send(self, request, stream=False, timeout=None, verify=True,
             cert=None, proxies=None):
        """Sends PreparedRequest object. Returns Response object.

        :param request: The :class:`PreparedRequest <PreparedRequest>` being sent.
        :param stream: (optional) Whether to stream the request content.
        :param timeout: (optional) How long to wait for the server to send
            data before giving up, as a float, or a :ref:`(connect timeout,
            read timeout) <timeouts>` tuple.
        :type timeout: float or tuple
        :param verify: (optional) Either a boolean, in which case it controls whether we verify
            the server's TLS certificate, or a string, in which case it must be a path
            to a CA bundle to use
        :param cert: (optional) Any user-provided SSL certificate to be trusted.
        :param proxies: (optional) The proxies dictionary to apply to the request.
        :raises NotImplementedError: always, in this base class.
        """
        raise NotImplementedError

    def close(self):
        """Cleans up adapter specific items.

        :raises NotImplementedError: always, in this base class.
        """
        raise NotImplementedError

#class BaseAdapter(object)::定义了一个名为BaseAdapter的类,它显式继承自object(在Python 2中,只有显式继承object的类才是新式类;而在Python 3中,所有类都隐式继承自object,这里显式写出是为了同时兼容Python 2)。
 #   """The Base Transport Adapter""":这是一个文档字符串,简要描述了这个类的作用。
#def __init__(self)::定义了类的初始化方法。
 #       super(BaseAdapter, self).__init__():调用父类的初始化方法。
#def send(self, request, stream=False, timeout=None, verify=True, cert=None, proxies=None)::定义了一个名为send的方法,这个方法用于发送一个请求并返回响应。
#        """Sends PreparedRequest object. Returns Response object.:这是send方法的文档字符串,解释了其功能。
#        :param request: The :class:PreparedRequest being sent.:文档字符串说明第一个参数request是一个待发送的PreparedRequest对象。
#       :param stream: (optional) Whether to stream the request content.:第二个参数stream是一个可选参数,表示是否要流式传输请求内容。
#        :param timeout: (optional) How long to wait for the server to send data before giving up, as a float, or a :ref:(connect timeout, read timeout) tuple.:第三个参数timeout是一个可选参数,表示等待服务器发送数据的超时时间,可以是一个浮点数或者一个包含连接超时和读取超时的元组。
#        :type timeout: float or tuple:文档字符串说明timeout的类型可以是浮点数或者元组。
#        :param verify: (optional) Either a boolean, in which case it controls whether we verify the server's TLS certificate, or a string, in which case it must be a path to a CA bundle to use.:第四个参数verify是一个可选参数,它可以是一个布尔值来控制是否验证服务器的TLS证书,或者是一个字符串,表示CA证书的路径。
#        :param cert: (optional) Any user-provided SSL certificate to be trusted.:第五个参数cert是一个可选参数,表示用户提供的SSL证书。
 #       :param proxies: (optional) The proxies dictionary to apply to the request.:第六个参数proxies是一个可选参数,表示要应用到请求的代理字典。
 #       raise NotImplementedError:这一行不属于文档字符串,而是send方法的方法体——基类直接抛出NotImplementedError异常,具体的发送逻辑由子类实现。
#def close(self)::定义了一个名为close的方法。
#        """Cleans up adapter specific items.""":这是close方法的文档字符串,解释了其功能。
#        raise NotImplementedError:这是close方法的方法体,基类直接抛出NotImplementedError异常,具体的清理逻辑由子类实现。

class HTTPAdapter(BaseAdapter):
    """The built-in HTTP Adapter for urllib3.

    Provides a general-case interface for Requests sessions to contact HTTP and
    HTTPS urls by implementing the Transport Adapter interface. This class will
    usually be created by the :class:`Session <Session>` class under the
    covers.

    :param pool_connections: The number of urllib3 connection pools to cache.
    :param pool_maxsize: The maximum number of connections to save in the pool.
    :param max_retries: The maximum number of retries each connection
        should attempt. Note, this applies only to failed DNS lookups, socket
        connections and connection timeouts, never to requests where data has
        made it to the server. By default, Requests does not retry failed
        connections. If you need granular control over the conditions under
        which we retry a request, import urllib3's ``Retry`` class and pass
        that instead.
    :param pool_block: Whether the connection pool should block for connections.

    Usage::

      >>> import requests
      >>> s = requests.Session()
      >>> a = requests.adapters.HTTPAdapter(max_retries=3)
      >>> s.mount('http://', a)
    """
    # Names of the attributes that __getstate__ captures for pickling.
    __attrs__ = ['max_retries', 'config', '_pool_connections', '_pool_maxsize',
                 '_pool_block']

    def __init__(self, pool_connections=DEFAULT_POOLSIZE,
                 pool_maxsize=DEFAULT_POOLSIZE, max_retries=DEFAULT_RETRIES,
                 pool_block=DEFAULT_POOLBLOCK):
        """Store the retry/pool configuration and build the pool manager.

        :param pool_connections: Passed to :meth:`init_poolmanager` as the
            number of pools.
        :param pool_maxsize: Passed to :meth:`init_poolmanager` as the pool size.
        :param max_retries: Either the default sentinel, an int, or a urllib3
            ``Retry`` object (handled by ``Retry.from_int``).
        :param pool_block: Passed to :meth:`init_poolmanager` as ``block``.
        """
        if max_retries == DEFAULT_RETRIES:
            # Default: no retries at all, and never retry on read errors.
            self.max_retries = Retry(0, read=False)
        else:
            self.max_retries = Retry.from_int(max_retries)
        self.config = {}
        # Cache of proxy URL -> proxy manager, filled by proxy_manager_for().
        self.proxy_manager = {}

        super(HTTPAdapter, self).__init__()

        self._pool_connections = pool_connections
        self._pool_maxsize = pool_maxsize
        self._pool_block = pool_block

        self.init_poolmanager(pool_connections, pool_maxsize, block=pool_block)

    def __getstate__(self):
        """Return the picklable state: the attributes listed in ``__attrs__``."""
        return {attr: getattr(self, attr, None) for attr in self.__attrs__}

    def __setstate__(self, state):
        """Restore pickled attributes and rebuild the pool manager.

        :param state: Mapping produced by :meth:`__getstate__`.
        """
        # Can't handle by adding 'proxy_manager' to self.__attrs__ because
        # self.poolmanager uses a lambda function, which isn't pickleable.
        self.proxy_manager = {}
        self.config = {}

        for attr, value in state.items():
            setattr(self, attr, value)

        self.init_poolmanager(self._pool_connections, self._pool_maxsize,
                              block=self._pool_block)

    def init_poolmanager(self, connections, maxsize, block=DEFAULT_POOLBLOCK, **pool_kwargs):
        """Initializes a urllib3 PoolManager.

        This method should not be called from user code, and is only
        exposed for use when subclassing the
        :class:`HTTPAdapter <requests.adapters.HTTPAdapter>`.

        :param connections: The number of urllib3 connection pools to cache.
        :param maxsize: The maximum number of connections to save in the pool.
        :param block: Block when no free connections are available.
        :param pool_kwargs: Extra keyword arguments used to initialize the Pool Manager.
        """
        # save these values for pickling
        self._pool_connections = connections
        self._pool_maxsize = maxsize
        self._pool_block = block

        self.poolmanager = PoolManager(num_pools=connections, maxsize=maxsize,
                                       block=block, strict=True, **pool_kwargs)
#这段代码定义了一个名为HTTPAdapter的类,该类继承自BaseAdapter。这个类是用于处理HTTP和HTTPS请求的传输适配器,基于urllib3库。它提供了一个通用的接口,使Requests会话能够通过实现Transport Adapter接口来联系HTTP和HTTPS URL。



def proxy_manager_for(self, proxy, **proxy_kwargs):
        """Return urllib3 ProxyManager for the given proxy.

        This method should not be called from user code, and is only
        exposed for use when subclassing the
        :class:`HTTPAdapter `.

        :param proxy: The proxy to return a urllib3 ProxyManager for.
        :param proxy_kwargs: Extra keyword arguments used to configure the Proxy Manager.
        :returns: ProxyManager
        :rtype: urllib3.ProxyManager
        if proxy in self.proxy_manager:
            manager = self.proxy_manager[proxy]
        elif proxy.lower().startswith('socks'):
            username, password = get_auth_from_url(proxy)
            manager = self.proxy_manager[proxy] = SOCKSProxyManager(
            proxy_headers = self.proxy_headers(proxy)
            manager = self.proxy_manager[proxy] = proxy_from_url(

        return manager

    def cert_verify(self, conn, url, verify, cert):
        """Verify a SSL certificate. This method should not be called from user
        code, and is only exposed for use when subclassing the
        :class:`HTTPAdapter <requests.adapters.HTTPAdapter>`.

        Configures ``conn`` in place (``cert_reqs``, ``ca_certs``,
        ``ca_cert_dir``, ``cert_file``, ``key_file``); nothing is returned.

        :param conn: The urllib3 connection object associated with the cert.
        :param url: The requested URL.
        :param verify: Either a boolean, in which case it controls whether we verify
            the server's TLS certificate, or a string, in which case it must be a path
            to a CA bundle to use
        :param cert: The SSL certificate to verify.
        :raises IOError: if the CA bundle, client certificate or client key
            path does not exist.
        """
        if url.lower().startswith('https') and verify:

            cert_loc = None

            # Allow self-specified cert location.
            if verify is not True:
                cert_loc = verify

            if not cert_loc:
                cert_loc = extract_zipped_paths(DEFAULT_CA_BUNDLE_PATH)

            if not cert_loc or not os.path.exists(cert_loc):
                raise IOError("Could not find a suitable TLS CA certificate bundle, "
                              "invalid path: {}".format(cert_loc))

            conn.cert_reqs = 'CERT_REQUIRED'

            # A file is used as a CA bundle, a directory as a CA directory.
            if not os.path.isdir(cert_loc):
                conn.ca_certs = cert_loc
            else:
                conn.ca_cert_dir = cert_loc
        else:
            # Non-HTTPS URL or verification disabled: no certificate checks.
            conn.cert_reqs = 'CERT_NONE'
            conn.ca_certs = None
            conn.ca_cert_dir = None

        if cert:
            # A non-string cert is treated as a (cert_file, key_file) pair.
            if not isinstance(cert, basestring):
                conn.cert_file = cert[0]
                conn.key_file = cert[1]
            else:
                conn.cert_file = cert
                conn.key_file = None
            if conn.cert_file and not os.path.exists(conn.cert_file):
                raise IOError("Could not find the TLS certificate file, "
                              "invalid path: {}".format(conn.cert_file))
            if conn.key_file and not os.path.exists(conn.key_file):
                raise IOError("Could not find the TLS key file, "
                              "invalid path: {}".format(conn.key_file))

#这段代码定义了两个方法:proxy_manager_for 和 cert_verify,它们都是用于处理网络请求的HTTP适配器的一部分。

#proxy_manager_for 方法:
#这个方法用于根据给定的代理配置返回一个 urllib3.ProxyManager 对象。
#首先,它会检查是否已经为给定的代理配置创建了 ProxyManager 对象,如果已经存在,就直接返回。
#如果代理以 "socks" 开头,它会获取代理URL中的用户名和密码,并使用这些信息创建一个 SOCKSProxyManager 对象。
#如果代理不是以 "socks" 开头,它会根据代理URL和其他参数创建一个普通的 ProxyManager 对象。
#cert_verify 方法:
#这个方法用于验证SSL证书。它不应该被用户代码直接调用,而是仅在子类化 HTTPAdapter 类时使用。

def build_response(self, req, resp):
        """Builds a :class:`Response ` object from a urllib3
        response. This should not be called from user code, and is only exposed
        for use when subclassing the
        :class:`HTTPAdapter `

        :param req: The :class:`PreparedRequest ` used to generate the response.
        :param resp: The urllib3 response object.
        :rtype: requests.Response
        response = Response()

        # Fallback to None if there's no status_code, for whatever reason.
        response.status_code = getattr(resp, 'status', None)

        # Make headers case-insensitive.
        response.headers = CaseInsensitiveDict(getattr(resp, 'headers', {}))

        # Set encoding.
        response.encoding = get_encoding_from_headers(response.headers)
        response.raw = resp
        response.reason = response.raw.reason

        if isinstance(req.url, bytes):
            response.url = req.url.decode('utf-8')
            response.url = req.url

        # Add new cookies from the server.
        extract_cookies_to_jar(response.cookies, req, resp)

        # Give the Response some context.
        response.request = req
        response.connection = self

        return response

#这段代码定义了一个名为 build_response 的方法,该方法用于从urllib3的响应对象中构建一个requests.Response对象。这个方法主要用于内部处理,不应该被用户直接调用,而是在创建自定义的HTTPAdapter子类时使用。下面是对代码中每一部分的详细解释:
#def build_response(self, req, resp):
#response = Response(): 这行代码创建一个新的Response对象。我们没有上下文来了解Response类的具体实现,但从命名和后续的代码中可以推测它代表一个HTTP响应。
#response.status_code = getattr(resp, 'status', None): 这行代码尝试从urllib3的响应对象中获取状态码,并将其赋值给Response对象的status_code属性。如果resp没有状态码属性,它将设置为None。
#response.headers = CaseInsensitiveDict(getattr(resp, 'headers', {})): 这行代码把响应头部包装成键不区分大小写的字典CaseInsensitiveDict,这样无论用哪种大小写形式访问头部字段都能取到值;如果resp没有headers属性,则使用空字典。
#response.encoding = get_encoding_from_headers(response.headers): 这行代码从响应头部获取字符编码。它调用了一个名为get_encoding_from_headers的函数,该函数应该根据响应头部返回正确的字符编码。
#response.raw = resp: 将原始的urllib3响应对象赋值给Response对象的raw属性。
#response.reason = response.raw.reason: 从原始的响应对象中获取并设置HTTP状态消息。
#if isinstance(req.url, bytes):
#        response.url = req.url.decode('utf-8')
#else:
#        response.url = req.url
#: 如果请求的URL是bytes类型,就先按UTF-8解码成字符串;否则直接使用原来的URL。
#extract_cookies_to_jar(response.cookies, req, resp): 这行代码调用了一个名为extract_cookies_to_jar的函数,该函数从服务器响应中提取新的cookies,并将其添加到响应的cookie jar中。
#response.request = req: 将原始请求对象赋值给响应对象的request属性。
#response.connection = self: 将当前HTTP适配器对象赋值给响应对象的connection属性。这为处理程序提供了一个引用,以便它可以访问用于发起请求的HTTP适配器。
#return response: 最后,方法返回构建好的响应对象。

def get_connection(self, url, proxies=None):
        """Returns a urllib3 connection for the given URL. This should not be
        called from user code, and is only exposed for use when subclassing the
        :class:`HTTPAdapter `.

        :param url: The URL to connect to.
        :param proxies: (optional) A Requests-style dictionary of proxies used on this request.
        :rtype: urllib3.ConnectionPool
        proxy = select_proxy(url, proxies)

        if proxy:
            proxy = prepend_scheme_if_needed(proxy, 'http')
            proxy_url = parse_url(proxy)
            if not
                raise InvalidProxyURL("Please check proxy URL. It is malformed"
                                      " and could be missing the host.")
            proxy_manager = self.proxy_manager_for(proxy)
            conn = proxy_manager.connection_from_url(url)
            # Only scheme should be lower case
            parsed = urlparse(url)
            url = parsed.geturl()
            conn = self.poolmanager.connection_from_url(url)

        return conn

    def close(self):
        """Disposes of any internal state.

        Currently, this closes the PoolManager and any active ProxyManager,
        which closes any pooled connections.
        """
        self.poolmanager.clear()
        for proxy in self.proxy_manager.values():
            proxy.clear()
#def get_connection(self, url, proxies=None):
#proxy = select_proxy(url, proxies):
#def close(self):

def request_url(self, request, proxies):
        """Obtain the url to use when making the final request.

        If the message is being sent through a HTTP proxy, the full URL has to
        be used. Otherwise, we should only use the path portion of the URL.

        This should not be called from user code, and is only exposed for use
        when subclassing the
        :class:`HTTPAdapter `.

        :param request: The :class:`PreparedRequest ` being sent.
        :param proxies: A dictionary of schemes or schemes and hosts to proxy URLs.
        :rtype: str
        proxy = select_proxy(request.url, proxies)
        scheme = urlparse(request.url).scheme

        is_proxied_http_request = (proxy and scheme != 'https')
        using_socks_proxy = False
        if proxy:
            proxy_scheme = urlparse(proxy).scheme.lower()
            using_socks_proxy = proxy_scheme.startswith('socks')

        url = request.path_url
        if is_proxied_http_request and not using_socks_proxy:
            url = urldefragauth(request.url)

        return url

    def add_headers(self, request, **kwargs):
        """Add any headers needed by the connection. As of v2.0 this does
        nothing by default, but is left for overriding by users that subclass
        the :class:`HTTPAdapter <requests.adapters.HTTPAdapter>`.

        This should not be called from user code, and is only exposed for use
        when subclassing the
        :class:`HTTPAdapter <requests.adapters.HTTPAdapter>`.

        :param request: The :class:`PreparedRequest <PreparedRequest>` to add headers to.
        :param kwargs: The keyword arguments from the call to send().
        """
        pass

    def proxy_headers(self, proxy):
        """Returns a dictionary of the headers to add to any request sent
        through a proxy. This works with urllib3 magic to ensure that they are
        correctly sent to the proxy, rather than in a tunnelled request if
        CONNECT is being used.

        This should not be called from user code, and is only exposed for use
        when subclassing the
        :class:`HTTPAdapter <requests.adapters.HTTPAdapter>`.

        :param proxy: The url of the proxy being used for this request.
        :rtype: dict
        """
        headers = {}
        username, password = get_auth_from_url(proxy)

        # Only add the header when the proxy URL carries a username.
        if username:
            headers['Proxy-Authorization'] = _basic_auth_str(username,
                                                             password)

        return headers


def send(self, request, stream=False, timeout=None, verify=True, cert=None, proxies=None):
        """Sends PreparedRequest object. Returns Response object.

        :param request: The :class:`PreparedRequest ` being sent.
        :param stream: (optional) Whether to stream the request content.
        :param timeout: (optional) How long to wait for the server to send
            data before giving up, as a float, or a :ref:`(connect timeout,
            read timeout) ` tuple.
        :type timeout: float or tuple or urllib3 Timeout object
        :param verify: (optional) Either a boolean, in which case it controls whether
            we verify the server's TLS certificate, or a string, in which case it
            must be a path to a CA bundle to use
        :param cert: (optional) Any user-provided SSL certificate to be trusted.
        :param proxies: (optional) The proxies dictionary to apply to the request.
        :rtype: requests.Response

            conn = self.get_connection(request.url, proxies)
        except LocationValueError as e:
            raise InvalidURL(e, request=request)

        self.cert_verify(conn, request.url, verify, cert)
        url = self.request_url(request, proxies)
        self.add_headers(request, stream=stream, timeout=timeout, verify=verify, cert=cert, proxies=proxies)

        chunked = not (request.body is None or 'Content-Length' in request.headers)

        if isinstance(timeout, tuple):
                connect, read = timeout
                timeout = TimeoutSauce(connect=connect, read=read)
            except ValueError as e:
                # this may raise a string formatting error.
                err = ("Invalid timeout {}. Pass a (connect, read) "
                       "timeout tuple, or a single float to set "
                       "both timeouts to the same value".format(timeout))
                raise ValueError(err)
        elif isinstance(timeout, TimeoutSauce):
            timeout = TimeoutSauce(connect=timeout, read=timeout)

            if not chunked:
                resp = conn.urlopen(

            # Send the request.
                if hasattr(conn, 'proxy_pool'):
                    conn = conn.proxy_pool

                low_conn = conn._get_conn(timeout=DEFAULT_POOL_TIMEOUT)


                    for header, value in request.headers.items():
                        low_conn.putheader(header, value)


                    for i in request.body:

                    # Receive the response from the server
                        # For Python 2.7, use buffering of HTTP responses
                        r = low_conn.getresponse(buffering=True)
                    except TypeError:
                        # For compatibility with Python 3.3+
                        r = low_conn.getresponse()

                    resp = HTTPResponse.from_httplib(
                    # If we hit any problems here, clean up the connection.
                    # Then, reraise so that we can handle the actual exception.

        except (ProtocolError, socket.error) as err:
            raise ConnectionError(err, request=request)

        except MaxRetryError as e:
            if isinstance(e.reason, ConnectTimeoutError):
                # TODO: Remove this in 3.0.0: see #2811
                if not isinstance(e.reason, NewConnectionError):
                    raise ConnectTimeout(e, request=request)

            if isinstance(e.reason, ResponseError):
                raise RetryError(e, request=request)

            if isinstance(e.reason, _ProxyError):
                raise ProxyError(e, request=request)

            if isinstance(e.reason, _SSLError):
                # This branch is for urllib3 v1.22 and later.
                raise SSLError(e, request=request)

            raise ConnectionError(e, request=request)

        except ClosedPoolError as e:
            raise ConnectionError(e, request=request)

        except _ProxyError as e:
            raise ProxyError(e)

        except (_SSLError, _HTTPError) as e:
            if isinstance(e, _SSLError):
                # This branch is for urllib3 versions earlier than v1.22
                raise SSLError(e, request=request)
            elif isinstance(e, ReadTimeoutError):
                raise ReadTimeout(e, request=request)

        return self.build_response(request, resp)
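#send方法使用try/except捕获urllib3抛出的各类异常,并把它们转换成requests自己的异常类型(如ConnectionError、ConnectTimeout、RetryError、ProxyError、SSLError、ReadTimeout)后重新抛出。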
