>>> req = requests.get('http://www.jd.com/')
>>> req
<Response [200]>
>>> print req.text[:100]
FILE: /usr/lib/python2.7/dist-packages/requests/models.pyc, LINE: 770 <==> ISO-8859-1
FILE: /usr/lib/python2.7/dist-packages/requests/models.pyc, LINE: 781 <==> ISO-8859-1
¾©¶«(JD.COM)-×ÛºÏÍø¹ºÊ×Ñ¡-ÕýÆ·µÍ¼Û¡¢Æ·ÖÊ
# 这里出现了乱码
>>> dir(req)
['__attrs__', '__bool__', '__class__', '__delattr__', '__dict__', '__doc__', '__format__', '__getattribute__', '__getstate__', '__hash__', '__init__', '__iter__', '__module__', '__new__', '__nonzero__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__setstate__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_content', '_content_consumed', 'apparent_encoding', 'close', 'connection', 'content', 'cookies', 'elapsed', 'encoding', 'headers', 'history', 'is_redirect', 'iter_content', 'iter_lines', 'json', 'links', 'ok', 'raise_for_status', 'raw', 'reason', 'request', 'status_code', 'text', 'url']
>>> print req.content[:100]
戮漏露芦(JD.COM)-貨潞袓雼椦�-纸品碌图邸垄品讑
>>> print req.content.decode('gbk')[:100]
京东(JD.COM)-综合网购首选-正品低价、品质保障、配送及时、轻松购物！
## 由于该页面是gbk编码的，而Linux是utf-8编码，所以直接打印肯定是乱码；我们先进行解码，就能正确显示了。
>>> print req.text[:100]
FILE: /usr/lib/python2.7/dist-packages/requests/models.pyc, LINE: 770 <==> ISO-8859-1
FILE: /usr/lib/python2.7/dist-packages/requests/models.pyc, LINE: 781 <==> ISO-8859-1
¾©¶«(JD.COM)-×ÛºÏÍø¹ºÊ×Ñ¡-ÕýÆ·µÍ¼Û¡¢Æ·ÖÊ
>>> print req.text.decode('gbk')[:100]
FILE: /usr/lib/python2.7/dist-packages/requests/models.pyc, LINE: 770 <==> ISO-8859-1
FILE: /usr/lib/python2.7/dist-packages/requests/models.pyc, LINE: 781 <==> ISO-8859-1
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
UnicodeEncodeError: 'ascii' codec can't encode characters in position 60-63: ordinal not in range(128)
# 对text属性进行解码，就会出现错误。
# requests/models.py
def content(self):
    """Return the response body as raw bytes, reading it on first access.

    ``self._content`` holds the sentinel ``False`` until the body has been
    read. On the first access the body is assembled from
    ``self.iter_content(CONTENT_CHUNK_SIZE)`` and cached; later accesses
    return the cached value.

    :returns: the body as bytes (empty bytes if nothing was received), or
        ``None`` when ``status_code`` is 0 or an ``AttributeError`` occurs
        while reading.
    :raises RuntimeError: if the body has not been cached and the
        underlying stream was already consumed.
    """
    if self._content is False:
        # Read the contents.
        try:
            if self._content_consumed:
                raise RuntimeError(
                    'The content for this response was already consumed')
            if self.status_code == 0:
                self._content = None
            else:
                # ``or bytes()`` keeps the result a bytes object even when
                # no chunks were produced.
                self._content = bytes().join(self.iter_content(CONTENT_CHUNK_SIZE)) or bytes()
        except AttributeError:
            # A missing attribute while reading is treated as "no body"
            # rather than propagated.
            self._content = None
    self._content_consumed = True
    # don't need to release the connection; that's been handled by urllib3
    # since we exhausted the data.
    return self._content
# requests/models.py
def text(self):
    """Content of the response, in unicode.

    If Response.encoding is None, encoding will be guessed using
    ``chardet``.

    The encoding of the response content is determined based solely on HTTP
    headers, following RFC 2616 to the letter. If you can take advantage of
    non-HTTP knowledge to make a better guess at the encoding, you should
    set ``r.encoding`` appropriately before accessing this property.

    :returns: ``self.content`` decoded with ``self.encoding`` (or with
        ``self.apparent_encoding`` when ``self.encoding`` is None).
        Undecodable bytes are replaced rather than raising. An empty
        string is returned when there is no content.
    """
    # Try charset from content-type
    encoding = self.encoding

    if not self.content:
        return str('')

    # Fallback to auto-detected encoding.
    if self.encoding is None:
        encoding = self.apparent_encoding

    # Decode unicode from given encoding.
    try:
        content = str(self.content, encoding, errors='replace')
    except (LookupError, TypeError):
        # A LookupError is raised if the encoding was not found which could
        # indicate a misspelling or similar mistake.
        #
        # A TypeError can be raised if encoding is None
        #
        # So we try blindly encoding.
        content = str(self.content, errors='replace')

    return content
看看注释和源码可以知道，content是urllib3读取回来的原始字节码，而text不过是尝试对content通过编码方式解码为unicode。JD.COM 页面为gbk编码，问题就出在这里。
>>> req.apparent_encoding;req.encoding
'GB2312'
'ISO-8859-1'
# requests/models.py
def apparent_encoding(self):
    """The apparent encoding, provided by the chardet library"""
    # NOTE(review): this excerpt stops at the docstring, so as written the
    # function returns None. The chardet-based detection the docstring
    # describes is not shown here -- restore it from the requests source.
# requests/adapters.py
def build_response(self, req, resp):
    """Builds a :class:`Response` object from a urllib3 response. This
    should not be called from user code, and is only exposed for use
    when subclassing the :class:`HTTPAdapter`

    :param req: The :class:`PreparedRequest` used to generate the response.
    :param resp: The urllib3 response object.
    """
    response = Response()

    # Fallback to None if there's no status_code, for whatever reason.
    response.status_code = getattr(resp, 'status', None)

    # Make headers case-insensitive.
    response.headers = CaseInsensitiveDict(getattr(resp, 'headers', {}))

    # Set encoding.
    # The encoding comes only from the HTTP headers; the body is not
    # inspected at this point.
    response.encoding = get_encoding_from_headers(response.headers)

    # ... (the remainder of the method is omitted from this excerpt)
# requests/utils.py
def get_encoding_from_headers(headers):
    """Returns encodings from given HTTP Header Dict.

    Only the ``content-type`` header is consulted; the response body is
    never inspected.

    :param headers: dictionary to extract encoding from.
    :returns: the ``charset`` parameter with surrounding quotes stripped
        when one is present; ``'ISO-8859-1'`` when there is no charset but
        the media type contains ``text``; otherwise ``None``.
    """
    content_type = headers.get('content-type')

    if not content_type:
        return None

    content_type, params = cgi.parse_header(content_type)

    if 'charset' in params:
        return params['charset'].strip("'\"")

    # No charset declared: text media types fall back to ISO-8859-1, which
    # is why a GBK page served without a charset decodes to mojibake.
    if 'text' in content_type:
        return 'ISO-8859-1'

    return None
发现了吗？程序只通过http响应首部获取编码，假如响应中没有指定charset，那么直接返回'ISO-8859-1'。
可以看到，response header只指定了type，但是没有指定编码(一般现在页面编码都直接在html页面中)。所以该函数就直接返回'ISO-8859-1'。
1. 修改get_encoding_from_headers函数，通过正则匹配，来检测页面编码。由于现在的页面都在HTML代码中指定了charset，所以通过正则式匹配的编码方式是完全正确的。
2. 由于content是HTTP响应的原始字节串，所以我们可以直接使用它，把content按照页面编码方式解码为unicode！