import requests
import sys
import chardet #检测字符编码,但是有时候会有误差
# Compare what chardet detects for the bodies of two URLs: an icon file and
# an HTML page.
# requests applies no timeout by default, so one is passed explicitly to keep
# the script from hanging forever on an unresponsive server.
response1 = requests.get("https://github.com/favicon.ico", timeout=10)
print(chardet.detect(response1.content))  # detect() takes bytes, not str
response2 = requests.get("http://www.baidu.com/", timeout=10)
print(chardet.detect(response2.content))
--------------结果----------------
{'encoding': None, 'confidence': 0.0, 'language': None}
{'encoding': 'utf-8', 'confidence': 0.99, 'language': ''}
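# The first result has no detectable encoding because favicon.ico is binary image data rather than text, so decoding it as a string cannot succeed. Page compression is an unlikely cause: requests transparently decompresses gzip/deflate response bodies before exposing .content.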
--------------------编码-----------------------
data = response1.content
print(sys.getdefaultencoding())  # show the interpreter's default encoding
print(type(data))
# Two equivalent ways to convert bytes to str. The payload is not guaranteed
# to be valid UTF-8, so a decode failure is reported instead of being allowed
# to abort the script before the second response is examined.
try:
    print(str(data, encoding='utf-8'))
    print(data.decode('utf-8'))
except UnicodeDecodeError as exc:
    print("UnicodeDecodeError:", exc)
data2 = response2.content
print(type(data2))
# Same two conversions for the second response, guarded the same way.
try:
    print(str(data2, encoding='utf-8'))
    print(data2.decode('utf-8'))
except UnicodeDecodeError as exc:
    print("UnicodeDecodeError:", exc)
-----------------结果-------------
data报错:
Traceback (most recent call last):
File "E:/PythonStudy/练习题/Request_ex.py", line 52, in
print(str(data,encoding='utf-8'))
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xc5 in position 101: invalid continuation byte
data2正确返回: