Target URL:
aHR0cHM6Ly9iYWlqaWFoYW8uYmFpZHUuY29tL3M/aWQ9MTc2NDk0ODY1NzA2MTc5MjA0NCZ3ZnI9c3BpZGVyJmZvcj1wYw==
1. Requesting the page with plain requests always returns the "网络不给力,请稍后重试" ("the network is not working, please try again later") anti-bot page, and adding a proxy does not help either.
2. The page uses JA3 (TLS) fingerprint verification.
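A minimal reproduction of the symptom in point 1 (a sketch only; the substring check on the error text is a heuristic, not the site's actual detection logic):

import base64
import requests

# decode the base64-obfuscated target URL given at the top of the post
url = base64.b64decode(
    'aHR0cHM6Ly9iYWlqaWFoYW8uYmFpZHUuY29tL3M/aWQ9MTc2NDk0ODY1NzA2MTc5MjA0NCZ3ZnI9c3BpZGVyJmZvcj1wYw=='
).decode()

headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
                  '(KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36',
}

res = requests.get(url, headers=headers)
res.encoding = 'utf-8'
print('网络不给力' in res.text)   # True if the anti-bot page came back instead of the article

The first workaround keeps requests but changes the cipher suite offered in the TLS ClientHello, which changes the JA3 value the server sees: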
import requests
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.ssl_ import create_urllib3_context
# on newer versions you may need to import it directly:
# from urllib3.util.ssl_ import create_urllib3_context

# Custom cipher string: reordering/trimming the ciphers changes the ClientHello,
# so the resulting JA3 hash no longer matches the stock requests/urllib3 one
CIPHERS = (
    'ECDH+AESGCM:DH+AESGCM:ECDH+AES256:DH+AES256:RSA+3DES:!aNULL:'
    '!eNULL:!MD5'
)

headers = {
    'user-agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36"
}

class DESAdapter(HTTPAdapter):
    def init_poolmanager(self, *args, **kwargs):
        # apply the custom cipher suite to direct connections
        context = create_urllib3_context(ciphers=CIPHERS)
        kwargs['ssl_context'] = context
        return super(DESAdapter, self).init_poolmanager(*args, **kwargs)

    def proxy_manager_for(self, *args, **kwargs):
        # apply the custom cipher suite when going through a proxy
        context = create_urllib3_context(ciphers=CIPHERS)
        kwargs['ssl_context'] = context
        return super(DESAdapter, self).proxy_manager_for(*args, **kwargs)

s = requests.Session()
s.mount('https://baijiahao.baidu.com', DESAdapter())   # mount the adapter on the target domain prefix
res = s.get(url, headers=headers)                       # url: the decoded target URL from above
res.encoding = 'utf-8'
print(res.text)
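To confirm the adapter really changes the fingerprint, compare the JA3 hash reported for this session against the one plain requests produces, using the same browserleaks endpoint as the curl_cffi example further down (a quick check, assuming the endpoint is reachable):

s.mount('https://tls.browserleaks.com', DESAdapter())
print(s.get('https://tls.browserleaks.com/json', headers=headers).json()['ja3_hash'])
print(requests.get('https://tls.browserleaks.com/json', headers=headers).json()['ja3_hash'])

The two hashes should differ; if they match, the adapter is not being used for that host.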
3. Using the pyhttpx module
import pyhttpx

sess = pyhttpx.HttpSession()
res = sess.get(url=url, headers=headers)   # same url and headers as in the example above
print(res.text)
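As I understand it (not stated above), pyhttpx implements its own TLS client instead of going through urllib3, which is why the ClientHello it sends does not carry the stock requests fingerprint. The same sanity check works here; json.loads is used because only the .text attribute appears in the snippet above:

import json

check = sess.get('https://tls.browserleaks.com/json', headers=headers)
print(json.loads(check.text)['ja3_hash'])   # should differ from the plain-requests hash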
4. Using the curl_cffi module
The repository is at https://github.com/yifeikong/curl_cffi; install it with:
pip install curl_cffi
from curl_cffi import requests

# note the impersonate parameter: it tells curl_cffi which browser's TLS fingerprint to mimic
r = requests.get("https://tls.browserleaks.com/json", impersonate="chrome101")
print(r.json())
# output: {'ja3_hash': '53ff64ddf993ca882b70e1c82af5da49', ...}
Using a proxy:
proxies={"http": "http://localhost:7777", "https": "http://localhost:7777"}
r = requests.get("http://baidu.com",
proxies=proxies,
allow_redirects=False,
impersonate="chrome101"
)
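Putting it together for the original page, a sketch that fetches the decoded Baijiahao URL through curl_cffi with a browser impersonation profile; url and headers are the variables defined earlier, and the proxy line is optional (it assumes a local proxy on port 7777, as in the example above):

from curl_cffi import requests

res = requests.get(
    url,                      # decoded target URL from the top of the post
    headers=headers,          # same user-agent as above
    impersonate="chrome101",
    # proxies={"http": "http://localhost:7777", "https": "http://localhost:7777"},
)
print(res.text)               # if JA3 was the only check, the article should come back instead of the anti-bot page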