TOC
在 pyppeteer 中拦截请求时,按照百度上搜到的方法试了很多都没有成功,出现各种错误,其中遇到的一个是:RuntimeWarning: coroutine ... was never awaited(后面还附带提示 Enable tracemalloc to get the object allocation traceback)。
这个警告的意思是:调用了异步函数(协程),却没有用关键字 await 去等待它。
这时灵光一闪,加上关键字await试试??不用异步调用试试??
经过多次尝试,终于拦截成功!!
结果是:回调函数不用异步(定义成普通函数,而不是 async def),直接传给 page.on 调用就成功了!!
#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""
@author:ITbaby_boy
@time:2021/06/30
pyppeteer拦截请求问题
"""
import asyncio
from pyppeteer import launch, launcher
async def main():
    """Launch Chromium, open Baidu and report matching responses.

    Starts a non-headless browser with a mobile-sized viewport, installs a
    small anti-bot-detection script, registers ``get_content`` as the
    page's ``response`` callback, navigates to https://www.baidu.com/ and
    then waits for the user to press Enter before closing the browser.
    """
    # Browser launch options.
    launch_options = {
        # Path of the chrome executable to start (uncomment to override).
        # "executablePath": r"C:\Users\Administrator\AppData\Local\pyppeteer\pyppeteer\local-chromium\588429\chrome-win32\chrome.exe",
        # Show the browser window (set to True for headless mode).
        "headless": False,
        'defaultViewport': {'width': 400, 'height': 700},
        # User profile directory. This is a pyppeteer launch option; placed
        # in ``args`` without a leading ``--`` it is not a Chromium switch
        # and the profile directory would not be applied.
        'userDataDir': r'D:\UserInfo',
        "args": [
            '--disable-infobars',  # hide the "controlled by automation" bar
            # Log verbosity; a higher level keeps the generated logs small.
            # NOTE(review): 30 is Python logging's WARNING value - confirm
            # that Chromium accepts it for --log-level.
            '--log-level=30',
            # Default user agent for the whole browser.
            '--user-agent=Mozilla/5.0 (Linux; Android 10; LIO-AN00 Build/HUAWEILIO-AN00; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/76.0.3809.89 Mobile Safari/537.36 T7/11.21 SP-engine/2.17.0 baiduboxapp/11.21.0.10 (Baidu; P1 10) NABar/1.0',
            '--no-sandbox',  # disable the sandbox
            # '--window-size=1366,768',  # window size
            '--start-maximized',  # start with a maximized window
            # '--proxy-server=http://localhost:8001',  # proxy
        ],
    }
    browser = await launch(launch_options)
    try:
        page = await browser.newPage()
        await page.setViewport({'width': 400, 'height': 700})
        # Set the user agent on this page explicitly.
        await page.setUserAgent('UA-TEST')

        # Anti-bot-detection script: masks navigator.webdriver and fakes
        # navigator.chrome, navigator.languages and navigator.plugins.
        js_text = """
        () =>{
            Object.defineProperties(navigator,{ webdriver:{ get: () => false } });
            window.navigator.chrome = { runtime: {}, };
            Object.defineProperty(navigator, 'languages', { get: () => ['en-US', 'en'] });
            Object.defineProperty(navigator, 'plugins', { get: () => [1, 2, 3, 4, 5,6], });
        }
        """
        # Re-run automatically on every new document, so the overrides
        # survive a page refresh.
        await page.evaluateOnNewDocument(js_text)

        # Listen for responses. The callback is a plain (non-async)
        # function; this observes traffic and does not enable request
        # interception.
        page.on('response', get_content)
        await page.goto('https://www.baidu.com/')

        # Wait for Enter without blocking the event loop: a bare input()
        # call would freeze the loop and no further 'response' events
        # could be dispatched while waiting.
        await asyncio.get_event_loop().run_in_executor(None, input, '---')
    finally:
        # Always shut the browser down, even if navigation fails.
        await browser.close()
def get_content(response):
    """Print the URL of a response whose URL contains ``www.baidu.com``.

    Intended as a plain (non-async) callback for the page's ``response``
    event. Responses whose URL does not contain that text are ignored.

    Args:
        response: Object exposing a ``url`` string attribute.
    """
    target_url = response.url
    if 'www.baidu.com' not in target_url:
        return
    print(target_url)
if __name__ == '__main__':
    # Script entry point: run main() to completion on the current
    # asyncio event loop.
    pending = asyncio.gather(main())
    asyncio.get_event_loop().run_until_complete(pending)