【Selenium】控制当前已经打开的 Chrome 浏览器,并获取 Network 数据

【Selenium】控制当前已经打开的 Chrome 浏览器窗口

# -*- coding: utf-8 -*-
# @Time    : 2022/12/6 9:37
# @Author  : Cocktail_py

# Equivalent manual command (Windows) to launch Chrome with remote debugging:
#   start chrome --remote-debugging-port=9527 --user-data-dir="D:\selenium"
import os
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

if __name__ == '__main__':
    # Launch Chrome with the DevTools remote-debugging port 9527 open and its
    # own user-data directory (Windows `start` command).
    os.system(r'start chrome --remote-debugging-port=9527 --user-data-dir="D:\selenium"')

    # Attach chromedriver to the browser listening on that port instead of
    # letting Selenium spawn a fresh browser instance.
    options = Options()
    options.add_experimental_option("debuggerAddress", "127.0.0.1:9527")
    driver = webdriver.Chrome(options=options)
    try:
        driver.get('https://bot.sannysoft.com/')
        driver.close()  # close the current window
    finally:
        # Stop the chromedriver service even when navigation or closing fails,
        # so no orphaned driver process is left running.
        driver.service.stop()

【Selenium】使用 Selenium 获取 Network 数据

# -*- coding: utf-8 -*-
# @Time    : 2022/12/6 9:37
# @Author  : Cocktail_py


import os
import json
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.chrome.options import Options

# Extra capabilities requested from chromedriver.
caps = {
    "browserName": "chrome",
    'goog:loggingPrefs': {'performance': 'ALL'},  # turn on performance-log collection
}
# Launch Chrome with the DevTools remote-debugging port 9527 open (Windows `start`).
os.system(r'start chrome --remote-debugging-port=9527 --user-data-dir="D:\selenium"')
options = Options()
options.add_experimental_option("debuggerAddress", "127.0.0.1:9527")  # attach via port 9527
# The `desired_capabilities` keyword of webdriver.Chrome was deprecated in
# Selenium 4 and removed in 4.10, so the capabilities are applied through the
# Options object, which works on both older and newer releases.
for cap_name, cap_value in caps.items():
    options.set_capability(cap_name, cap_value)
browser = webdriver.Chrome(options=options)  # connect to the running browser
browser.get('https://blog.csdn.net/Cocktail_py')  # navigate to the target URL


# MIME types of responses that are usually not worth inspecting (scripts,
# stylesheets, images, binary blobs). Adjust this list to your own needs.
_SKIPPED_MIME_TYPES = (
    'application/javascript',
    'application/x-javascript',
    'text/css',
    'image/webp',
    'webp',  # kept for backward compatibility with the original list
    'image/png',
    'image/gif',
    'image/jpeg',
    'image/x-icon',
    'application/octet-stream',
)


def filter_type(_type: str) -> bool:
    """Tell whether a response with the given MIME type should be kept.

    Args:
        _type: The MIME type to check, compared by exact match.

    Returns:
        ``False`` when ``_type`` is one of the skipped static-resource types,
        ``True`` for anything else (including ``None``).
    """
    return _type not in _SKIPPED_MIME_TYPES


performance_log = browser.get_log('performance')  # entries of the 'performance' log
for entry in performance_log:
    # Each entry's 'message' field is a JSON string wrapping the event itself.
    event = json.loads(entry.get('message')).get('message')
    if event.get('method') != 'Network.responseReceived':
        continue  # only 'Network.responseReceived' events are processed

    params = event.get('params')
    response_info = params.get('response')
    mime_type = response_info.get('mimeType')  # MIME type reported for the response
    if not filter_type(_type=mime_type):
        continue  # skip the types rejected by filter_type

    request_id = params.get('requestId')  # identifier passed back to the CDP call below
    url = response_info.get('url')
    try:
        # Ask the browser for the response body through the DevTools protocol.
        body = browser.execute_cdp_cmd('Network.getResponseBody', {'requestId': request_id})
    except WebDriverException:
        # Best effort: responses whose body cannot be fetched are skipped.
        continue
    print(f'type: {mime_type} url: {url}')
    print(f'response: {body}')
    print()


参考:
https://frica.blog.csdn.net/article/details/126551260
https://frica.blog.csdn.net/article/details/126389273

你可能感兴趣的:(selenium,chrome,selenium,python)