UI自动化过程中,经常存在需要获取http请求信息的场景。
例如:元素文案由接口返回,需要验证文案的正确性;出现报错时需要打印http请求日志,方便排查问题等
参见selenium-wire官网;该库仅支持python3.6+
# 以获取API商城 - IP查询服务的timestamp签名为例
# 是seleniumwire 不是 selenium
import time
from seleniumwire import webdriver
# `By` locators replace the find_element_by_* helpers, which Selenium 4.3+
# removed; find_element(By.XPATH, ...) is also accepted by Selenium 3.
from selenium.webdriver.common.by import By

driver = webdriver.Chrome()
try:
    driver.get('https://apis.baidu.com/store/aladdin/land?cardType=ipSearch')
    # Type the IP address to look up, then click the element that submits
    # the query (presumably the search button next to the input).
    driver.find_element(By.XPATH, '//*[@id="app"]/div[2]/div/div/div[2]/input').send_keys("112.10.36.59")
    driver.find_element(By.XPATH, '//*[@id="app"]/div[2]/div/div/div[2]/div').click()
    time.sleep(1)  # give the page a moment to send the query request
    # Access requests via the `requests` attribute
    for request in driver.requests:
        # Only look at requests that already have a response and carry the
        # "timestamp" header.
        if request.response and "timestamp" in request.headers:
            print(request.headers["timestamp"])
finally:
    # quit() ends the whole WebDriver session, whereas close() only closes
    # the current window; running it in `finally` releases the browser even
    # when a lookup above raises.
    driver.quit()
为浏览器设置代理,从代理服务器上获取请求信息,该方式不做详细描述
设置代理
代理工具推荐使用mitmproxy
启动浏览器时添加如下信息
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
# Start Chrome with network events enabled in its performance log, so that
# driver.get_log('performance') can be read afterwards.
option = webdriver.ChromeOptions()
option.add_experimental_option('perfLoggingPrefs', {'enableNetwork': True})
# Work on a copy: DesiredCapabilities.CHROME is a shared class-level dict, and
# mutating it in place would leak the logging prefs into every other driver
# created in the same process.
caps = DesiredCapabilities.CHROME.copy()
caps['goog:loggingPrefs'] = {'performance': 'ALL'}
# NOTE(review): chrome_driver_path is not defined in this snippet; it is
# expected to be provided by the surrounding code.
driver = webdriver.Chrome(chrome_driver_path, options=option, desired_capabilities=caps)
"""
日志的类型
['Network.loadingFailed', 'Network.loadingFinished', 'Network.resourceChangedPriority',
'Network.requestServedFromCache', 'Network.requestWillBeSent', 'Network.requestWillBeSentExtraInfo',
'Network.responseReceived', 'Network.responseReceivedExtraInfo', 'Network.dataReceived',
'Page.frameAttached', 'Page.frameRequestedNavigation', 'Page.frameStoppedLoading',
'Page.frameClearedScheduledNavigation', 'Page.loadEventFired', 'Page.frameStartedLoading',
'Page.frameDetached', 'Page.frameScheduledNavigation', 'Page.frameNavigated', 'Page.frameResized',
'Page.domContentEventFired']
请求的类型(待补充)
['XHR'(接口请求), 'Fetch'(接口请求), 'Script'(.js), 'Stylesheet'(.css), 'Image'(.png等), 'Font', 'Document'(文件), 'Manifest', 'Ping', 'Preflight', 'Navigation', 'Other']
"""
# Collect the HTTP traffic recorded in Chrome's performance log and merge it
# into two lists:
#   complete_request_list   - requests that have a matching response
#   incomplete_request_list - requests with no response in the log yet
import json  # used below; the visible part of the file never imports it

time.sleep(2)  # wait a little so that most in-flight requests have finished

request_list = []   # every request found in the log
response_list = []  # every response found in the log
cache_list = []     # every "served from cache" record found in the log

for entry in self.driver.get_log('performance'):
    message = json.loads(entry['message'])['message']
    method = message['method']

    # Request details are carried by Network.requestWillBeSent.
    if method == 'Network.requestWillBeSent':
        params = message['params']
        request = params['request']
        # 'type' is the resource type shown in the DevTools network filter;
        # it distinguishes API calls from page/resource loads.
        record = {'id': params['requestId'], 'type': params['type'],
                  'url': request['url'], 'method': request['method'],
                  'req_time': entry['timestamp'], 'req_headers': request['headers']}
        try:
            # Best effort: only requests with a JSON body get 'req_body'.
            record['req_body'] = json.loads(request['postData'])
        except (KeyError, TypeError, ValueError):
            pass  # no body, or the body is not JSON
        request_list.append(record)

    # Response details are carried by Network.responseReceived, but that
    # event does not include the body.
    elif method == 'Network.responseReceived':
        params = message['params']
        request_id = params['requestId']
        response = params['response']
        try:
            # The body has to be fetched separately through CDP.
            resp_body = json.loads(self.driver.execute_cdp_cmd(
                'Network.getResponseBody', {'requestId': request_id})['body'])
        except Exception:
            # Body unavailable or not JSON: keep the record without it.
            resp_body = None
        record = {'id': request_id, 'type': params['type'], 'url': response['url'],
                  'resp_time': entry['timestamp']}
        # Prefer the request headers reported with the response when they
        # are present, because they are more complete.
        if 'requestHeaders' in response:
            record['req_headers'] = response['requestHeaders']
        record['resp_status'] = response['status']
        record['resp_headers'] = response['headers']
        record['resp_body'] = resp_body
        response_list.append(record)

    # Requests answered straight from the browser cache (typically css/js).
    elif method == 'Network.requestServedFromCache':
        cache_list.append({'id': message['params']['requestId']})

# Flag every request with whether it was served from cache. A set lookup
# keeps all requests even when cache_list is empty.
cached_ids = {cache['id'] for cache in cache_list}
new_request_list = [dict(request, req_from_cache=request['id'] in cached_ids)
                    for request in request_list]

# Index responses by (id, url, type); setdefault keeps the first response
# seen for each key.
responses_by_key = {}
for response in response_list:
    responses_by_key.setdefault(
        (response['id'], response['url'], response['type']), response)

# Merge requests with responses. There can be fewer responses than requests
# because some requests are still pending when the log is pulled.
complete_request_list = []    # requests that have a response
incomplete_request_list = []  # requests without a response
for request in new_request_list:
    response = responses_by_key.get(
        (request['id'], request['url'], request['type']))
    if response is None:
        incomplete_request_list.append(request)
    else:
        # Response fields go last so that its more complete req_headers win.
        complete_request_list.append(dict(request, **response))