For products with a B/S (browser/server) architecture, a common day-to-day need is to measure how long a certain business action takes. The most basic approach is timing it with a stopwatch, but that can hardly resolve anything below one second. The second approach is to measure the interface times with a testing tool, but that ignores the time spent loading static resources and rendering.
The script below takes the request file exported from Chrome (a HAR file), parses Chrome's timing data, and calculates the business time from it. The calculation logic and approach still leave room for improvement and will need further optimization.
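Before the full script, here is a minimal sketch of the core calculation (for illustration only: the function name page_time_seconds is made up, and it assumes the HAR timestamps end in "Z", the same format the script below expects). A HAR file can be saved from the Network panel of Chrome DevTools via "Export HAR":

import json
from datetime import datetime

def page_time_seconds(har_path):
    with open(har_path, 'rb') as f:
        entries = json.load(f)['log']['entries']
    def parse(ts):
        # e.g. "2020-03-20T09:04:12.345Z" -> naive datetime, same format the full script handles
        return datetime.strptime(ts.replace('T', ' ').replace('Z', ''), "%Y-%m-%d %H:%M:%S.%f")
    first_start = parse(entries[0]['startedDateTime'])
    last_start = parse(entries[-1]['startedDateTime'])
    # page time = gap between the first and the last request start, plus the duration of the last request (ms)
    return round((last_start - first_start).total_seconds() + entries[-1]['time'] / 1000, 2)

The full script below walks a directory of HAR files, prints the same page-level numbers, and also parses the per-request timing phases: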
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2020-03-20 17:04
# @Author : 无名氏
# @File : ResolveHarFile.py
# A HAR file is the request log exported from Chrome DevTools
import os
import json
import time
from datetime import datetime
class ResolveHarFile():
    def __init__(self, harfilepath):
        self.har_filepath = harfilepath

    def get_all_harfile(self):
        '''
        Walk the capture directory and parse every HAR file in it
        :return:
        '''
        for dirpath, dirnames, filenames in os.walk(self.har_filepath):
            for filename in filenames:
                file_path = os.path.join(dirpath, filename)
                (file_name, extension) = os.path.splitext(filename)
                # print(file_path)
                # print(file_name)
                self.read_harfile(file_path, file_name)
    def msec_calculate(self, msec):
        '''
        If the millisecond value is 1000 or more, split it into seconds and milliseconds;
        otherwise return 0 seconds and the milliseconds unchanged
        :return:
        '''
        msec_int = 0  # seconds, initial value
        if msec >= 1000:  # split into whole seconds and the remaining milliseconds
            msec_int = int(msec) // 1000  # integer division gives the seconds
            msec = int(msec) % 1000  # the remainder becomes the new millisecond value
        if msec < 100:  # left-pad the milliseconds to three digits
            msec = str(msec).zfill(3)
        return msec_int, msec
    def timestamp_calculate(self, request_time, last_cost_time):
        '''
        Convert a request's start time string into a millisecond timestamp.
        The page time is the difference between the start time of the first request
        and the start time of the last request, plus the duration of the last request.
        Takes a time string, returns a timestamp.
        '''
        request_time = request_time.replace('T', ' ').replace('Z', '')  # strip the ISO 8601 separators from the date string
        time_array = datetime.strptime(request_time, "%Y-%m-%d %H:%M:%S.%f")  # parse the string into a datetime
        millisecond_stamp = int(time_array.microsecond / 1000)  # millisecond part of the start time
        timestamp = int(time.mktime(time_array.timetuple()) * 1000)  # whole seconds converted to a millisecond timestamp
        if last_cost_time:  # if a request duration was passed in, add it on top of the start time
            millisecond_stamp = millisecond_stamp + int(last_cost_time)
        timestamp = timestamp + millisecond_stamp  # combine the second- and millisecond-level parts
        return timestamp

    def page_request_timer(self):
        pass
    def read_harfile(self, filepath, file_name):
        file = open(filepath, 'rb')
        request_info = json.loads(file.read())  # parse the HAR file as JSON
        data = request_info['log']
        page_data = data['pages'][0]
        (contentload_time, load_time) = self.har_page_info(page_data)
        entries_data = data['entries']
        first_request_time = entries_data[0]['startedDateTime']  # start time of the first request
        last_request_time = entries_data[-1]['startedDateTime']  # start time of the last request
        last_cost_time = round(entries_data[-1]['time'], 2)  # duration of the last request
        # print(first_request_time, last_request_time, last_cost_time)
        time_diff = self.timestamp_calculate(last_request_time, last_cost_time) - self.timestamp_calculate(first_request_time, '')  # page time in milliseconds
        # print(time_diff)
        print('Page info:', file_name, ' \nRequest Total=', len(entries_data), ', Contentload_time(ms)=', contentload_time, ' ,Load_time(ms)=', load_time, ' ,Page Time(s)=', round(time_diff / 1000, 2))
        for requests in entries_data:
            (method, url_type, url, request_time, sent_timing, ttfb_timing, download_timing, stall_timing, queue_timing, postdata) = self.request_info(requests)
            # print(method, url_type, url, request_time, sent_timing, ttfb_timing, download_timing, stall_timing, queue_timing, postdata)
            # print('request:', method, url_type, request_time)
        file.close()
        # print('file closed')
    def har_page_info(self, page_data):
        contentload_time = page_data['pageTimings']['onContentLoad']
        load_time = page_data['pageTimings']['onLoad']
        return round(contentload_time, 2), round(load_time, 2)
    def timing_info(self, timing):
        '''
        Parse Chrome's timings data to get the phases shown in the waterfall view
        '''
        request_sent = timing["send"]
        request_ttfb = timing["wait"]
        request_download = timing["receive"]
        blocked_queueing = timing["_blocked_queueing"]
        blocked = timing["blocked"]
        stall = blocked - blocked_queueing  # "blocked" includes queueing; the remainder is the stalled time
        return (round(request_sent, 2), round(request_ttfb, 2), round(request_download, 2), round(stall, 2), round(blocked_queueing, 2))
    def request_info(self, requests):
        # print(requests)
        request_time = requests["time"]
        url = requests["request"]["url"]
        method = requests["request"]["method"]
        if "postData" in requests["request"].keys():  # GET requests carry no postData, so check before reading it
            postdata = requests["request"]["postData"]
        else:
            postdata = "No postdata"
        url_type = requests["_resourceType"]
        timings = requests["timings"]
        (sent, ttfb, download, stall, queue) = self.timing_info(timings)  # parse Chrome's timings block
        return (method, url_type, url, round(request_time, 2), sent, ttfb, download, stall, queue, postdata)

    def write_excel(self):
        pass
filepath = r'D:\WORK\2020-03(BsToJMX)\har'
if __name__ == "__main__":
    run = ResolveHarFile(filepath)
    run.get_all_harfile()
Final output:
Page info: 641
Request Total= 113 , Contentload_time(ms)= 4201.38 ,Load_time(ms)= 18257.85 ,Page Time(s)= 20.75
Page info: baidu
Request Total= 28 , Contentload_time(ms)= 705.82 ,Load_time(ms)= 5702.63 ,Page Time(s)= 5.79
Page info: baidu11
Request Total= 28 , Contentload_time(ms)= 448.18 ,Load_time(ms)= 732.77 ,Page Time(s)= 0.85
Page info: 点击登录
Request Total= 4 , Contentload_time(ms)= 4201.38 ,Load_time(ms)= 18257.85 ,Page Time(s)= 0.8
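The per-request tuples returned by request_info are unpacked in read_harfile but not used yet, and write_excel is still an empty stub. As one possible direction for the follow-up optimization mentioned above, the sketch below (not part of the original script; it uses the standard csv module instead of a real Excel writer, and the name write_requests_csv is made up) dumps the per-request timings to a file that Excel can open:

import csv

def write_requests_csv(rows, out_path):
    # rows: a list of the tuples returned by ResolveHarFile.request_info()
    header = ['method', 'type', 'url', 'time(ms)', 'sent(ms)', 'ttfb(ms)',
              'download(ms)', 'stall(ms)', 'queue(ms)', 'postdata']
    with open(out_path, 'w', newline='', encoding='utf-8-sig') as f:  # the BOM lets Excel detect UTF-8
        writer = csv.writer(f)
        writer.writerow(header)
        writer.writerows(rows)  # one row per request; a postData dict is written in its str() form

Writing the CSV with a UTF-8 BOM keeps non-ASCII URLs and post data readable when the file is opened directly in Excel.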