JMeter test results are usually saved as JTL files. When tests are run from the command line, the JTL has to be loaded back into the JMeter GUI to get the complete raw results (as opposed to the HTML report), and then exported again to CSV. The problem is that with large files this import often fails, which makes the statistics work very difficult. There are probably JMeter plugins for this, but as a learning exercise I wrote a simple script to handle it: it summarises the JTL files by test scenario, by samples that fail the acceptance criteria, and as an overall result set, and finally writes everything to Excel.
The files to be parsed look like the list below (single-scenario baseline test, single-scenario load tests at 10 and 20 concurrent users, mixed-scenario load test, stability test, and so on); they all sit in one folder and the script walks through them in turn.
XXX_单场景基准测试.jtl
XXX_单场景负载测试并发10.jtl
XXX_单场景负载测试并发20.jtl
XXX_混合场景负载测试并发10.jtl
XXX_稳定性测试.jtl
..............
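Each of these is a CSV-format JTL. With JMeter's default saveservice settings the header row looks roughly like the example below (the data row is purely illustrative, and the exact columns depend on your jmeter.properties); the script only reads the timeStamp, elapsed, label and success columns:

timeStamp,elapsed,label,responseCode,responseMessage,threadName,dataType,success,failureMessage,bytes,sentBytes,grpThreads,allThreads,URL,Latency,IdleTime,Connect
1589529155000,132,HTTP_login,200,OK,Thread Group 1-1,text,true,,1324,210,10,10,http://example.com/login,118,0,23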
The statistics follow the calculation logic in the JMeter source code; there may be small differences due to precision, but they can be ignored.
The reported response time is the 90th-percentile response time, and a sampler fails the acceptance criteria when either of two conditions holds: 90% response time > 1000 ms, or error rate > 0.00%. The script cannot yet handle results that contain transactions; that will be added later. As a sanity check, a large file of 10 million rows (about 1.8 GB) takes roughly 40 seconds to process. Some unused code has been left in for verification purposes and not removed.
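To illustrate the percentile logic used in the script: within one label the samples are sorted by elapsed time, and the value at the 1-based position ceil(n * 0.9) is taken as the 90% response time. A minimal sketch with made-up numbers:

import math

elapsed = sorted([105, 98, 230, 87, 120, 310, 95, 101, 99, 143])  # 10 illustrative samples, in ms
level_90 = math.ceil(len(elapsed) * 0.9)  # ceil(9.0) = 9
print(elapsed[level_90 - 1])              # 9th smallest value -> 230 ms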
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2020-05-15 16:52
# @Author : 无名氏
# @File : jtl_toexcel.py
import os
import pandas as pd
import datetime
import time
from decimal import Decimal
import math
import gc
from JtlFileFun.JtlFileFun import *
class JtlFileToExcel():
def __init__(self,reponse_time,filefolder):
self.filefolder = filefolder
if reponse_time:
self.reponse_time = int(reponse_time)
else:
self.reponse_time = False
self.column_name = ('timeStamp', 'elapsed', 'label', 'responseCode', 'responseMessage',
'threadName', 'dataType', 'success', 'failureMessage','bytes',
'sentBytes', 'grpThreads', 'allThreads', 'URL', 'Latency', 'IdleTime',
                            'Connect', 'testSuiteName')  # DataFrame column names: the JTL fields plus testSuiteName
def Walk_folder(self,file_folder):
file_dict = {}
for dirpath, dirnames, filenames in os.walk(file_folder):
for filename in filenames:
file_item ={}
file_path = os.path.join(dirpath, filename)
(file_name, extension) = os.path.splitext(filename)
# file_dict[file_name] = file_path
# file_item['file_name'] = file_name
file_item['file_size'] = str(int(os.path.getsize(file_path)/1024))+'KB'
file_item['file_path'] = file_path
file_dict[file_name] = file_item
# print(file_name,str(int(os.path.getsize(file_path)/1024))+'KB')
return file_dict
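    # For the folder of JTL files listed earlier, Walk_folder() returns a dict keyed by the file
    # name without its extension, roughly like this (sizes and paths are illustrative):
    #   {'XXX_单场景基准测试': {'file_size': '1843200KB', 'file_path': 'jtl/XXX_单场景基准测试.jtl'}, ...}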
def time_diff(self,end_time,start_time):
        '''
        Return the difference between two datetimes as a human-readable string.
        :param end_time:
        :param start_time:
        :return: the duration formatted in ms, s or min
        '''
strTime = (end_time - start_time).seconds * 1000 + (end_time - start_time).microseconds / 1000
# print(strTime)
if strTime>60000:
return (str(round(strTime / 60000,2)) + 'min')
elif strTime>1000:
return (str(round(strTime/1000,3))+'s')
else:
return (str(round(strTime,7))+'ms')
def chunk_file(self,file,file_name):
        '''
        Read a large JTL file in chunks to keep memory usage manageable.
        :param file: path of the JTL file
        :param file_name: scenario name, written into the testSuiteName column
        :return: a DataFrame with the columns timeStamp, elapsed, label, success, testSuiteName
        '''
        jtl_file = pd.read_csv(file, iterator=True, low_memory=False, usecols=[0, 1, 2, 7])  # timeStamp, elapsed, label, success
        loop = True
        chunkSize = 10000000  # rows per chunk
chunks = []
while loop:
try:
chunk = jtl_file.get_chunk(chunkSize)
chunks.append(chunk)
except StopIteration:
loop = False
# print("Iteration is stopped")
jtl_pd = pd.concat(chunks, ignore_index=True)
jtl_pd['testSuiteName'] = file_name
return jtl_pd
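    def chunk_file_v2(self, file, file_name, chunk_rows=1000000):
        '''
        Sketch of an equivalent variant that the script does not use: passing chunksize= to
        read_csv returns an iterator of DataFrames, so no StopIteration handling is needed.
        The method name and the chunk_rows default are illustrative, not part of the original.
        '''
        chunks = pd.read_csv(file, usecols=[0, 1, 2, 7], chunksize=chunk_rows)  # timeStamp, elapsed, label, success
        jtl_pd = pd.concat(chunks, ignore_index=True)
        jtl_pd['testSuiteName'] = file_name
        return jtl_pd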
def read_jtlfile(self):
        print('Start processing files')
        file_dict = self.Walk_folder(self.filefolder)  # collect every file's path and size, keyed by file name
all_data = pd.DataFrame(columns=self.column_name)
all_result_file = pd.ExcelWriter('Allreport.xlsx')
fail_result_file = pd.ExcelWriter('Failed.xlsx')
all_failed_df = pd.DataFrame()
all_ninety_df = pd.DataFrame()
scene_count_df = pd.DataFrame(columns=('scene','scene_count','scene_ninety(ms)'))
i = 0
for (file_name, file) in file_dict.items():
starttime = datetime.datetime.now()
# print(file_name)
            data_pd = self.chunk_file(file['file_path'], file_name)  # chunked read for large files
            scene_df = self.pd_analyze(file_name, data_pd)  # per-sampler statistics for the whole file
            # write each scenario's summary to its own sheet
            scene_df.to_excel(all_result_file, sheet_name=file_name, index=False)  # index=False avoids an extra index column
            # samples that do not meet the acceptance criteria
            scene_failed_df = self.get_fail_df(scene_df)  # select the rows that fail the criteria
            all_failed_df = all_failed_df.append(scene_failed_df)  # accumulate failing rows across scenarios
            # collect the 90% response times
            ninety_df = scene_df[['Sampler', 'Ninety']]
            ninety_df.columns = ['Sampler', file_name]  # rename the Ninety column to the scenario name
            # columns are added per scenario: assign the first scenario directly, then merge each later one onto it
if i == 0:
all_ninety_df = ninety_df
else:
all_ninety_df = pd.merge(all_ninety_df, ninety_df, how = 'left',on='Sampler')
i = i+1
            # per-scenario roll-up: total sample count and worst 90% response time
            # print(file_name, scene_df['sample_count'].sum(), scene_df['Ninety'].max())
            scene_count_df = scene_count_df.append(pd.DataFrame({'scene': [file_name], 'scene_count': [scene_df['sample_count'].sum()], 'scene_ninety(ms)': [scene_df['Ninety'].max()]}))
            endtime = datetime.datetime.now()
            print(file_name, ' file size: ' + file['file_size'] + ', processing time: ' + self.time_diff(endtime, starttime))
            # print(file_name, len(new_data_pd), ' rows, processing time:', self.time_diff(filetime2, filetime1))
        # write the rows that fail the acceptance criteria
        if not all_failed_df.empty:
            all_failed_df.to_excel(fail_result_file, sheet_name='Failed criteria', index=False)  # index=False avoids an extra index column
fail_result_file.save()
fail_result_file.close()
        # write the 90% response times of all scenarios to one sheet
        all_ninety_df.to_excel(all_result_file, sheet_name='90% response time summary', index=False)  # index=False avoids an extra index column
        # write each scenario's sample count and worst 90% response time to one sheet
        scene_count_df.to_excel(all_result_file, sheet_name='Scenario summary', index=False)  # index=False avoids an extra index column
        # save the workbook
        all_result_file.save()
        all_result_file.close()
        print('All files parsed')
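    # Compatibility note (depends on your pandas version, an assumption here): DataFrame.append and
    # ExcelWriter.save, used above, were removed in pandas 2.0; on newer pandas the equivalents are e.g.
    #     all_failed_df = pd.concat([all_failed_df, scene_failed_df], ignore_index=True)
    # and closing the writers with .close() alone.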
def get_fail_df(self,scene_df):
        '''
        Select the samplers that fail the acceptance criteria:
        90% response time (Ninety) > self.reponse_time, or error rate > 0.00%.
        self.reponse_time is configurable.
        '''
        # select rows with Ninety above the configured threshold (e.g. 1000 ms) or error rate > 0.00%
        if self.reponse_time:
            scene_df_failed = scene_df.loc[(scene_df.Ninety > self.reponse_time) | (scene_df.ERROR != '0.00%')]
        else:
            scene_df_failed = scene_df.loc[scene_df.ERROR != '0.00%']  # no threshold configured: filter on error rate only
return scene_df_failed
def pd_analyze(self,file_name,data_pd):
        scene_df = pd.DataFrame(columns=('Sampler', 'sample_count', 'MIN', 'MAX', 'average', 'Ninety', 'Ninety_five', 'throughput(s)', 'ERROR', 'Scene'))  # result DataFrame, one row per sampler
        for (sample, sample_df) in data_pd.groupby('label'):  # groupby yields (label, sub-DataFrame) pairs
            sample = sample.replace('-', '')  # strip '-' characters from the label
            sample_sort_df = sample_df.sort_values(by="elapsed", axis=0, ascending=True, inplace=False)  # sort by elapsed, ascending
            sample_sort_df = sample_sort_df.reset_index(drop=True)  # rebuild the index, dropping the original one
            sample_count = sample_df.shape[0]  # number of samples
            # 1-based positions of the 90%, 95% and 50% response times in the sorted samples
            level_90 = math.ceil(sample_count * 0.9)
            level_95 = math.ceil(sample_count * 0.95)
            level_50 = math.ceil(sample_count * 0.5)
            # percentile response times (subtract 1 to convert to the 0-based index)
            reponse_time_90 = sample_sort_df.at[level_90 - 1, 'elapsed']
            reponse_time_95 = sample_sort_df.at[level_95 - 1, 'elapsed']
            reponse_time_50 = sample_sort_df.at[level_50 - 1, 'elapsed']
            max = sample_sort_df['elapsed'].max()  # slowest sample
            min = sample_sort_df['elapsed'].min()  # fastest sample
            max_time = sample_sort_df['timeStamp'].max()  # latest timestamp
            min_time = sample_sort_df['timeStamp'].min()  # earliest timestamp
            # throughput = samples / test duration, converted from ms to requests per second
            throught = round((sample_count / (int(max_time) - int(min_time))) * 1000, 2)
# print(sample,max_time,min_time)
            res_proportion = sample_sort_df['success'].value_counts(normalize=True)
            # error rate = share of samples whose success flag is False ('0.00' when every sample passed)
for index in res_proportion.index:
if index:
err_ratio = '0.00'
continue
else:
err_ratio = round((res_proportion[index]) * 100, 2)
break
            scene_df = scene_df.append(pd.DataFrame({'Sampler': [sample], 'sample_count': [sample_count], 'MIN': [min], 'MAX': [max], 'average': [reponse_time_50], 'Ninety': [reponse_time_90], 'Ninety_five': [reponse_time_95], 'throughput(s)': [throught], 'ERROR': [str(err_ratio) + '%'], 'Scene': [file_name]}))  # note: 'average' holds the 50th percentile (median)
return scene_df
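To run the script, an entry point along these lines works; the response-time threshold (in milliseconds) and the folder path are placeholders for your own values, and Allreport.xlsx (plus Failed.xlsx when any sampler fails the criteria) is written to the current directory:

if __name__ == '__main__':
    jtl_to_excel = JtlFileToExcel(1000, './jtl')  # 1000 ms threshold, folder containing the .jtl files
    jtl_to_excel.read_jtlfile()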