Converting JMeter JTL test results

JMeter test results are normally saved as JTL files. When tests are run from the command line, the JTL has to be loaded back into JMeter to obtain the complete test results (as opposed to the HTML test report), and then exported again to CSV. The trouble is that with large files the import into JMeter often fails, which makes the statistics work very difficult. There are presumably JMeter plugins for this, but as a learning exercise I wrote a simple script to handle it myself. It aggregates the JTL files along several dimensions (per test scenario, samples that miss the pass criteria, and the overall results) and writes everything to Excel.

The files to be parsed look like the list below; they are placed in one folder and processed in turn.

XXX_single-scenario baseline test.jtl
XXX_single-scenario load test, 10 concurrent.jtl
XXX_single-scenario load test, 20 concurrent.jtl
XXX_mixed-scenario load test, 10 concurrent.jtl
XXX_stability test.jtl
......
The aggregation logic follows the calculations in JMeter's own source code; for precision reasons the results may differ slightly, but the difference is negligible.
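As an illustration of that logic: the 90% response time below is the JMeter-style percentile, i.e. the element at 1-based position ceil(0.9 * n) of the ascending-sorted elapsed times. A standalone sketch (the sample numbers are made up):

import math

def jmeter_percentile(elapsed, level):
    # JMeter-style percentile: element at 1-based position
    # ceil(level * n) in the ascending-sorted sample list
    ordered = sorted(elapsed)
    return ordered[math.ceil(len(ordered) * level) - 1]

samples = [120, 95, 430, 210, 88, 150, 301, 99, 176, 260]
print(jmeter_percentile(samples, 0.9))  # -> 301, the 90% response time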

The reported response time is the 90th percentile. A sampler misses the pass criteria on either of two conditions: 90% response time > 1000 ms, or error rate > 0.00%. The script cannot yet handle results that contain transactions; that will be added later. As a benchmark, a large file of 10 million rows (about 1.8 GB) takes roughly 40 seconds to process.
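For reference, a JTL file saved in CSV mode starts with a header row in the default column order (the exact set of columns depends on the jmeter.save.saveservice settings); the testSuiteName column in the script's column list is added by the script itself and is not in the file. An illustrative excerpt with made-up values:

timeStamp,elapsed,label,responseCode,responseMessage,threadName,dataType,success,failureMessage,bytes,sentBytes,grpThreads,allThreads,URL,Latency,IdleTime,Connect
1589528000123,245,Login,200,OK,Thread Group 1-1,text,true,,1532,410,10,10,http://example.com/login,201,0,35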

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2020-05-15 16:52
# @Author  : 无名氏
# @File    : jtl_toexcel.py
import os
import math
import datetime
import pandas as pd


class JtlFileToExcel:
    def __init__(self, response_time, filefolder):
        self.filefolder = filefolder
        if response_time:
            self.response_time = int(response_time)
        else:
            self.response_time = None  # no threshold: only the error rate is checked

        # JTL column layout, for reference; the script itself only reads
        # columns 0, 1, 2 and 7 (timeStamp, elapsed, label, success) and
        # appends testSuiteName.
        self.column_name = ('timeStamp', 'elapsed', 'label', 'responseCode', 'responseMessage',
                            'threadName', 'dataType', 'success', 'failureMessage', 'bytes',
                            'sentBytes', 'grpThreads', 'allThreads', 'URL', 'Latency', 'IdleTime',
                            'Connect', 'testSuiteName')


    def Walk_folder(self, file_folder):
        '''Collect every file under file_folder; returns {file_name: {file_size, file_path}}.'''
        file_dict = {}
        for dirpath, dirnames, filenames in os.walk(file_folder):
            for filename in filenames:
                file_item = {}
                file_path = os.path.join(dirpath, filename)
                (file_name, extension) = os.path.splitext(filename)
                file_item['file_size'] = str(int(os.path.getsize(file_path) / 1024)) + 'KB'
                file_item['file_path'] = file_path
                file_dict[file_name] = file_item
        return file_dict

    def time_diff(self, end_time, start_time):
        '''Return the difference between two datetimes as a human-readable string.'''
        strTime = (end_time - start_time).seconds * 1000 + (end_time - start_time).microseconds / 1000
        if strTime > 60000:
            return str(round(strTime / 60000, 2)) + 'min'
        elif strTime > 1000:
            return str(round(strTime / 1000, 3)) + 's'
        else:
            return str(round(strTime, 7)) + 'ms'


    def chunk_file(self, file, file_name):
        '''
        Read a large file in chunks so memory use stays bounded.
        :param file: path of the JTL file
        :param file_name: scenario name, stored in the testSuiteName column
        :return: one concatenated DataFrame
        '''
        jtl_file = pd.read_csv(file, iterator=True, low_memory=False, usecols=[0, 1, 2, 7])
        loop = True
        chunkSize = 10000000  # rows per chunk
        chunks = []
        while loop:
            try:
                chunk = jtl_file.get_chunk(chunkSize)
                chunks.append(chunk)
            except StopIteration:
                loop = False  # iterator exhausted
        jtl_pd = pd.concat(chunks, ignore_index=True)
        jtl_pd['testSuiteName'] = file_name
        return jtl_pd



    def read_jtlfile(self):
        print('Processing files...')
        file_dict = self.Walk_folder(self.filefolder)  # paths and names of all files
        all_result_file = pd.ExcelWriter('Allreport.xlsx')
        all_failed_df = pd.DataFrame()
        all_ninety_df = pd.DataFrame()
        scene_count_df = pd.DataFrame(columns=('scene', 'scene_count', 'scene_ninety(ms)'))
        i = 0

        for (file_name, file) in file_dict.items():
            starttime = datetime.datetime.now()

            data_pd = self.chunk_file(file['file_path'], file_name)  # chunked read for large files
            scene_df = self.pd_analyze(file_name, data_pd)  # aggregate the whole file
            # write each scenario's summary to its own sheet
            scene_df.to_excel(all_result_file, sheet_name=file_name, index=False)  # index=False avoids a spurious index column

            # accumulate the samplers that miss the pass criteria
            scene_failed_df = self.get_fail_df(scene_df)
            all_failed_df = pd.concat([all_failed_df, scene_failed_df], ignore_index=True)

            # collect the 90% response times
            ninety_df = scene_df[['Sampler', 'Ninety']].copy()
            ninety_df.columns = ['Sampler', file_name]  # rename the Ninety column to the scenario name
            # first scenario: take the frame as-is; later scenarios are merged in as extra columns
            if i == 0:
                all_ninety_df = ninety_df
            else:
                all_ninety_df = pd.merge(all_ninety_df, ninety_df, how='left', on='Sampler')
            i = i + 1

            # per-scenario totals: request count and worst 90% response time
            scene_count_df = pd.concat([scene_count_df,
                                        pd.DataFrame({'scene': [file_name],
                                                      'scene_count': [scene_df['sample_count'].sum()],
                                                      'scene_ninety(ms)': [scene_df['Ninety'].max()]})],
                                       ignore_index=True)

            endtime = datetime.datetime.now()
            print(file_name, '  size: ' + file['file_size'] + '  elapsed: ' + self.time_diff(endtime, starttime))

        # write the samplers that missed the pass criteria
        if not all_failed_df.empty:
            with pd.ExcelWriter('Failed.xlsx') as fail_result_file:
                all_failed_df.to_excel(fail_result_file, sheet_name='failed samplers', index=False)

        # write the collected 90% response times of every scenario
        all_ninety_df.to_excel(all_result_file, sheet_name='90% response time', index=False)

        # write the per-scenario request counts and the worst 90% response time
        scene_count_df.to_excel(all_result_file, sheet_name='scenario summary', index=False)

        all_result_file.close()  # close() writes the workbook (ExcelWriter.save() was removed in pandas 2.0)

        print('All files parsed.')



    def get_fail_df(self, scene_df):
        '''
        Return the samplers that miss the pass criteria:
        90% response time (Ninety) > self.response_time, or error rate > 0.00%.
        The response-time threshold is configurable.
        '''
        if self.response_time:
            scene_df_failed = scene_df.loc[(scene_df.Ninety > self.response_time) | (scene_df.ERROR != '0.00%')]
        else:
            # no threshold configured: filter on the error rate alone
            scene_df_failed = scene_df.loc[scene_df.ERROR != '0.00%']
        return scene_df_failed



    def pd_analyze(self, file_name, data_pd):
        '''Aggregate one JTL file: one statistics row per sampler label.'''
        scene_df = pd.DataFrame(columns=('Sampler', 'sample_count', 'MIN', 'MAX', 'average', 'Ninety',
                                         'Ninety_five', 'throught(s)', 'ERROR', 'Scence'))
        for (sample, sample_df) in data_pd.groupby('label'):  # groupby yields (key, frame) pairs
            sample = sample.replace('-', '')  # strip hyphens from the label

            sample_sort_df = sample_df.sort_values(by='elapsed', axis=0, ascending=True, inplace=False)  # sort elapsed ascending
            sample_sort_df = sample_sort_df.reset_index(drop=True)  # rebuild the index after sorting
            sample_count = sample_df.shape[0]  # number of samples
            # 1-based positions of the 90th, 95th and 50th percentiles
            level_90 = math.ceil(sample_count * 0.9)
            level_95 = math.ceil(sample_count * 0.95)
            level_50 = math.ceil(sample_count * 0.5)
            response_time_90 = sample_sort_df.at[level_90 - 1, 'elapsed']
            response_time_95 = sample_sort_df.at[level_95 - 1, 'elapsed']
            response_time_50 = sample_sort_df.at[level_50 - 1, 'elapsed']  # median, reported in the 'average' column
            max_elapsed = sample_sort_df['elapsed'].max()
            min_elapsed = sample_sort_df['elapsed'].min()
            max_time = sample_sort_df['timeStamp'].max()  # latest timestamp
            min_time = sample_sort_df['timeStamp'].min()  # earliest timestamp
            # throughput in requests per second (timestamps are in milliseconds)
            duration_ms = int(max_time) - int(min_time)
            throught = round(sample_count / duration_ms * 1000, 2) if duration_ms else 0  # guard: zero-length window
            # error rate: share of samples whose success flag is False
            # (assumes pandas parsed the success column as booleans)
            res_proportion = sample_sort_df['success'].value_counts(normalize=True)
            err_ratio = '%.2f%%' % (res_proportion.get(False, 0) * 100)
            scene_df = pd.concat([scene_df,
                                  pd.DataFrame({'Sampler': [sample], 'sample_count': [sample_count],
                                                'MIN': [min_elapsed], 'MAX': [max_elapsed],
                                                'average': [response_time_50], 'Ninety': [response_time_90],
                                                'Ninety_five': [response_time_95], 'throught(s)': [throught],
                                                'ERROR': [err_ratio], 'Scence': [file_name]})],
                                 ignore_index=True)
        return scene_df
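
The excerpt above has no entry point. A minimal driver, assuming the class is importable and the JTL files sit in a ./jtl_files folder (both the threshold and the folder name are placeholders):

if __name__ == '__main__':
    # 1000 -> pass/fail threshold for the 90% response time, in ms;
    # pass None or 0 to check the error rate only
    converter = JtlFileToExcel(1000, './jtl_files')
    converter.read_jtlfile()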

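A side note on chunk_file: pandas can drive the same loop itself through the chunksize argument of read_csv, which returns an iterator of DataFrames. An equivalent sketch (not the original code, same behavior):

import pandas as pd

def chunk_file_alt(path, file_name, chunk_rows=1_000_000):
    # chunksize makes read_csv yield DataFrames of chunk_rows rows,
    # so the explicit get_chunk()/StopIteration loop goes away
    reader = pd.read_csv(path, usecols=[0, 1, 2, 7], chunksize=chunk_rows)
    jtl_pd = pd.concat(reader, ignore_index=True)
    jtl_pd['testSuiteName'] = file_name
    return jtl_pd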
 
