压力测试过程中,采集服务器性能数据

通过Python脚本与Linux命令结合的方式采集服务器性能数据,并根据测试过程中服务器当前的TCP连接数量来决定数据采集是否结束。
脚本主要有三个操作,第一个是性能数据初步采集,通过调用linux的sar和iostat命令,将数据写入原始文件中。采集完成后,执行性能指标提取脚本,从原始指标文件提取有效的数据写入最终的文件中,并进行打包操作。
代码只是本人为满足工作所需而写,算不上很好,仅仅够用而已。

从原始文件提取数据的配置文件,根据服务器语言类型区分:
abstractConf_ch.xml—中文
abstractConf_en.xml—英文
配置文件主要是指明原始文件路径并按照需求使用linux的cat、egrep、awk命令从文件中提取数据


<abstract>
    <res_file name="res/CPU">
        <uniqflag>CPU</uniqflag>
        <object_file>result/cpu_status</object_file>
        <graphtitle>Cpu_Status</graphtitle>
        <linelabel>%user %system</linelabel>
        <x_y_label>Time(s) Cpu_Percent(%)</x_y_label>
        <cmd>cat %s | egrep -v "Linux|^$|%s" | awk 'BEGIN {print "%s\n%s\n%s"}{if($2 !~/AM|PM/) print $3,$5}' >> %s</cmd>
    </res_file>
    ...............
    ...............
</abstract>

获取服务连接数量

# coding:utf-8
#__author__ = 'Libiao'

import subprocess

class GetLinkingNumber(object):
    """Count TCP connections by filtering ``netstat -tnap`` output."""

    def __init__(self):
        pass

    def _count(self, pattern):
        """Return how many ``netstat -tnap`` tcp lines contain *pattern*.

        ``communicate()`` is used so the pipe is closed and the child shell
        is reaped on every call; this method is polled in long-running loops.
        """
        proc = subprocess.Popen(
            "netstat -tnap | grep tcp | grep %s | wc -l" % pattern,
            stdout=subprocess.PIPE, shell=True)
        output = proc.communicate()[0]
        # ``wc -l`` always prints a number; fall back to 0 on empty output.
        return int(output.strip() or 0)

    def getLinkingNumber(self, servers):
        """Return a list of connection counts for *servers*.

        :param servers: either a single grep pattern (``str``), giving a
            one-element list, or a ``dict`` whose *values* are used as grep
            patterns, giving one count per entry in ``dict`` iteration order.
            Any other type yields an empty list.
        :return: list of ``int`` counts.

        NOTE(review): in the dict case the values (not the keys) are matched
        against the netstat lines -- confirm this is the intended column.
        """
        if isinstance(servers, str):
            return [self._count(servers)]
        if isinstance(servers, dict):
            return [self._count(pattern) for pattern in servers.values()]
        return []

需要由主程序执行的linux命令

#!/bin/bash
# Start the raw performance samplers. Each command below is given an interval
# of 10 seconds and no sample count, is sent to the background with '&', and
# appends its output to a file under res/ (the directory must already exist).

# Network device statistics
sar -n DEV 10 >>res/NetWork &
# Extended per-device disk statistics, reported in kilobytes
iostat -x -d -k 10 >>res/Disk &
# Memory utilisation
sar -r 10 >>res/Memory &
# Run-queue length and load averages
sar -q 10 >>res/System_load_average &
# CPU utilisation
sar -u 10 >>res/CPU &
# I/O and transfer-rate statistics
sar -b 10 >>res/TPS &

数据采集代码主方法

#-*- coding:utf-8 -*-

"""
Created on 2015年10月16日

@author: LiBiao
"""

import time,os
import subprocess
import multiprocessing
from write_log import writeLog
import del_old_file
from record_test_data import Record_Data
from server_memory_collect import serverMemoryCollect
from get_linking_number import GetLinkingNumber

# Parameters that must be set by hand.
# Maps a port (as a string) to a service label; both are handed to the
# per-service memory collector and the values label the connection log.
SERVERS_D = {'1935':'srs-rtmp','18080':'srs-hls','80':'nginx'} # may be srs, nginx or ATS

# Interval, in seconds, slept between connection-count checks while the
# count is above the baseline.
INTERVAL_TIME = 10


class KPI_Collect(object):
    """Run one system-KPI collection pass gated by TCP connection counts.

    The per-service connection counts sampled in ``__init__`` are the
    baseline.  ``main_start`` waits until the current total rises above the
    baseline, starts the collectors, and ``to_stop_subprocess`` stops them
    once the total has stayed at or below the baseline for ``_IDLE_LIMIT``
    seconds of consecutive checks.
    """

    # Seconds slept between checks while the total is at/below the baseline.
    _IDLE_STEP = 5
    # Accumulated idle seconds after which the collectors are stopped.
    _IDLE_LIMIT = 15

    def __init__(self):
        self.getLinkNum = GetLinkingNumber()
        # Baseline connection counts, one per SERVERS_D entry.
        self.TCP_COUNT = self.getLinkNum.getLinkingNumber(SERVERS_D)
        self.tcpRecord = Record_Data("res/linking_number")

    def getStr(self, alist):
        """Return a one-element list holding the items of *alist* joined by spaces."""
        return [' '.join(str(item) for item in alist).rstrip(' ')]

    def sys_kpi_collect(self):
        """Launch ``./collect.sh`` and block until collection is stopped."""
        flag = '1'
        popen = subprocess.Popen('./collect.sh', stdout=subprocess.PIPE, shell=True)
        writeLog('INFO','>>>>> 性能指标采集进程执行中.....')
        self.to_stop_subprocess(flag, popen)

    def _kill_collectors(self, popen):
        """Kill leftover ``sar``/``iostat`` processes and reap *popen*."""
        subprocess.call("killall -9 sar iostat", shell=True)
        # Only signal the launcher if it has not exited yet, then reap it so
        # no zombie is left behind.
        if popen.poll() is None:
            popen.kill()
            writeLog("INFO", r">>>>> 等待子进程终止")
        popen.wait()

    def to_stop_subprocess(self, flag, popen):
        """Record connection counts until collection should stop, then stop it.

        :param flag: ``'1'`` selects connection-count driven termination; any
            other value simply waits for *popen* to exit on its own.
        :param popen: the ``subprocess.Popen`` handle of the collector script.
        """
        curr_tcpnum = self.getLinkNum.getLinkingNumber(SERVERS_D)
        # Header rows: title, one label per configured service, axis labels.
        self.tcpRecord.recordData(
            ["srs&nginx Linking", ' '.join(SERVERS_D.values()), "Time(s) Numbers"])
        self.tcpRecord.recordData(self.getStr(self.TCP_COUNT))

        # String equality, not identity: ``is`` only worked through interning.
        if flag == '1':
            baseline = sum(self.TCP_COUNT)
            idle_seconds = 0
            while True:
                if sum(curr_tcpnum) > baseline:
                    # Load is still present: reset the idle timer.
                    idle_seconds = 0
                    time.sleep(INTERVAL_TIME)
                elif idle_seconds < self._IDLE_LIMIT:
                    idle_seconds += self._IDLE_STEP
                    time.sleep(self._IDLE_STEP)
                else:
                    # Connections stayed at/below the baseline long enough.
                    self._kill_collectors(popen)
                    break
                curr_tcpnum = self.getLinkNum.getLinkingNumber(SERVERS_D)
                self.tcpRecord.recordData(self.getStr(curr_tcpnum))
        else:
            writeLog("INFO", r">>>>> 等待子进程终止")
            # communicate() drains the stdout pipe while waiting, instead of
            # busy-polling and flooding the log.
            popen.communicate()
        writeLog("INFO", r">>>>> 性能指标采集完成")

    def is_process_exists(self, name):
        """Return 1 if ``ps ax`` lists a process matching *name*, else 0."""
        cmd = "ps ax | grep %s | grep -v grep" % name
        p = subprocess.Popen(cmd, stdout=subprocess.PIPE, shell=True)
        # Read before waiting so a full pipe can never block the child.
        output = p.communicate()[0]
        return 1 if output else 0

    def main_start(self):
        """Wait for load to appear, run the collectors, record the elapsed time."""
        timeRecord = Record_Data("res/timeConsum")
        for server, num in zip(SERVERS_D.values(), self.TCP_COUNT):
            writeLog("INFO", r">>>>> 初始 %s 服务连接数 %d" % (server, num))
        curr_tcpN = self.getLinkNum.getLinkingNumber(SERVERS_D)
        time.sleep(10)

        baseline = sum(self.TCP_COUNT)
        # Poll once per second until the total exceeds the baseline.
        while sum(curr_tcpN) <= baseline:
            time.sleep(1)
            curr_tcpN = self.getLinkNum.getLinkingNumber(SERVERS_D)

        start_times = time.time()
        for server, num in zip(SERVERS_D.values(), curr_tcpN):
            writeLog("INFO", r">>>>> 指标采集任务开始,当前 %s 连接数 %d" % (server, num))

        # Remove old KPI files.
        del_old_file.Del_Old_File("res/").del_old_file()

        # One separate process per service (srs, nginx, ...) collects that
        # service's memory usage.
        for port, server in SERVERS_D.items():
            multiprocessing.Process(
                target=serverMemoryCollect,
                args=([port, server], INTERVAL_TIME, baseline, self.getLinkNum),
            ).start()

        # Collect the system-level KPI data (blocks until collection ends).
        self.sys_kpi_collect()

        writeLog("INFO", r">>>>> 性能数据采集结束!")
        time_consum = time.time() - start_times
        timeRecord.recordData(["%s" % str(time_consum)])

# Script entry point: constructing KPI_Collect samples the baseline
# connection counts, then main_start() runs the collection.
if __name__ == '__main__':
    kpiCollect = KPI_Collect()
    kpiCollect.main_start()

采集其他服务进程消耗内存的代码

#-*- coding:utf-8 -*-

"""
Created on 2015年10月16日

@author: LiBiao
"""

import time
import subprocess
from write_log import writeLog
from record_test_data import Record_Data

def _read_rss_kb(pid):
    """Return the ``VmRSS`` value (kB) from ``/proc/<pid>/status``.

    Returns 0 when the process has exited or exposes no ``VmRSS`` line, so
    one vanished process does not abort the whole collection.
    """
    try:
        with open("/proc/%s/status" % pid) as status:
            for line in status:
                if line.startswith("VmRSS"):
                    return int(line.split()[1])
    except (IOError, OSError, ValueError, IndexError):
        return 0
    return 0


def serverMemoryCollect(servers,intervaltime,tcpNum,getLinkObj):
    """Record the resident memory used by one service while it is under load.

    :param servers: two-item sequence ``[port, service_name]``; the name is
        used to find the PIDs via ``ps``, the port to count connections.
    :param intervaltime: seconds slept between samples while the connection
        count is above *tcpNum*.
    :param tcpNum: baseline connection count; sampling stops after the count
        has stayed at or below it for 15 seconds of consecutive checks.
    :param getLinkObj: object providing ``getLinkingNumber(pattern)``.
    :return: ``None`` normally, ``0`` if an ``IOError`` interrupted recording.

    NOTE(review): the visible caller passes the baseline summed over all
    services, while this function counts a single port -- confirm intended.
    """
    port, name = servers[0], servers[1]
    memRecord = Record_Data("res/%s" %(name+":"+port))
    cmd = "ps -ef | grep %s | grep -v grep | awk '{print $2}'" % name
    finder = subprocess.Popen(cmd,stdout=subprocess.PIPE,shell=True)
    writeLog("INFO",">>>>> %s 指标采集进程执行中....." % name)
    # communicate() closes the pipe and reaps the shell; int() normalises the
    # PID tokens regardless of whether the pipe yields str or bytes.
    pids = [int(token) for token in finder.communicate()[0].split()]

    heard = [name,'used','Linking_Number Memory_Capacity(MB)']
    try:
        memRecord.recordData(heard)
        curr_tcpN = sum(getLinkObj.getLinkingNumber(port))
        loops = 0
        while True:
            # Sum VmRSS (kB) over all matching processes, recorded in whole MB.
            total_kb = sum(_read_rss_kb(pid) for pid in pids)
            memRecord.recordData([str(total_kb // 1024)])
            if curr_tcpN <= tcpNum:
                if loops == 15:
                    # Connections stayed at or below the baseline for 15s: stop.
                    break
                loops += 5
                time.sleep(5)
            else:
                loops = 0
                time.sleep(intervaltime)
            curr_tcpN = sum(getLinkObj.getLinkingNumber(port))

        writeLog("INFO",r">>>>> %s 进程内存采集完成" % name)
    except IOError as err:
        writeLog("INFO","File error: " + str(err))
        return 0

从原始数据文件提取有效数据并写入新的文件

# -*- coding: utf-8 -*-   
'''
Created on 2015年9月14日

@author: LiBiao
'''

import os,time
import subprocess
import getCmds
import del_old_file
from write_log import writeLog


# Settings that must be configured by hand
#SERVER_NAME = ['srs_2.0.0.','nginx']#'nginx'    # may be nginx or srs
# Key/value pairs used by to_tar() to build the per-service file names
# res/<value>:<key> that are copied into result/.
SERVERS_D = {'1935':'srs-rtmp','18080':'srs-hls','80':'nginx'}

# Locale value that selects the English extraction config; any other system
# locale selects the Chinese one (see getConfPath).
LANG = "en_US.UTF-8"

# Get the language currently used by the system
def getSysLANG():
    """Return the ``LANG`` environment variable, or ``""`` when it is unset.

    Reads the environment directly instead of spawning ``echo $LANG``: no
    child process is left unreaped, and the result is always a text string,
    so it compares correctly against the module-level ``LANG`` constant.
    """
    return os.environ.get("LANG", "").strip()

# Pick the extraction config file that matches the system locale
def getConfPath():
    """Return the English config path when the system locale equals ``LANG``,
    otherwise the Chinese config path."""
    path_by_match = {
        True: "./conf/abstractConf_en.xml",
        False: "./conf/abstractConf_ch.xml",
    }
    return path_by_match[getSysLANG() == LANG]

class AbstractKPI(object):
    """Run the shell commands that extract KPI data from the raw files."""

    def __init__(self,*args):
        # Exactly one positional argument is expected: the iterable of
        # shell command strings.
        (self.cmds,) = args

    def abstract_kpi(self):
        """Run every command and return only after all of them have finished.

        The commands are started together and then awaited one by one.
        Without the wait, the caller would go on to archive the output files
        while the extraction commands were still writing them.
        """
        running = [
            subprocess.Popen(cmd, stdout=subprocess.PIPE, shell=True)
            for cmd in self.cmds
        ]
        for proc in running:
            # communicate() drains the pipe and reaps the child.
            proc.communicate()


# Get the local IP address, used to tell this machine's data apart from
# data produced on other machines
def get_local_ip():
    """Return the first IPv4 address reported by ``ifconfig``, or ``""``.

    Both ifconfig layouts are recognised: the older ``inet addr:1.2.3.4``
    and the newer ``inet 1.2.3.4``.  ``inet6`` lines are never matched.
    """
    import re  # local import: ``re`` is not among this file's imports

    ip = ""  # bound up front so the return below can never hit an unbound name
    try:
        output = os.popen("ifconfig").read()
        match = re.search(r"inet (?:addr:)?(\d{1,3}(?:\.\d{1,3}){3})", output)
        if match:
            ip = match.group(1)
    except Exception as e:
        print(e)
    return ip

# Pack the final collected data into a tar archive
def to_tar():
    """Copy the remaining result files into ``result/`` and archive that directory.

    The archive is named ``SYS_KPI_<ip>_<timestamp>.tar`` and is created in
    the current working directory.

    :raises SystemExit: with status 1 when ``tar`` cannot be started or exits
        with a non-zero status (the failure is logged first).
    """
    ip = get_local_ip()
    times = time.strftime("%Y-%m-%d-%H-%M-%S",time.localtime())

    # Argument lists (no shell) keep names containing ':' or spaces intact.
    sources = ["res/linking_number", "res/timeConsum"]
    sources.extend("res/%s:%s" % (v, k) for k, v in SERVERS_D.items())
    # Best effort, as before: a missing source file does not abort packaging.
    subprocess.call(["cp"] + sources + ["result/"])

    files = ["result/" + filename for filename in os.listdir("result/")]
    archive = "SYS_KPI_%s_%s.tar" % (ip, times)
    # subprocess.call() reports failure through the exit status, not through
    # an exception, so the status has to be checked explicitly.
    try:
        status = subprocess.call(["tar", "-cf", archive] + files)
    except OSError as err:
        failure = str(err)
    else:
        failure = "" if status == 0 else "tar exit status %d" % status
    if failure:
        writeLog("ERROR",r">>>>> 文件压缩出现错误 %s" % failure)
        raise SystemExit(1)

    writeLog("INFO",r">>>>> 指标文件打包完成")


# Main entry function of the script
def main_start():
    """Extract the useful KPI data from the raw files and pack the results.

    Steps, in order: clear old KPI files under ``result/``, load the
    extraction commands from the locale-specific config file, run them,
    then archive the ``result/`` directory.
    """
    # Remove old KPI files
    del_old_file.Del_Old_File("result/").del_old_file()

    # Commands that parse the raw KPI files, read from the config file
    extraction_cmds = getCmds.Get_Cmds(getConfPath()).getcmds()

    # Extract the useful data from the raw KPI files
    extractor = AbstractKPI(extraction_cmds)
    extractor.abstract_kpi()

    # Pack the parsed KPI files found under result/
    to_tar()
    writeLog("INFO",r">>>>> 指标数据提取并打包完成")

# Run the extract-and-pack flow when this file is executed as a script.
if __name__ == '__main__':
    main_start()

脚本中采集数据的命令是linux的,其实这并不是最合适的处理方式,之前只是为了满足工作所需。目前正在使用python第三方模块psutil中的一些方法来执行服务器性能数据的采集,这样的话,脚本就会更加符合python开发的模式。

你可能感兴趣的:(性能测试,python,压力测试,性能)