# Query the ELK (Elasticsearch) API from Python and detect memory leaks.

from elasticsearch import Elasticsearch
import re
import sys
# NOTE(review): raises the recursion limit to 100,000, but nothing in this
# file recurses — presumably left over from an earlier version; verify.
sys.setrecursionlimit(100000)
from elasticsearch import helpers
import matplotlib.pyplot as plt
import datetime,time


class extract_memory_leaks(object):
    """Pull per-process memory samples from Elasticsearch and flag time
    windows in which memory rises almost monotonically (suspected leaks).

    Workflow: ``filter_memory_leak`` scrolls through all matching ES
    documents, groups the ``bytes`` samples per ``dmname``, converts them
    into a +1/0/-1 slope series, then plots every window (of
    ``interval_time`` days) of ``process_name`` where >= 70% of the slopes
    are positive and the last raw value exceeds the first.
    """

    def __init__(self,ip,user_name,password,process_name,interval_time):
        """
        :param ip: Elasticsearch host, passed straight to ``Elasticsearch(hosts=...)``.
        :param user_name: HTTP basic-auth user for Elasticsearch.
        :param password: HTTP basic-auth password.
        :param process_name: ``dmname`` of the single process to analyse.
        :param interval_time: sliding-window length in days.
        """
        self.ip = ip
        self.user_name = user_name
        self.password = password
        self.process_name = process_name
        self.interval_time = interval_time

    def draw_picture(self,process_slope_data_dict,index1,index2):
        """Shoelace-style signed-area test over the slope data for the
        sub-range [index1:index2]; plots the slope series when positive.

        NOTE(review): this method reads the ``"<name>_slope_x_list_caculate"``
        key, but ``filter_memory_leak`` never populates it (the code that
        did is commented out there), so calling this would raise KeyError —
        confirm before re-enabling the commented call sites.
        """
        # Signed polygon area accumulator (shoelace formula).
        s = 0.0
        for index3 in range(0,
                            len(process_slope_data_dict[self.process_name + "_slope_y_list_caculate"][
                                index1:index2]) - 2):
            # Cross term x[i+1]*y[i] - x[i]*y[i+1] for consecutive points.
            a = process_slope_data_dict[self.process_name + "_slope_x_list_caculate"][index1:index2][
                    index3 + 1] * \
                process_slope_data_dict[self.process_name + "_slope_y_list_caculate"][index1:index2][
                    index3] - process_slope_data_dict[self.process_name + "_slope_x_list_caculate"][index1:index2][
                    index3] * \
                process_slope_data_dict[self.process_name + "_slope_y_list_caculate"][index1:index2][
                    index3 + 1]
            s += a
        # Closing term connecting the last point back to the first.
        b = process_slope_data_dict[self.process_name + "_slope_x_list_caculate"][index1:index2][-1] * \
            process_slope_data_dict[self.process_name + "_slope_y_list_caculate"][index1:index2][-1] - \
            process_slope_data_dict[self.process_name + "_slope_x_list_caculate"][index1:index2][0] * \
            process_slope_data_dict[self.process_name + "_slope_y_list_caculate"][index1:index2][0]
        # Positive signed area -> plot the +1/0/-1 slope series for this window.
        if (s + b) / 2 > 0:
            plt.plot(process_slope_data_dict[self.process_name + "_slope_x_list"][index1:index2],
                     process_slope_data_dict[self.process_name + "_slope_y_list"][index1:index2], '-o')
            plt.show()

    def filter_memory_leak(self):
        """Query ES for memory samples, build per-process slope series and
        plot every window where ``self.process_name``'s memory rises in
        >= 70% of the samples and ends higher than it started.

        :return: None (side effect: shows matplotlib figures).
        """
        es = Elasticsearch(hosts=self.ip, http_auth=(self.user_name, self.password))
        # Fetch @timestamp / dmname / bytes for one device, oldest first.
        body = {
            "_source" : {"includes" : [ "@timestamp","dmname","bytes"]},
            "query":{
            "bool":{
                "must":[
                    {
                        "term":{
                            "Device_Identity":"10.245.57.110"
                        }
                    }
                ]
            }
        },
                    "sort":{
                        "@timestamp":{
                        "order":"asc"
                        }
                    }
        }

        query = es.search(index='devices_mem_check-20*',body=body,scroll='5m',size=100)
        results = query['hits']['hits']  # first page of hits
        total = query['hits']['total']  # total hit count reported by ES
        scroll_id = query['_scroll_id']  # scroll cursor used to page through all results
        # NOTE(review): on ES >= 7 'total' is a dict {"value": ..., "relation": ...},
        # which would make int(total / 100) fail — verify client/server version.
        # Fetch the remaining pages via the scroll API.
        for i in range(0, int(total / 100) + 1):
            query_scroll = es.scroll(scroll_id=scroll_id, scroll='5m')['hits']['hits']
            results += query_scroll
        # Group every sample by process: contain_data_dict["<dmname>"] holds the
        # raw bytes values, contain_data_dict["<dmname>_timestamp"] the timestamps.
        contain_data_dict = {}
        process_name_list = []
        for hits in results:
            if hits["_source"]["dmname"] in contain_data_dict:  # dmname already seen
                # Skip duplicates: only append if this exact timestamp is new.
                if hits["_source"]["@timestamp"] not in contain_data_dict[hits["_source"]["dmname"] + '_timestamp']:
                    contain_data_dict[hits["_source"]["dmname"]].append(hits["_source"]["bytes"])
                    contain_data_dict[hits["_source"]["dmname"] + '_timestamp'].append(hits["_source"]["@timestamp"])
            else:
                # First sample for this process: create both lists and record the name.
                contain_data_dict[hits["_source"]["dmname"]] = []
                contain_data_dict[hits["_source"]["dmname"] + '_timestamp'] = []
                process_name_list.append(hits["_source"]["dmname"])
                contain_data_dict[hits["_source"]["dmname"]].append(hits["_source"]["bytes"])
                contain_data_dict[hits["_source"]["dmname"] + '_timestamp'].append(hits["_source"]["@timestamp"])

        # Derive the slope data and store it in process_slope_data_dict.
        process_slope_data_dict = {}
        for process_name in process_name_list:
            process_slope_data_dict[process_name + "_slope_y_list"] = []
            process_slope_data_dict[process_name + "_slope_x_list"] = []
            process_slope_data_dict[process_name + "_slope_y_list_caculate"] = []
            # NOTE(review): "_slope_x_list_caculate" is never populated (see the
            # commented-out lines below), yet draw_picture reads it — verify.
            # process_slope_data_dict[process_name + "_slope_x_list_caculate"] = []
            for index in range(1, len(contain_data_dict[process_name])):
                # Difference between consecutive memory samples.
                slope_y = contain_data_dict[process_name][index] - contain_data_dict[process_name][index - 1]

                # Normalise the ISO timestamps ("...T...") so they can be parsed
                # and compared with datetime arithmetic.
                # Previous timestamp in the pair:
                middle_time1 = re.sub("T", ' ', contain_data_dict[process_name + "_timestamp"][index - 1][:19])
                time1 = datetime.datetime.strptime(middle_time1, "%Y-%m-%d %H:%M:%S")
                # Current timestamp in the pair:
                middle_time2 = re.sub("T", ' ', contain_data_dict[process_name + "_timestamp"][index][:19])
                time2 = datetime.datetime.strptime(middle_time2, "%Y-%m-%d %H:%M:%S")
                # Timestamps were deduped above on the full string; after truncating
                # to second precision they may still collide, so dedupe again here.
                # NOTE(review): the second condition appears to always be true
                # (both lists grow in lockstep) — verify it is needed.
                if time1 == time2 and index < len(contain_data_dict[process_name + "_timestamp"]):
                    continue
                # The commented-out code computed a true slope (delta / seconds)
                # for the area calculation in draw_picture:
                # time_interval = time.mktime(time2.timetuple()) - time.mktime(time1.timetuple())
                # process_slope_data_dict[process_name + "_slope_y_list_caculate"].append(slope_y / time_interval)
                # process_slope_data_dict[process_name + "_slope_x_list_caculate"].append(time.mktime(time2.timetuple()))
                # Use the current sample's timestamp as the slope chart's x value.
                process_slope_data_dict[process_name + "_slope_x_list"].append(time2)
                process_slope_data_dict[process_name + "_slope_y_list_caculate"].append(contain_data_dict[process_name][index])
                # Quantise the slope to -1 / 0 / +1 (falling / flat / rising).
                if slope_y < 0:
                    process_slope_data_dict[process_name + "_slope_y_list"].append(-1)
                elif slope_y == 0:
                    process_slope_data_dict[process_name + "_slope_y_list"].append(0)
                else:
                    process_slope_data_dict[process_name + "_slope_y_list"].append(1)


        start_point = 0  # left boundary of the current sliding window
        for index1 in range(0, len(process_slope_data_dict[self.process_name + "_slope_x_list"])):
            if index1 == start_point:  # only start a new window at the boundary
                # Window end = window start + interval_time days.
                end_time = process_slope_data_dict[self.process_name + "_slope_x_list"][index1] + datetime.timedelta(
                    days=self.interval_time)
                if end_time <= process_slope_data_dict[self.process_name + "_slope_x_list"][-1]:
                    count1 = 0  # number of +1 slopes inside the window
                    index_middle_list = []  # indices whose timestamp falls inside the window
                    for index2, index2_data in enumerate(process_slope_data_dict[self.process_name + "_slope_x_list"]):
                        if index2_data <= end_time:
                            index_middle_list.append(index2)
                    end_index = index_middle_list[-1]
                    for y_row in process_slope_data_dict[self.process_name + "_slope_y_list"][index1:end_index + 1]:
                        if y_row == 1:
                            count1 += 1
                    start_point += 1
                    # If >= 70% of the slopes in the window are positive, treat it
                    # as suspicious.  NOTE(review): original comment said 90%,
                    # but the code uses 0.7 — confirm the intended threshold.
                    if count1 / (end_index + 1 - index1) >= 0.7:
                        # Extend the window rightwards while the ratio holds.
                        while count1 / (end_index + 1 - index1) >= 0.7:
                            if end_index + 1 < len(
                                    process_slope_data_dict[self.process_name + "_slope_x_list"]):
                                end_index += 1
                                if process_slope_data_dict[self.process_name + "_slope_y_list"][end_index] == 1:
                                    count1 += 1
                            else:
                                break
                        # A mostly-rising stretch was found; it is only flagged as a
                        # leak if the last raw value exceeds the first (net growth).

                        # self.draw_picture(process_slope_data_dict,index1,end_index)
                        if process_slope_data_dict[self.process_name + "_slope_y_list_caculate"][end_index] > process_slope_data_dict[self.process_name + "_slope_y_list_caculate"][index1]:
                            plt.plot(process_slope_data_dict[self.process_name + "_slope_x_list"][index1:end_index],
                                     process_slope_data_dict[self.process_name + "_slope_y_list"][index1:end_index], '-o')
                            plt.show()
                        start_point = end_index
                else:  # window would run past the last sample: evaluate the tail segment
                    count2 = 0
                    for y_row in process_slope_data_dict[self.process_name + "_slope_y_list"][index1:]:
                        if y_row == 1:
                            count2 += 1
                    # NOTE(review): the denominator uses len(full list) + 1 - index1
                    # instead of the tail length — looks like an off-by-one; verify.
                    if count2 / (len(process_slope_data_dict[self.process_name + "_slope_y_list"]) + 1 - index1) >= 0.7:
                        # self.draw_picture(process_slope_data_dict,index1,-1)
                        if process_slope_data_dict[self.process_name + "_slope_y_list_caculate"][-1] > process_slope_data_dict[self.process_name + "_slope_y_list_caculate"][index1]:

                            plt.plot(process_slope_data_dict[self.process_name + "_slope_x_list"][index1:],
                                     process_slope_data_dict[self.process_name + "_slope_y_list"][index1:], '-o')
                            plt.show()

# Script entry point.
#
# BUG FIX: the original called extract_memory_leaks() with no arguments,
# which raises TypeError at import time because __init__ requires five
# parameters.  Supply explicit (example) configuration and guard with
# __main__ so importing this module no longer triggers the analysis.
if __name__ == "__main__":
    obtain_initial_data = extract_memory_leaks(
        ip="10.245.57.110:9200",  # Elasticsearch host -- replace with real host
        user_name="elastic",      # basic-auth user -- replace
        password="changeme",      # basic-auth password -- replace
        process_name="plmgrd",    # dmname to analyse (see notes below)
        interval_time=4,          # sliding-window length in days
    )
    obtain_initial_data.filter_memory_leak()

#plmgrd 3
#ipmgrd 1
#ifmd




# print(process_slope_data_dict["plmgrd_slope_x_list"])
#ipmgrd / ifmd / plmgrd with a 4-day window -> figures 1, 2, 3
#ifmd with a 2-day window -> figures 4, 5, 6



# #actions列表中是所有要返回到es中的数据
# # actions = []
# # for process_name in process_name_list:
# #     # print(i)
# #     for index,data in enumerate(contain_data_dict[process_name]):
# #         action = {
# #                     "_index": "zf_process_test",
# #                     "_type": "doc",
# #                     "_source": {
# #                         "dmname": process_name,
# #                         "bytes":data,
# #                         "@timestamp": contain_data_dict[process_name + "_timestamp"][index]
# #                     }
# #                 }
# #         actions.append(action)
# #
# # helpers.bulk(es,actions,index='zf_process_test', raise_on_error=True)

# You may also be interested in: (calix study notes)