from elasticsearch import Elasticsearch
from elasticsearch import helpers
import re
import sys
sys.setrecursionlimit(100000)  # raise the recursion limit to 100,000
import matplotlib.pyplot as plt
import datetime
import time
class extract_memory_leaks(object):
    def __init__(self, ip, user_name, password, process_name, interval_time):
        """
        :param ip: Elasticsearch host address
        :param user_name: Elasticsearch user name
        :param password: Elasticsearch password
        :param process_name: process name (dmname) whose memory series is analysed
        :param interval_time: length of the sliding analysis window, in days
        """
        self.ip = ip
        self.user_name = user_name
        self.password = password
        self.process_name = process_name
        self.interval_time = interval_time
    def draw_picture(self, process_slope_data_dict, index1, index2):
        """Plot the segment [index1:index2] when its shoelace-style signed area is
        positive, i.e. when the memory curve trends upward over the segment.
        Note: this reads the "_slope_x_list_calculate" series, which is only
        populated by the commented-out lines in filter_memory_leak()."""
        xs = process_slope_data_dict[self.process_name + "_slope_x_list_calculate"][index1:index2]
        ys = process_slope_data_dict[self.process_name + "_slope_y_list_calculate"][index1:index2]
        # accumulate the cross terms of the shoelace formula
        s = 0.0
        for index3 in range(0, len(ys) - 2):
            s += xs[index3 + 1] * ys[index3] - xs[index3] * ys[index3 + 1]
        # closing term between the last and the first point
        b = xs[-1] * ys[-1] - xs[0] * ys[0]
        if (s + b) / 2 > 0:
            plt.plot(process_slope_data_dict[self.process_name + "_slope_x_list"][index1:index2],
                     process_slope_data_dict[self.process_name + "_slope_y_list"][index1:index2], '-o')
            plt.show()
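    # A quick, hypothetical sanity check of the signed-area test above (values are
    # illustrative, not from the original script): with xs = [0, 1, 2] and
    # ys = [0, 1, 2], the single cross term is 1*0 - 0*1 = 0, the closing term is
    # 2*2 - 0*0 = 4, so the signed area is (0 + 4) / 2 = 2 > 0 and the segment plots.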
    def filter_memory_leak(self):
        """
        Query per-process memory samples from Elasticsearch, derive the sign of the
        memory delta between consecutive samples, and plot every time window in
        which memory rises in at least 70% of the samples.
        :return: None (shows matplotlib figures for suspicious segments)
        """
        es = Elasticsearch(hosts=self.ip, http_auth=(self.user_name, self.password))
        body = {
            "_source": {"includes": ["@timestamp", "dmname", "bytes"]},
            "query": {
                "bool": {
                    "must": [
                        {
                            "term": {
                                "Device_Identity": "10.245.57.110"
                            }
                        }
                    ]
                }
            },
            "sort": {
                "@timestamp": {
                    "order": "asc"
                }
            }
        }
        query = es.search(index='devices_mem_check-20*', body=body, scroll='5m', size=100)
        results = query['hits']['hits']  # first page of hits
        # total hit count; on Elasticsearch 6.x this is an int (on 7+ it becomes a dict)
        total = query['hits']['total']
        scroll_id = query['_scroll_id']  # scroll cursor used to page through all results
        # fetch the remaining pages, refreshing the scroll id returned by each call
        for i in range(0, int(total / 100) + 1):
            page = es.scroll(scroll_id=scroll_id, scroll='5m')
            scroll_id = page['_scroll_id']
            results += page['hits']['hits']
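        # Alternative sketch (not used by the original flow): the already-imported
        # helpers module can replace the manual scroll loop; preserve_order keeps
        # the "@timestamp" sort from the query body:
        #
        # results = list(helpers.scan(es, query=body, index='devices_mem_check-20*',
        #                             scroll='5m', preserve_order=True))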
        # Group the samples in contain_data_dict: for each process (dmname) keep
        # its memory values ("bytes") and the matching timestamps.
        contain_data_dict = {}
        process_name_list = []
        for hits in results:
            if hits["_source"]["dmname"] in contain_data_dict:  # dmname already seen
                # skip exact duplicate timestamps for this process
                if hits["_source"]["@timestamp"] not in contain_data_dict[hits["_source"]["dmname"] + '_timestamp']:
                    contain_data_dict[hits["_source"]["dmname"]].append(hits["_source"]["bytes"])
                    contain_data_dict[hits["_source"]["dmname"] + '_timestamp'].append(hits["_source"]["@timestamp"])
            else:
                contain_data_dict[hits["_source"]["dmname"]] = []
                contain_data_dict[hits["_source"]["dmname"] + '_timestamp'] = []
                process_name_list.append(hits["_source"]["dmname"])
                contain_data_dict[hits["_source"]["dmname"]].append(hits["_source"]["bytes"])
                contain_data_dict[hits["_source"]["dmname"] + '_timestamp'].append(hits["_source"]["@timestamp"])
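        # Resulting shape of contain_data_dict (values are illustrative only):
        # {
        #     "plmgrd":           [10240, 10304, ...],
        #     "plmgrd_timestamp": ["2019-01-01T00:00:00.000Z", ...],
        #     ...
        # }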
        # Post-process the grouped samples into process_slope_data_dict.
        process_slope_data_dict = {}
        for process_name in process_name_list:
            process_slope_data_dict[process_name + "_slope_y_list"] = []
            process_slope_data_dict[process_name + "_slope_x_list"] = []
            process_slope_data_dict[process_name + "_slope_y_list_calculate"] = []
            # process_slope_data_dict[process_name + "_slope_x_list_calculate"] = []
            for index in range(1, len(contain_data_dict[process_name])):
                slope_y = contain_data_dict[process_name][index] - contain_data_dict[process_name][index - 1]
                # Parse the timestamps so they can be compared and shifted later
                # (e.g. time2 + datetime.timedelta(days=1) adds one day).
                # first timestamp of the pair
                middle_time1 = re.sub("T", ' ', contain_data_dict[process_name + "_timestamp"][index - 1][:19])
                time1 = datetime.datetime.strptime(middle_time1, "%Y-%m-%d %H:%M:%S")
                # second timestamp of the pair (the current sample)
                middle_time2 = re.sub("T", ' ', contain_data_dict[process_name + "_timestamp"][index][:19])
                time2 = datetime.datetime.strptime(middle_time2, "%Y-%m-%d %H:%M:%S")
                # The earlier de-duplication compares full timestamps; after truncating
                # to whole seconds two samples can still collide, so de-duplicate again.
                if time1 == time2:
                    continue
                # True per-second slope over each interval, for the area computation in draw_picture():
                # time_interval = time.mktime(time2.timetuple()) - time.mktime(time1.timetuple())
                # process_slope_data_dict[process_name + "_slope_y_list_calculate"].append(slope_y / time_interval)
                # process_slope_data_dict[process_name + "_slope_x_list_calculate"].append(time.mktime(time2.timetuple()))
                # use the current timestamp as the x coordinate of the slope plot
                process_slope_data_dict[process_name + "_slope_x_list"].append(time2)
                process_slope_data_dict[process_name + "_slope_y_list_calculate"].append(contain_data_dict[process_name][index])
                # record only the sign of the memory delta: -1 falling, 0 flat, 1 rising
                if slope_y < 0:
                    process_slope_data_dict[process_name + "_slope_y_list"].append(-1)
                elif slope_y == 0:
                    process_slope_data_dict[process_name + "_slope_y_list"].append(0)
                else:
                    process_slope_data_dict[process_name + "_slope_y_list"].append(1)
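        # Illustrative example (hypothetical values): a byte series
        # [100, 120, 120, 110, 130] has deltas [20, 0, -10, 20] and is encoded
        # in "_slope_y_list" as [1, 0, -1, 1].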
        start_point = 0  # left boundary of the sliding window
        for index1 in range(0, len(process_slope_data_dict[self.process_name + "_slope_x_list"])):
            if index1 == start_point:  # only start a window at the current boundary
                end_time = process_slope_data_dict[self.process_name + "_slope_x_list"][index1] + datetime.timedelta(
                    days=self.interval_time)
                if end_time <= process_slope_data_dict[self.process_name + "_slope_x_list"][-1]:
                    count1 = 0  # counts the rising (+1) samples inside the window
                    index_middle_list = []  # indices whose timestamp falls inside the window
                    for index2, index2_data in enumerate(process_slope_data_dict[self.process_name + "_slope_x_list"]):
                        if index2_data <= end_time:
                            index_middle_list.append(index2)
                    end_index = index_middle_list[-1]  # last index inside the window
                    for y_row in process_slope_data_dict[self.process_name + "_slope_y_list"][index1:end_index + 1]:
                        if y_row == 1:
                            count1 += 1
                    start_point += 1
                    # if memory rose in at least 70% of the window's samples, treat it as suspicious
                    if count1 / (end_index + 1 - index1) >= 0.7:
                        # extend the window to the right while the rising ratio stays at or above 70%
                        while count1 / (end_index + 1 - index1) >= 0.7:
                            if end_index + 1 < len(
                                    process_slope_data_dict[self.process_name + "_slope_x_list"]):
                                end_index += 1
                                if process_slope_data_dict[self.process_name + "_slope_y_list"][end_index] == 1:
                                    count1 += 1
                            else:
                                break
                        # A segment whose rising ratio reaches 70% has been found; if its last
                        # memory value exceeds its first, memory grew over the segment: flag it.
                        # self.draw_picture(process_slope_data_dict, index1, end_index)
                        if process_slope_data_dict[self.process_name + "_slope_y_list_calculate"][end_index] > process_slope_data_dict[self.process_name + "_slope_y_list_calculate"][index1]:
                            plt.plot(process_slope_data_dict[self.process_name + "_slope_x_list"][index1:end_index],
                                     process_slope_data_dict[self.process_name + "_slope_y_list"][index1:end_index], '-o')
                            plt.show()
                        start_point = end_index
                else:  # the window reaches past the last sample: handle the tail segment directly
                    count2 = 0
                    for y_row in process_slope_data_dict[self.process_name + "_slope_y_list"][index1:]:
                        if y_row == 1:
                            count2 += 1
                    if count2 / (len(process_slope_data_dict[self.process_name + "_slope_y_list"]) - index1) >= 0.7:
                        # self.draw_picture(process_slope_data_dict, index1, -1)
                        if process_slope_data_dict[self.process_name + "_slope_y_list_calculate"][-1] > process_slope_data_dict[self.process_name + "_slope_y_list_calculate"][index1]:
                            plt.plot(process_slope_data_dict[self.process_name + "_slope_x_list"][index1:],
                                     process_slope_data_dict[self.process_name + "_slope_y_list"][index1:], '-o')
                            plt.show()
# Example invocation; the connection and analysis parameters below are placeholders
# (the original script passed no arguments, which would not run) -- adjust to your environment.
obtain_initial_data = extract_memory_leaks(ip="127.0.0.1", user_name="elastic",
                                           password="changeme", process_name="plmgrd",
                                           interval_time=4)
obtain_initial_data.filter_memory_leak()
# plmgrd 3
# ipmgrd 1
# ifmd
# print(process_slope_data_dict["plmgrd_slope_x_list"])
# ipmgrd / ifmd / plmgrd, 4-day window: figures 1, 2, 3
# ifmd, 2-day window: figures 4, 5, 6
# The commented-out block below writes all collected samples back to ES via the
# bulk helper; `actions` holds every document to index. Note that it references
# local variables of filter_memory_leak, so it only runs inside that method.
# actions = []
# for process_name in process_name_list:
#     for index, data in enumerate(contain_data_dict[process_name]):
#         action = {
#             "_index": "zf_process_test",
#             "_type": "doc",
#             "_source": {
#                 "dmname": process_name,
#                 "bytes": data,
#                 "@timestamp": contain_data_dict[process_name + "_timestamp"][index]
#             }
#         }
#         actions.append(action)
#
# helpers.bulk(es, actions, index='zf_process_test', raise_on_error=True)
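# A minimal sketch of the same write-back as a standalone function, assuming an `es`
# client and the structures built in filter_memory_leak (the function name and the
# generator-based actions are illustrative, not part of the original script):
#
# def write_back(es, process_name_list, contain_data_dict, index_name="zf_process_test"):
#     actions = ({"_index": index_name,
#                 "_type": "doc",
#                 "_source": {"dmname": name,
#                             "bytes": value,
#                             "@timestamp": contain_data_dict[name + "_timestamp"][i]}}
#                for name in process_name_list
#                for i, value in enumerate(contain_data_dict[name]))
#     helpers.bulk(es, actions, raise_on_error=True)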