Monitoring ES index document-count changes with Python

Contents

  • 1. Merging DataFrames on a shared key
  • 2. Merging the data: ES index snapshots collected 10, 20, and 30 minutes apart
    • Testing the script

1. Merging DataFrames on a shared key

import pandas as pd

# Create df1 ==> json {'key':'A', 'value':1 } {'key':'B', 'value':2 }
data1 = {'key': ['A', 'B'],
         'value': [1, 2]}
df1 = pd.DataFrame(data1)

# Create df2 ==> {'key':'A', 'value':11 } {'key':'B', 'value':22 }
data2 = {'key': ['A', 'B'],
         'value': [11, 22]}
df2 = pd.DataFrame(data2)

# Create df3 ==> {'key':'A', 'value':111 } {'key':'B', 'value':222 } {'key':'C', 'value':333 }
data3 = {'key': ['A', 'B', 'C'],
         'value': [111, 222, 333]}
df3 = pd.DataFrame(data3)



#### Merge two DataFrames
#==> {'key':'A', 'value_x':1, 'value_y':11 } {'key':'B', 'value_x':2, 'value_y':22 }
>>> mdf1=pd.merge(df1, df2, on='key')
>>> mdf1
  key  value_x  value_y
0   A        1       11
1   B        2       22

#### Merge in the third DataFrame
#==> {'key':'A', 'value_x':1, 'value_y':11, 'value':111 } {'key':'B', 'value_x':2, 'value_y':22, 'value':222 }
# the two merges can also be chained in a single call:
# mdf2 = pd.merge(pd.merge(df1, df2, on='key'), df3, on='key')
>>> mdf2=pd.merge(mdf1, df3, on='key')
>>> mdf2
  key  value_x  value_y  value
0   A        1       11    111
1   B        2       22    222
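
Note that the default inner merge silently drops key 'C', which exists only in df3. The monitoring script in the next section uses how='outer' so that indices present in only one snapshot are kept, with NaN in the missing columns. A minimal sketch using the same df1 and df3 (the suffixes argument is optional and only used here to make the column names clearer):

# Outer merge keeps unmatched keys: 'C' survives, with NaN where df1 has no row
>>> pd.merge(df1, df3, on='key', how='outer', suffixes=('_old', '_new'))
  key  value_old  value_new
0   A        1.0        111
1   B        2.0        222
2   C        NaN        333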

2. Merging the data: ES index snapshots collected 10, 20, and 30 minutes apart

[root@localhost] # cat es-indices-monitor.py
import json
import time
import requests
import os
import sys
import glob
import pandas as pd

def deloldfile(workdir):
  # list all files in the directory
  all_files = glob.glob(os.path.join(workdir, '*'))

  # store each file name together with its access time
  file_list = []
  for file in all_files:
    file_list.append((file, os.path.getatime(file)))

  # sort by access time, oldest first
  file_list.sort(key=lambda x: x[1], reverse=False)

  # delete old files, keeping only the newest ones
  for file in file_list[:-3]: # skip the last three entries; they are the newest
    os.remove(file[0])

def createfile(workdir,fileName):
  if not os.path.exists(workdir):
    os.makedirs(workdir)
  #os.system("find {}/*.json   -type f -ctime +1 -delete".format(workdir) )
  #for fileName in os.listdir(workdir):
  file=open(workdir+fileName,'w',encoding="utf-8")
  return file

def readfile(workdir):
  if not os.path.exists(workdir):
    os.makedirs(workdir)

  # list all files in the directory
  all_files = glob.glob(os.path.join(workdir, '*'))

  # store each file name together with its access time
  file_list = []
  for file in all_files:
    file_list.append((file, os.path.getatime(file)))

  # sort by access time and return the paths, oldest first
  files=[]
  file_list.sort(key=lambda x: x[1], reverse=False)
  for file in file_list:
    files.append(file[0])
  return files

def writejson(file,jsonArr):
  # write one JSON object per line (newline-delimited JSON)
  for js in jsonArr:
    jstr=json.dumps(js)+"\n"
    file.write(jstr)
  file.close()

# fetch _cat/indices as JSON and build the merged snapshot
def getdata(domain,password):
  url = "http://"+domain+"/_cat/indices?format=json"
  # basic-auth credentials
  auth = ('elastic', password)

  # send the GET request with the auth info attached
  response = requests.get(url, auth=auth)

  # check the status code; on success, process the response
  if response.status_code == 200:
     # parse the returned JSON array and drop the fields we do not need
     jsonArr=json.loads(response.text)
     df = pd.json_normalize(jsonArr)
     dfnew = df.drop(["uuid","docs.deleted"], axis=1)
     #print(dfnew)

     # save the raw _cat/indices data to a timestamped JSON file
     workdir="/data/es-indices/"
     workdir_tmp=workdir+"tmp/"
     f_time = time.strftime("%Y-%m-%d_%H-%M-%S",time.localtime())
     filename="es-data-{}.json".format(f_time)
     filename_tmp="tmp-{}.json".format(f_time)

     file=createfile(workdir_tmp,filename_tmp)
     writejson(file,jsonArr)

     # delete old files, keeping only the 3 newest in each directory
     deloldfile(workdir_tmp)
     deloldfile(workdir)
     files=readfile(workdir_tmp)
     #df1=pd.read_json(files[0],lines=True,convert_dates=False)
     if len(files) > 1:
       print(files[0])
       print(files[1])
       df1=pd.read_json(files[0],lines=True)
       df2=pd.read_json(files[1],lines=True)
       # full column list: "health","status","index","uuid","pri","rep","docs.count","docs.deleted","store.size","pri.store.size"
       df1 = df1.drop(["health","status","uuid","pri","rep","docs.deleted","store.size","pri.store.size"], axis=1)
       df2 = df2.drop(["health","status","uuid","pri","rep","docs.deleted","store.size","pri.store.size"], axis=1)
       mdf = pd.merge(df1, df2, on='index', how='outer')
       #print(df1)
     else:
       mdf=dfnew

     # merge the three snapshots to see whether the document count changed:
     # the current count stays in docs.count; after the rename below the oldest
     # snapshot becomes docs.count_30 and the middle one docs.count_20
     #print(mdf)
     mdf2 = pd.merge(dfnew, mdf, on='index', how='outer')
     mdf2 = mdf2.rename(columns={"docs.count_x":"docs.count_30", "docs.count_y":"docs.count_20"})
     #print(mdf2)

     file=createfile(workdir,filename)
     for idx,row in mdf2.iterrows():
       jstr=row.to_json()
       file.write(jstr+"\n")
     file.close()
  else:
    print('Request failed, status code:', response.status_code)
  

domain="196.1.0.106:9200"
password="123456"
getdata(domain,password)
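
The script itself only writes the merged snapshots; to actually alert on indices whose document count has stopped growing, a separate check can consume the newest output file. A minimal sketch assuming the file layout produced above (the check_stalled name and the comparison against both historical columns are illustrative, not part of the original script):

import glob
import os
import pandas as pd

def check_stalled(workdir="/data/es-indices/"):
  # pick the newest merged snapshot written by es-indices-monitor.py
  files = sorted(glob.glob(os.path.join(workdir, "es-data-*.json")), key=os.path.getatime)
  if not files:
    return
  df = pd.read_json(files[-1], lines=True)
  # docs.count comes from _cat/indices as a string; the merged columns are numeric
  current = pd.to_numeric(df["docs.count"])
  stalled = df[(current == df["docs.count_30"]) & (current == df["docs.count_20"])]
  for idx, row in stalled.iterrows():
    print("no new documents in the last ~30 minutes:", row["index"])

check_stalled()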

Testing the script

[root@localhost] #  python3 es-indices-monitor.py
/data/es-indices/tmp/tmp-2023-09-28_13-56-12.json
/data/es-indices/tmp/tmp-2023-09-28_14-11-47.json

# inspect the results
[root@localhost] # /appset/ldm/script # ll /data/es-indices/
total 148
-rw------- 1 root root 46791 Sep 28 13:56 es-data-2023-09-28_13-56-12.json
-rw------- 1 root root 46788 Sep 28 14:11 es-data-2023-09-28_14-11-47.json
-rw------- 1 root root 46788 Sep 28 14:12 es-data-2023-09-28_14-12-07.json
drwx------ 2 root root  4096 Sep 28 14:12 tmp
[root@localhost] # /appset/ldm/script # ll /data/es-indices/tmp/
total 156
-rw------- 1 root root 52367 Sep 28 13:56 tmp-2023-09-28_13-56-12.json
-rw------- 1 root root 52364 Sep 28 14:11 tmp-2023-09-28_14-11-47.json
-rw------- 1 root root 52364 Sep 28 14:12 tmp-2023-09-28_14-12-07.json

# verify the document counts
[root@localhost] # /appset/ldm/script # head  -n 2 /data/es-indices/es-data-2023-09-28_13-56-12.json  |grep 2023_09 |grep count
{"health":"green","status":"open","index":"test_2023_09","pri":"3","rep":"1","docs.count":"14393","store.size":"29.7mb","pri.store.size":"13.9mb","docs.count_30":14391.0,"docs.count_20":14393.0}

[root@localhost] # /appset/ldm/script # head  -n 2 /data/es-indices/es-data-2023-09-28_14-11-47.json  |grep 2023_09 |grep count
{"health":"green","status":"open","index":"test_2023_09","pri":"3","rep":"1","docs.count":"14422","store.size":"33.5mb","pri.store.size":"15.8mb","docs.count_30":14391.0,"docs.count_20":14393.0}

[root@localhost] # /appset/ldm/script # head  -n 2 /data/es-indices/es-data-2023-09-28_14-12-07.json  |grep 2023_09 |grep count
{"health":"green","status":"open","index":"test_2023_09","pri":"3","rep":"1","docs.count":"14427","store.size":"33.5mb","pri.store.size":"15.8mb","docs.count_30":14393.0,"docs.count_20":14422.0}

