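# Note: Elasticsearch rejects a search whose from + size exceeds 10000 (the default index.max_result_window).
# Exporting more hits than that needs special handling (e.g. the scroll or search_after API), which is omitted here.
# Install the client library first:  pip install elasticsearch==6.3.1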
from elasticsearch import Elasticsearch
from elasticsearch.helpers import bulk
import csv
import os
import sys
import datetime
import json
# Workaround for non-ASCII (Chinese) text when writing files. This is a Python 2-only idiom, kept commented out.
# reload(sys)
# sys.setdefaultencoding('utf-8')
# Names of the document fields to export, in the order listed in the config.
columnList = []
# Scratch list holding the values of a single CSV row.
valueList = []
# Load the JSON config: element 0 carries the host, index and column
# settings; element 1 is used as the search request body.
# The encoding is explicit because JSON files are UTF-8, while open()
# otherwise falls back to the platform's locale encoding.
with open('./configs/config.json', encoding='utf-8') as data_file:
    data = json.load(data_file)
es = Elasticsearch([data[0]["EsHost"]])
indexName = data[0]["EsIndex"]
EsBody = data[1]
print(data[0]['columns'])
# Each entry of "columns" is an object whose values are field names;
# collect those values in their declared order.
for column in data[0]["columns"]:
    columnList.extend(column.values())
# Query the cluster info to check that the connection works.
print(es.info())
print(columnList)
# Output file: es_out<YYYYMMDD>.csv (e.g. es_out20210705.csv), placed in the
# current working directory.
today = datetime.date.today()
formatted_today = "{:%Y%m%d}".format(today)
fileName = "es_out{}.csv".format(formatted_today)
filePath = os.getcwd()
print("current file path:", filePath)
file = "/".join((filePath, fileName))
print(file)
def printCsv(file, dataList):
    """Append search hits to a CSV file, creating the file when needed.

    One row is written per hit, with one cell per name in the module-level
    ``columnList``. The header row (``columnList`` itself) is written only
    when the file is new or still empty, so repeated calls keep appending
    to the same file without duplicating the header.

    Args:
        file: Path of the CSV file to create or append to.
        dataList: Sequence of hits; each hit is a mapping whose
            ``'_source'`` entry maps field names to values.

    Errors raised while opening or writing the file are printed rather
    than propagated.
    """
    # Nothing to write for an empty result set.
    if not dataList:
        return
    ifExist = os.path.exists(file)
    print(ifExist)
    if ifExist:
        print("file already exist:", file)
    try:
        # An existing but zero-length file still needs its header row.
        needHeader = not ifExist or os.path.getsize(file) == 0
        # Append mode creates the file if it is missing. newline='' is what
        # the csv module requires to avoid blank lines on Windows, and the
        # explicit encoding keeps non-ASCII text independent of the locale.
        with open(file, 'a', newline='', encoding='utf-8') as csvFile:
            writer = csv.writer(csvFile)
            if needHeader:
                writer.writerow(columnList)
            for data in dataList:
                source = data['_source']
                # Build each row locally so a failure cannot leave stale
                # values behind for the next call. A field missing from a
                # document becomes an empty cell instead of aborting the
                # whole export.
                writer.writerow([source.get(column, '') for column in columnList])
    except IOError as e:
        print("IOError happen in:", e)
    except Exception as e:
        print("Error happen in:", e)
def main():
    """Run the configured search and write its hits to the CSV file."""
    print("start...")
    # Execute the query body from the config against the configured index.
    hits = es.search(index=indexName, body=EsBody)['hits']['hits']
    print(hits)
    # Write the hits to the output file.
    printCsv(file, hits)
    print("end...")


if __name__ == '__main__':
    main()
ES的config.json如下(第二个对象为查询条件)
[
{
"EsHost": "ES ip",
"EsIndex": "ES index",
"columns": [
{
"column1": "evt_dt",
"column14": "company",
"column2": "site",
"column3": "plant",
"column4": "plant_code",
"column5": "rpa_process_name",
"column6": "rpa_times",
"column7": "dl_idl",
"column8": "cost_pre_minute",
"column9": "value",
"column10": "unit",
"column11": "amount",
"column12": "currency",
"column13": "test"
}
]
},
{
"from": 0,
"size": 5000,
"query": {
"bool": {
"filter": [
{
"bool": {
"must": [
{
"range": {
"evt_dt": {
"from": 1625419730000,
"to": null,
"include_lower": false,
"include_upper": true,
"boost": 1
}
}
}
],
"disable_coord": false,
"adjust_pure_negative": true,
"boost": 1
}
}
],
"disable_coord": false,
"adjust_pure_negative": true,
"boost": 1
}
}
}
]
参考文档:https://juejin.cn/post/6844903927163404296