Python从Elasticsearch获取数据并输出.csv文件(附完整代码)

# 由于ES限制(index.max_result_window 默认为 10000), 当 from + size > 10000 时需要改用 scroll 或 search_after 等方式特殊处理, 此处略

安装依赖(在命令行中执行): pip install elasticsearch==6.3.1
from elasticsearch import Elasticsearch
from elasticsearch.helpers import bulk
import csv
import os
import sys
import datetime
import json

# 解决写入文件时中文编码问题
# reload(sys)
# sys.setdefaultencoding('utf-8')

# Column names to export, in CSV column order.
columnList = []
# Module-level list for holding the values of a row.
valueList = []


# Load the Elasticsearch settings from the JSON config file.
# JSON is read as UTF-8 explicitly so the result does not depend on the
# platform's locale encoding.
with open('./configs/config.json', encoding='utf-8') as data_file:
    data = json.load(data_file)

# data[0] carries the host, index and column settings; data[1] is passed
# unchanged as the search request body.
es = Elasticsearch([data[0]["EsHost"]])
indexName = data[0]["EsIndex"]
EsBody = data[1]
print(data[0]['columns'])
for column in data[0]["columns"]:
    # Only the values of each mapping are used as column names; the keys
    # are ignored and the file's key order is kept.
    columnList.extend(column.values())

# Check that the connection works.
print(es.info())
print(columnList)



# Output path: a file named like "es_out20210705.csv" (es_out + today's date)
# placed in the current working directory.
today = datetime.date.today()
formatted_today = "{:%Y%m%d}".format(today)
fileName = "es_out{}.csv".format(formatted_today)
filePath = os.getcwd()
print("current file path:", filePath)
file = "/".join((filePath, fileName))
print(file)


def printCsv(file, dataList, columns=None):
    """Append search hits to a CSV file, adding a header row when needed.

    Args:
        file: Path of the CSV file. It is created if missing, otherwise
            rows are appended to it.
        dataList: Sequence of hit dicts; the values of each row are read
            from ``hit['_source']``.
        columns: Field names to export, in column order. Defaults to the
            module-level ``columnList``.

    Returns:
        None. Nothing is written when ``dataList`` is empty.

    Errors raised while writing rows are printed rather than propagated;
    errors raised while opening the file propagate to the caller.
    """
    # Guard against an empty result list.
    if not dataList:
        return

    if columns is None:
        columns = columnList

    # Does the file already exist?
    ifExist = os.path.exists(file)
    print(ifExist)
    if ifExist:
        # Existing file: rows are appended after the current content.
        print("file already exist:", file)

    # A new file needs the header row, and so does an existing empty one.
    needHeader = not ifExist or os.path.getsize(file) == 0

    # newline='' lets the csv module control line endings (avoids blank
    # rows on Windows); UTF-8 keeps non-ASCII text independent of locale.
    with open(file, 'a', newline='', encoding='utf-8') as csvFile:
        try:
            writer = csv.writer(csvFile)
            if needHeader:
                writer.writerow(columns)

            # One CSV row per hit; a field missing from a hit becomes an
            # empty cell instead of aborting the whole export.
            for data in dataList:
                source = data.get('_source', {})
                writer.writerow([source.get(column, "") for column in columns])
        except IOError as e:
            print("IOError happen in:", e)
        except Exception as e:
            print("Error happen in:", e)





def main():
    """Run the configured search and export the returned hits to the CSV file.

    Uses the module-level ``es`` client, ``indexName``, ``EsBody`` and
    ``file``. Only the hits returned by this single search call are
    exported.
    """
    print("start...")

    # Fetch the query result.
    res = es.search(index=indexName, body=EsBody)
    resultList = res['hits']['hits']
    print(resultList)

    # Write the hits to the CSV file.
    printCsv(file, resultList)

    # Report completion only after the export has run.
    print("end...")


if __name__ == '__main__':
    main()

ES的config.json如下(第二个对象为查询条件)

[
  {
    "EsHost": "ES ip",
    "EsIndex": "ES index",
    "columns": [
      {
        "column1": "evt_dt",
        "column14": "company",
        "column2": "site",
        "column3": "plant",
        "column4": "plant_code",
        "column5": "rpa_process_name",
        "column6": "rpa_times",
        "column7": "dl_idl",
        "column8": "cost_pre_minute",
        "column9": "value",
        "column10": "unit",
        "column11": "amount",
        "column12": "currency",
        "column13": "test"
      }
    ]
  },
  {
    "from": 0,
    "size": 5000,
    "query": {
      "bool": {
        "filter": [
          {
            "bool": {
              "must": [
                {
                  "range": {
                    "evt_dt": {
                      "from": 1625419730000,
                      "to": null,
                      "include_lower": false,
                      "include_upper": true,
                      "boost": 1
                    }
                  }
                }
              ],
              "disable_coord": false,
              "adjust_pure_negative": true,
              "boost": 1
            }
          }
        ],
        "disable_coord": false,
        "adjust_pure_negative": true,
        "boost": 1
      }
    }
  }
]

参考文档:https://juejin.cn/post/6844903927163404296

你可能感兴趣的:(python,python,elasticsearch,csv)