Bulk insert, delete, and query with Elasticsearch

The script below uses the scan and bulk helpers from elasticsearch-py to stream every document out of one index and re-index it into another, plus delete_by_query to clear an index as an asynchronous background task.

#!/usr/bin/env python
# -*- coding: utf-8 -*-

import logging
import time

from elasticsearch import Elasticsearch
from elasticsearch.helpers import bulk, scan

class DealCeleryData():
    def __init__(self, es, c_index, m_index):
        self.es = es
        self.c_index = c_index  # source index to scroll from
        self.m_index = m_index  # target index to bulk into
    def do_scroll(self, index, body):
        try:
            # raise_on_error=False: scan() would otherwise raise ScanError when
            # some shards fail to execute; tolerate shard failures instead.
            return scan(client=self.es, query=body, index=index, scroll=u"10m",
                        size=5000, request_timeout=30, raise_on_error=False)
        except Exception as e:
            logging.error("ERROR: failed to fetch data from ES by scan: %s", e)
            return False
    def delete_index(self, del_index, body):
        # e.g. body = {"query": {"match_all": {}}}
        # wait_for_completion=False runs the deletion as a background task on
        # the cluster; the response carries a task id instead of the result.
        return self.es.delete_by_query(index=del_index, body=body,
                                       request_timeout=120, wait_for_completion=False)
    def getData(self):
        body = {"query": {"match_all": {}}}
        result = self.do_scroll(index=self.c_index, body=body)
        if result is False:
            return  # scroll failed; yield nothing
        for each_record in result:
            new_each_record = each_record['_source']
            yield {
                '_op_type': 'index',
                '_index': self.m_index,
                '_type': "doc",  # _type is only needed on ES 6.x and earlier
                '_source': new_each_record
            }
    def worker(self, actions, chunk_size=100, max_chunk_bytes=100 * 1024 * 1024):
        try:
            success, _ = bulk(self.es, actions=actions, chunk_size=chunk_size,
                              max_chunk_bytes=max_chunk_bytes,
                              raise_on_error=True, request_timeout=180)
            logging.info("bulk indexed %s docs", success)
        except Exception as e:
            logging.error("ERROR: bulk failed: %s", e, exc_info=1)

es = Elasticsearch(["http://localhost:9200"])  # placeholder address; point at your cluster
c_index = "source_index"  # placeholder source index name
m_index = "target_index"  # placeholder target index name

dcObject = DealCeleryData(es, c_index, m_index)
stream = dcObject.getData()
dcObject.worker(stream)
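
Because delete_index passes wait_for_completion=False, delete_by_query returns at once with a task id rather than blocking until the deletion finishes. Here is a minimal sketch of driving it and polling that task, reusing the client above ("stale_index" is a placeholder name, not from the original script):

# Hypothetical usage: clear an index, then poll the background deletion task.
resp = dcObject.delete_index("stale_index", {"query": {"match_all": {}}})
task_id = resp["task"]
while not es.tasks.get(task_id=task_id).get("completed"):
    time.sleep(5)  # poll every 5 seconds until the task reports completion

After a migration run, es.count(index=m_index) is a quick way to confirm the target's document total matches the source.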

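For larger migrations, the same helpers module also offers parallel_bulk, which feeds the identical action stream to a pool of threads. A sketch under the same setup; thread_count and chunk_size here are illustrative values, not tuned ones:

from elasticsearch.helpers import parallel_bulk

# Sketch: same action generator as worker() consumes, but indexed from
# several threads; parallel_bulk yields (ok, item) tuples per action
# instead of a single success count.
for ok, item in parallel_bulk(es, actions=dcObject.getData(),
                              thread_count=4, chunk_size=500):
    if not ok:
        logging.error("failed action: %s", item)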