Python 操作 Elasticsearch(ES)的增、删、改、查(单条插入、批量加载、根据 id 更新、根据 id 删除)

废话不多说直接上干货!

# ES相关包
from elasticsearch import Elasticsearch
from elasticsearch.helpers import bulk     


class ElasticSearchClient(object):  # Connects to ES
    """Factory for Elasticsearch client objects."""

    @staticmethod
    def get_es_servers():
        """Return an ``Elasticsearch`` client configured for localhost:9200.

        A new client object is constructed on every call.
        """
        local_node = {"host": "localhost", "port": "9200"}
        return Elasticsearch(hosts=[local_node])

class LoadElasticSearch(object):  # Loads, stores and manages documents in ES
    """Small CRUD helper bound to one Elasticsearch index and document type.

    On construction it obtains a client from ``ElasticSearchClient`` and
    makes sure the index and its mapping exist (see ``set_mapping``).
    """

    def __init__(self):
        self.index = "my-index-yzm-1"
        self.doc_type = "test-type-yzm-1"

        self.es_client = ElasticSearchClient.get_es_servers()
        self.set_mapping()

    def set_mapping(self):
        """Create the index and register the document mapping if it is missing.

        Nothing is sent to the cluster when the index already exists.
        """
        properties = {"qa_id": {"type": "integer"}}
        # NOTE(review): the "string" field type only exists in older ES
        # releases -- confirm against the cluster version in use.
        for field in ("q", "a", "pos", "neg"):
            properties[field] = {"type": "string"}
        mapping = {self.doc_type: {"properties": properties}}

        if not self.es_client.indices.exists(index=self.index):
            # Create the index, then attach the mapping for our doc type.
            # NOTE(review): the create call receives the bare type mapping as
            # its body (not wrapped in a "mappings" key) and ignores HTTP 400;
            # kept as-is, verify this is what the target ES version expects.
            self.es_client.indices.create(index=self.index, body=mapping, ignore=400)
            self.es_client.indices.put_mapping(index=self.index, doc_type=self.doc_type, body=mapping)

    def add_date(self, row_obj):
        """Index a single document.

        :param row_obj: dict of document fields; an optional ``"_id"`` key
            supplies the document id (defaults to ``1`` when absent).

        The caller's dict is not modified: ``"_id"`` is stripped from a
        shallow copy before the copy is sent as the document body.
        """
        doc = dict(row_obj)
        # pop() with a default: a missing "_id" no longer raises KeyError.
        _id = doc.pop("_id", 1)
        self.es_client.index(index=self.index, doc_type=self.doc_type, body=doc, id=_id)

    def _build_action(self, row_obj):
        """Translate one row dict into a bulk-helper action dict."""
        return {
            "_index": self.index,
            "_type": self.doc_type,
            # NOTE(review): rows without "_id" all get the literal id 'None'
            # and therefore overwrite each other -- confirm this is intended.
            "_id": row_obj.get('_id', 'None'),
            "_source": {
                'qa_id': row_obj.get('qa_id', None),
                'q': row_obj.get('q', None),
                'a': row_obj.get('a', None),
                'pos': row_obj.get('pos', None),
                'neg': row_obj.get('neg', None),
            }
        }

    def add_date_bulk(self, row_obj_list, bulk_num=100000):
        """Index many documents using the bulk helper, in fixed-size batches.

        :param row_obj_list: iterable of row dicts (keys ``_id``, ``qa_id``,
            ``q``, ``a``, ``pos``, ``neg``; missing fields are sent as None).
        :param bulk_num: number of actions per bulk request (default 100000).
        :raises ValueError: if ``bulk_num`` is smaller than 1.

        Progress and the helper's (success, failed) results are printed.
        ``bulk`` is called with ``raise_on_error=True``, so whatever it raises
        propagates to the caller.
        """
        if bulk_num < 1:
            raise ValueError("bulk_num must be >= 1")

        load_data = []
        batch_no = 0
        for row_obj in row_obj_list:
            load_data.append(self._build_action(row_obj))
            # Flush as soon as a full batch has accumulated.
            if len(load_data) >= bulk_num:
                batch_no += 1
                print('插入', batch_no, '批数据')
                print(len(load_data))
                success, failed = bulk(self.es_client, load_data, index=self.index, raise_on_error=True)
                print(success, failed)
                load_data = []

        # Send whatever is left over after the last full batch.
        if load_data:
            success, failed = bulk(self.es_client, load_data, index=self.index, raise_on_error=True)
            print('加载成功:',success,'加载失败:',failed)

    def update_by_id(self, row_obj):
        """Partially update the document identified by ``row_obj["_id"]``.

        :param row_obj: dict holding the fields to change plus an optional
            ``"_id"`` key (defaults to ``1`` when absent). The remaining
            fields are sent as the ``doc`` part of the update request.

        The caller's dict is not modified.
        """
        doc = dict(row_obj)
        _id = doc.pop("_id", 1)
        self.es_client.update(index=self.index, doc_type=self.doc_type, body={"doc": doc}, id=_id)

    def delete_by_id(self, _id):
        """Delete the document with the given id.

        :param _id: id of the document to delete.
        """
        self.es_client.delete(index=self.index, doc_type=self.doc_type, id=_id)

你可能感兴趣的:(ES,python)