Elasticsearch collapse(折叠)与聚合

目前的经验:collapse(折叠)只能用于筛选(去重)命中的数据,不能对折叠后的结果进行聚合;如果需要聚合,只能把数据取回后自己在内存中聚合。

from elasticsearch import Elasticsearch


def query_2coll(index_name):
    """Run a match-all search collapsed on ``subject_id`` and collect the hits.

    The request asks for up to 10000 collapsed hits and, per collapsed group,
    up to 5 inner hits (named ``group``, sorted by ``group`` ascending).
    Only the top-level hits are read here; the inner hits are not inspected.

    Relies on a module-level ``es`` client being defined before the call.

    Args:
        index_name: Name of the index to search.

    Returns:
        A list with the ``_source`` dict of every top-level hit processed.
        If anything fails, the error is printed and whatever was collected
        up to that point (possibly an empty list) is returned.
    """
    inner_hits = {
        "name": "group",
        "size": 5,
        "sort": [{"group": "asc"}],
    }
    collapse = {
        "field": "subject_id",
        "inner_hits": inner_hits,
        "max_concurrent_group_searches": 4,
    }
    request_body = {
        'size': 10000,
        "query": {"match_all": {}},
        "collapse": collapse,
    }

    sources = []
    try:
        response = es.search(index=index_name, doc_type='koala-index', body=request_body)
        for hit in response['hits']['hits']:
            source = hit['_source']
            print(source['id'], source['group'], source['subject_id'])
            sources.append(source)
    except Exception as err:
        # Best-effort: report the failure and fall through with partial results.
        print('query1 error', err)
    return sources

def query(index_name):
    """Run a match-all search sorted by ``group``, collapsed on ``group``,
    with a terms aggregation on ``subject_id``, and collect the hits.

    Relies on a module-level ``es`` client being defined before the call.

    Args:
        index_name: Name of the index to search.

    Returns:
        A list with the ``_source`` dict of every top-level hit processed.
        If anything fails, the error is printed and whatever was collected
        up to that point (possibly an empty list) is returned.
    """
    # NOTE(review): "terms" is not one of the collapse options listed in the
    # Elasticsearch docs (field / inner_hits / max_concurrent_group_searches);
    # confirm whether the server accepts this body or rejects it.
    collapse = {
        "field": "group",
        "terms": {"field": "subject_id", 'size': 10000},
    }
    # NOTE(review): the aggregation is requested, but the "aggregations" part
    # of the response is never read below.
    aggregations = {
        "agg_sex": {
            "terms": {"field": "subject_id", 'size': 10000},
        },
    }
    request_body = {
        'size': 10000,
        "query": {"match_all": {}},
        "sort": [{"group": {"order": "asc"}}],
        "collapse": collapse,
        "aggs": aggregations,
    }

    sources = []
    try:
        response = es.search(index=index_name, doc_type='koala-index', body=request_body)
        for hit in response['hits']['hits']:
            source = hit['_source']
            print(source['id'], source['group'], source['subject_id'])
            sources.append(source)
    except Exception as err:
        # Best-effort: report the failure and fall through with partial results.
        print('query1 error', err)
    return sources

def query_ju(index_name):
    """Search collapsed on ``subject_id`` and return the collapsed hits.

    Top-level hits are sorted by ``event_id`` descending. For each collapsed
    group one inner hit (named ``quality``, sorted by ``event_id`` ascending)
    is requested; the inner hits are not read here.

    Relies on a module-level ``es`` client being defined before the call.

    Args:
        index_name: Name of the index to search.

    Returns:
        A list with the ``_source`` dict of every top-level hit processed.
        If anything fails, the error is printed and whatever was collected
        up to that point (possibly an empty list) is returned.
    """
    query = {
        "from": 0,
        "size": 10000,
        "sort": [
            {
                "event_id": {
                    "order": "desc"
                }
            }
        ],
        "collapse": {
            "field": "subject_id",
            "inner_hits": {
                "name": "quality",
                "size": 1,
                "sort": [
                    {
                        "event_id": {
                            "order": "asc"
                        }
                    }
                ]
            }
        }
    }
    datas = []
    try:
        allDoc = es.search(index=index_name, doc_type='koala-index', body=query)

        for data in allDoc['hits']['hits']:
            source = data['_source']
            print(source['date'])
            # Collect the hit so the caller actually receives the results;
            # previously nothing was appended and the function always
            # returned an empty list.
            datas.append(source)
    except Exception as e:
        # Best-effort: report the failure and fall through with partial results.
        print('query1 error', e)
    return datas

if __name__ == '__main__':
    # `es` is bound as a module-level global here; the query functions above
    # look it up by that exact name when they call `es.search(...)`.
    es = Elasticsearch(['127.0.0.1:9200'])
    index_name = 'event_tk'
    # Run the subject_id collapse query. It prints each hit as it goes; the
    # returned list is not used.
    query_2coll(index_name)

你可能感兴趣的:(ElasticSearch)