Python脚本之操作Elasticsearch【二】

本文为博主原创,未经授权,严禁转载及使用。
本文链接:https://blog.csdn.net/zyooooxie/article/details/124640467

之前分享过《使用 elasticsearch 库【一】》(https://blog.csdn.net/zyooooxie/article/details/109588072),本文继续分享后续内容。

【这篇博客实际推迟了 N 个月才发布】

个人博客:https://blog.csdn.net/zyooooxie

【以下所有内容仅为个人项目经历,如有不同,纯属正常】

操作Document

"""
@blog: https://blog.csdn.net/zyooooxie
@qq: 153132336
@email: [email protected]
"""


def test_0821(index_str: str):
    """Smoke-test a handful of read-only APIs against every configured host.

    For each host a fresh client is created, a series of cluster / search /
    document calls is issued and logged, and the client is closed again.
    The client is closed in a ``finally`` clause so that a failing call does
    not leak the transport and its connections.

    :param index_str: name of the index the calls are issued against
    :return: None
    """
    exist_id = 'xie-xie-xie-xie-xie'
    no_exist_id = 'xie'

    list_ = [gl_es_host_new, gl_es_host_new_2, gl_es_host_new_3, gl_es_host_new_4]
    # list_ = [gl_es_host_new]

    for es_host in list_:
        Log.info('')
        Log.error(es_host)

        client = Elasticsearch(es_host, sniff_on_start=True, sniff_on_node_failure=True, request_timeout=60,
                               http_auth=gl_es_auth)

        try:
            # Returns whether the cluster is running.
            Log.info(client.ping())

            # Returns basic information about the cluster.
            Log.info(client.info())

            # Returns number of documents matching a query.
            Log.info(client.count(index=index_str))
            Log.info(client.count(index=index_str,
                                  body={'query': term_terms_change(index_str=index_str, client=client,
                                                                   field='xie_f',
                                                                   value='xie_v')}))

            # The filter_path parameter is used to reduce the response returned by elasticsearch.
            Log.info(client.search(index=index_str, filter_path=['hits.hits._id', 'hits.hits._type', 'hits.total']))

            # It also supports the * wildcard character to match any field or part of a field's name
            Log.info(client.search(index=index_str, filter_path=['hits.hits.*']))

            # Returns information about whether a document exists in an index.
            Log.info(client.exists(index=index_str, id=exist_id))
            Log.info(client.exists(index=index_str, id=no_exist_id))

            # Returns the source of a document.
            Log.info(client.get_source(index=index_str, id=exist_id))

            # Allows to get multiple documents in one request.
            Log.info(client.mget(index=index_str, body={'ids': [exist_id, no_exist_id]}))

        finally:
            # Closes the Transport and all internal connections, even when
            # one of the calls above raised.
            client.close()

        Log.info('')


def test_es_indices_management(client: Elasticsearch):
    """Create a throw-away index, inspect it, and delete it again.

    The index name is ``'xie'`` followed by a random number. Once the index
    has been created, its deletion runs in a ``finally`` clause so a failing
    inspection call does not leave the temporary index behind.

    :param client: connected Elasticsearch client
    :return: None
    """

    # Every document in an index has a type. Each type has its own mapping
    # (schema definition). A mapping defines the fields of the type, the data
    # type of each field, and how Elasticsearch handles those fields.

    # Elasticsearch supports the following simple field types:
    # string:         string
    # whole number:   byte, short, integer, long
    # floating point: float, double
    # boolean:        boolean
    # date:           date

    # When a document containing a previously unseen field is indexed,
    # Elasticsearch uses dynamic mapping and tries to guess the field type
    # from the basic JSON data type.

    Log.info('********')

    # The two most important index settings:
    # number_of_shards    primary shards per index; cannot be changed after creation.
    # number_of_replicas  replicas per primary shard; can be changed dynamically later.

    # primary shard: every document is stored in one shard; a document is written
    #   to the primary shard first and then copied to the replicas.
    # replica shard: each shard has zero or more replicas, which are copies of the
    #   primary and improve availability and performance.

    abc = 'xie' + str(random.randint(1, 9999))

    # The 'body' parameter is deprecated for the 'create' API
    Log.info(client.indices.create(index=abc,
                                   mappings={
                                       "properties": {
                                           "test_{}".format(abc): {"type": "text"}
                                       }
                                   },
                                   settings={
                                       "number_of_shards": 1
                                   })
             )

    try:
        Log.info(client.indices.exists(index=abc))

        Log.info(client.indices.stats(index=abc))

        Log.info(client.indices.get_mapping(index=abc))
        Log.info(client.indices.get_settings(index=abc))

    finally:
        # Always remove the temporary index, even if an inspection call failed.
        Log.info(client.indices.delete(index=abc))

    Log.info(client.indices.exists(index=abc))

	
def _es_get(index_str: str, client: Elasticsearch, id_str: str, doc_type: str = gl_type, **kwargs):
    """Fetch a single document by id, log it and return it.

    :param index_str: index to read from
    :param client: connected Elasticsearch client
    :param id_str: document id
    :param doc_type: not forwarded to the client; accepted only so existing
        callers that pass it keep working
    :param kwargs: extra keyword arguments forwarded to ``client.get``
    :return: the ``client.get`` response, or None when the id does not exist
    """

    # Returns information about whether a document exists in an index.
    if not client.exists(index=index_str, id=id_str):
        Log.error(f'当前id:{id_str} 不存在')
        return None

    res = client.get(index=index_str, id=id_str, **kwargs)
    Log.info(res)

    # Return the document like the sibling helpers do, instead of only logging it.
    return res


def _es_delete(index_str: str, client: Elasticsearch, id_str: str, **kwargs):
    """Delete one document by id.

    :param index_str: index to delete from
    :param client: connected Elasticsearch client
    :param id_str: document id
    :param kwargs: extra keyword arguments forwarded to ``client.delete``
    :return: the ``client.delete`` response, or None when the id does not exist
    :raises AssertionError: when the response's ``result`` is not ``'deleted'``
    """

    if not client.exists(index=index_str, id=id_str):
        Log.error(f'不存在:{id_str}')
        return None

    res = client.delete(index=index_str, id=id_str, **kwargs)
    Log.info(res)

    # Raised explicitly rather than via `assert`, which is stripped under
    # `python -O`. The exception type stays AssertionError for callers that
    # already handle it.
    if res.get('result') != 'deleted':
        raise AssertionError(f"unexpected delete result: {res.get('result')!r}")

    return res


def _es_delete_by_query(index_str: str, client: Elasticsearch, body: dict, **kwargs):
    """Delete every document matching a query and log the outcome.

    :param index_str: index to delete from
    :param client: connected Elasticsearch client
    :param body: The search definition using the Query DSL
    :param kwargs: extra keyword arguments forwarded to
        ``client.delete_by_query``; ``scroll_size`` defaults to 1000 here
        but may be overridden by the caller
    :return: the ``client.delete_by_query`` response
    """

    # scroll_size: Size on the scroll request powering the delete by query
    # (Elasticsearch default: 100). Using setdefault keeps 1000 as this
    # helper's default while avoiding a duplicate-keyword TypeError when the
    # caller supplies its own scroll_size.
    kwargs.setdefault('scroll_size', 1000)

    res = client.delete_by_query(index=index_str, body=body, **kwargs)
    Log.info(res)
    Log.info(f'{res.get("total")}, {res.get("deleted")}')

    return res


def _es_create(index_str: str, client: Elasticsearch, id_str: str, document: dict, **kwargs):
    """Create a new document under an id that must not exist yet.

    :param index_str: index to write to
    :param client: connected Elasticsearch client
    :param id_str: id of the new document
    :param document: document source
    :param kwargs: extra keyword arguments forwarded to ``client.create``
    :return: the ``client.create`` response, or None when the id already exists
    :raises AssertionError: when the response's ``result`` is not ``'created'``
    """
    if client.exists(index=index_str, id=id_str):
        Log.error(f'已创建:{id_str}')
        return None

    res = client.create(index=index_str, id=id_str, document=document, **kwargs)
    Log.info(res)

    # Raised explicitly rather than via `assert`, which is stripped under
    # `python -O`. The exception type stays AssertionError for callers that
    # already handle it.
    if res.get('result') != 'created':
        raise AssertionError(f"unexpected create result: {res.get('result')!r}")

    return res


def _es_index(index_str: str, client: Elasticsearch, document: dict, id_str: str = None, **kwargs):
    """Index a document: overwrite an existing id, or create one with an auto-generated id.

    With ``id_str`` the document under that id is fully replaced (the id must
    already exist); without it a new document is indexed and Elasticsearch
    assigns the id.

    :param index_str: index to write to
    :param client: connected Elasticsearch client
    :param document: full document source
    :param id_str: id of the document to overwrite; falsy means "create new"
    :param kwargs: extra keyword arguments forwarded to ``client.index``
    :return: the ``client.index`` response, or None when ``id_str`` was given
        but does not exist
    :raises AssertionError: when the response's ``result`` is not the expected
        ``'updated'`` (with id) / ``'created'`` (without id)
    """

    # New usage:        es.index(document={...})
    # Deprecated usage: es.index(body={...})

    if id_str:  # update the given id by overwriting the whole document

        if not client.exists(index=index_str, id=id_str):
            Log.error(f'当前id:{id_str} 不存在')
            return None

        res = client.index(index=index_str, document=document, id=id_str, **kwargs)
        expected = 'updated'

    else:

        res = client.index(index=index_str, document=document, **kwargs)
        expected = 'created'

    # Log before validating so a failing response is still visible in the log.
    Log.info(res)

    # Raised explicitly rather than via `assert`, which is stripped under
    # `python -O`. The exception type stays AssertionError for callers that
    # already handle it.
    if res.get('result') != expected:
        raise AssertionError(f"unexpected index result: {res.get('result')!r}, expected {expected!r}")

    return res


def _es_update(index_str: str, client: Elasticsearch, id_str: str, doc: dict, **kwargs):
    """Partially update an existing document.

    The update API accepts a partial document which is merged into the
    existing one. To fully replace a document use the index API instead
    (partial update: update(); full replacement: index()).

    :param index_str: index holding the document
    :param client: connected Elasticsearch client
    :param id_str: id of the document to update
    :param doc: partial document to merge in
    :param kwargs: extra keyword arguments forwarded to ``client.update``
    :return: the ``client.update`` response, or None when the id does not exist
    """

    # Guard: nothing to update when the id is unknown.
    if not client.exists(index=index_str, id=id_str):
        Log.error(f'当前id:{id_str} 不存在')
        return None

    # The 'body' parameter is deprecated for the 'update' API; the partial
    # document is passed through the dedicated `doc` parameter instead.
    response = client.update(index=index_str, id=id_str, doc=doc, **kwargs)
    Log.info(response)
    return response

def _bulk_actions(index_str: str,
                  id_str: List[str], body: Union[List[dict], List[int]],
                  op_type: str = 'index'):
    """Lazily yield one bulk-helper action dict per (id, body) pair.

    Ids and bodies are paired positionally with ``zip``, so the shorter of the
    two inputs decides how many actions are produced. The pairs are consumed
    one at a time rather than materialised up front, so large inputs (or
    generators) are streamed.

    :param index_str: target index written into every action's ``_index``
    :param id_str: document ids, one per action
    :param body: per-action payload; merged into the action for
        ``index``/``create``, stored under ``doc`` for ``update``, ignored for
        ``delete`` (but still needed for pairing)
    :param op_type: ``index`` (default), ``create``, ``delete`` or ``update``
    :return: generator of action dicts
    :raises ValueError: on the first generated action when ``op_type`` is not
        one of the four supported values
    """
    # All bulk helpers accept any iterable of actions; a generator is ideal
    # because large datasets need not be loaded into memory.
    for doc_id, payload in zip(id_str, body):

        actions_dict = {"_index": index_str, "_id": doc_id, '_op_type': op_type}

        # The bulk() api accepts index, create, delete, and update actions.
        # Use the _op_type field to specify an action (_op_type defaults to index)
        if op_type in ('index', 'create'):
            actions_dict.update(payload)

        elif op_type == 'update':
            actions_dict['doc'] = payload

        elif op_type != 'delete':
            # ValueError is an Exception subclass, so callers catching
            # Exception are unaffected.
            raise ValueError('传参有误')

        yield actions_dict


def _es_bulk(index_str: str, client: Elasticsearch,
             body_list: List[dict], id_list: List[str],
             op_type: str, **kwargs):
    """Run a bulk request through the ``elasticsearch.helpers.bulk`` helper.

    :param index_str: target index of every action
    :param client: connected Elasticsearch client
    :param body_list: per-action payloads, paired positionally with ``id_list``
    :param id_list: document ids
    :param op_type: bulk operation type passed on to ``_bulk_actions``
    :param kwargs: extra keyword arguments forwarded to the ``bulk`` helper
    :return: whatever the ``bulk`` helper returns
    """

    # https://elasticsearch-py.readthedocs.io/en/v7.17.0/helpers.html#bulk-helpers
    from elasticsearch.helpers import bulk

    outcome = bulk(
        client=client,
        actions=_bulk_actions(index_str=index_str, id_str=id_list, body=body_list, op_type=op_type),
        **kwargs,
    )
    Log.info(outcome)

    return outcome


def _es_analyze(index_str: str, client: Elasticsearch, text: str):
    """Run the analyze API on ``text`` and return the produced tokens.

    :param index_str: index whose analyzer is used
    :param client: connected Elasticsearch client
    :param text: preferably a single str; if an array of strings is provided,
        it is analyzed as a multi-value field.
    :return: list with the ``token`` value of every entry in the response's
        ``tokens``; an empty list when the response carries no tokens
    """

    # Data in Elasticsearch falls broadly into two kinds: exact values and full text.
    # Exact values are exactly what they sound like, e.g. a date or a user ID; strings
    # can be exact values too (a username, an email address). For exact values Foo and
    # foo differ, and so do 2014 and 2014-09-15.
    # Full text is textual data, usually written in a human language, such as the
    # content of a tweet or of an email.

    # When a document is indexed, its full-text fields are analyzed into terms that
    # build the inverted index. A query string searched against a full-text field has
    # to go through the same analysis so that the searched terms match the indexed ones.

    # Querying a full-text field applies the same analyzer to the query string.
    # Querying an exact-value field does not analyze the query string; the exact
    # value given is searched.

    Log.info('********')

    # By default, Elasticsearch changes the values of text fields during analysis.

    # For example, the default standard analyzer changes text field values as follows:
    # 1.Removes most punctuation
    # 2.Divides the remaining content into individual words, called tokens
    # 3.Lowercases the tokens

    Log.info(text)

    # analyzer
    # The analyzer can be decided per field: each field may have its own analyzer,
    # configured on the field itself or inherited from a higher-level default
    # (type, index or node).

    # If this parameter is not specified, the analyze API uses the analyzer defined in the field's mapping.
    #
    # If no field is specified, the analyze API uses the default analyzer for the index.
    #
    # If no index is specified, or the index does not have a default analyzer, the analyze API uses the standard analyzer.

    res = client.indices.analyze(body={'text': text}, index=index_str)
    Log.info(res)

    # `token` is the term actually stored in the index; `position` is where the term
    # occurs in the original text; `start_offset` / `end_offset` are character
    # positions in the original string.
    #
    # One comprehension covers zero, one or many tokens. The previous special case
    # indexed tokens_list[0] and crashed when analysis produced no tokens (for
    # example empty or punctuation-only text).
    tokens_list = res.get('tokens') or []
    return [tl.get('token') for tl in tokens_list]


def query_term(index_: str, es_: Elasticsearch,
               field: str = None, value: Union[str, int] = None,
               field_field: str = None):
    """Build, print and return a term query for ``field`` or ``field.field_field``.

    Search condition: field=value, or field.field_field=value.

    :param index_: index the query targets
    :param es_: connected Elasticsearch client
    :param field: field name
    :param value: value to match
    :param field_field: optional sub-field, joined to ``field`` with a dot
    :return: the query clause produced by ``term_terms_change``, or None when
        neither ``field_field`` nor ``field`` is given
    """

    # Resolve the (possibly dotted) field path first; bail out if there is none.
    if field_field:
        target_field = '.'.join([field, field_field])
    elif field:
        target_field = field
    else:
        return None

    search_dict = gl_search_dict.copy()
    search_dict.update(query=term_terms_change(index_str=index_, client=es_,
                                               field=target_field, value=value))

    print_result(index_, search_dict)
    return search_dict.get('query')


def print_result(index_: str, dict_1: dict):
    """Print a search request line and its JSON body, framed by blank lines.

    :param index_: index name placed in the printed request path
    :param dict_1: request body, dumped as JSON without ASCII escaping
    :return: None
    """
    request_line = f'GET /{index_}/_doc/_search'
    payload = json.dumps(dict_1, ensure_ascii=False)

    # Leading and trailing empty entries give the surrounding blank lines.
    print('\n'.join(['', request_line, payload, '']))


def term_terms_change(index_str: str, client: Elasticsearch, field: str, value: Any,
                      simple_use: bool = False,
                      term_terms: str = 'term', **kwargs
                      ) -> dict:
    """Build a term/terms clause, choosing between ``field`` and ``field.keyword``.

    :param index_str: index used when analyzing ``value``
    :param client: connected Elasticsearch client
    :param field: field name
    :param value: value to match; a falsy value always targets ``field.keyword``
    :param simple_use: when true (and ``value`` is truthy) target ``field``
        directly without analyzing
    :param term_terms: ``'term'`` or ``'terms'``
    :param kwargs: extra top-level entries merged into the returned dict
    :return: ``{term_terms: {<target field>: value}, **kwargs}``
    """
    assert term_terms in ['term', 'terms']

    def _clause(target):
        # Single place that shapes the returned clause.
        return {term_terms: {target: value}, **kwargs}

    if not value:
        return _clause(field + '.keyword')

    if simple_use:
        return _clause(field)

    # https://www.elastic.co/guide/en/elasticsearch/reference/7.17/query-dsl-term-query.html#avoid-term-query-text-fields

    # The term query does not analyze the search term. The term query only searches for the exact term you provide.
    # This means the term query may return poor or no results when searching text fields.

    analyze_list = _es_analyze(index_str=index_str, client=client, text=value)
    Log.info(analyze_list)

    # If the string value is itself one of the analyzed tokens, target the field
    # directly; otherwise fall back to the `.keyword` sub-field.
    survives_analysis = value in analyze_list and isinstance(value, str)
    return _clause(field if survives_analysis else field + '.keyword')


def es_func(func_name: str, **kwargs):
    """Dispatch to one of this module's ``_es_*`` helpers by name, logging any failure.

    When no ``client`` is passed in ``kwargs`` a client is opened for the call
    and closed afterwards. Exceptions raised by the helper are logged and
    swallowed, in which case None is returned.

    :param func_name: name of the helper to call; must be in the allow-list below
    :param kwargs: keyword arguments forwarded to the helper
    :return: the helper's return value, or None when an exception was logged
    """

    allowed_names = ['_es_get', '_es_delete', '_es_create', '_es_index', '_es_search', '_es_update', '_es_bulk',
                     '_es_delete_by_query']

    # Only a client opened here is closed here.
    owns_client = not kwargs.get('client')
    if owns_client:
        kwargs['client'] = connect_es_client(gl_es_host_new, gl_es_auth)

    # Log.debug(f'传参:{kwargs}')

    try:

        assert func_name in allowed_names

        # Either lookup works; pick one:
        # globals().get(func_name)(**kwargs)
        target = getattr(sys.modules[__name__], func_name)
        return target(**kwargs)

    except AssertionError:

        Log.error('断言失败')
        Log.error(traceback.format_exc())

    except BulkIndexError:

        Log.error('Bulk 遇到错误,被中断')
        Log.error(traceback.format_exc())

    except Exception as e:

        Log.error(e.args)
        Log.error(traceback.format_exc())

    finally:

        time.sleep(1)

        Log.info('es_func() 执行结束')

        if owns_client:
            close_es_client(client=kwargs['client'])


def get_seq_max(index_str: str, client: Elasticsearch):
    """Return the largest ``seq`` value found in the index, or 0 when the search yields nothing.

    :param index_str: index to search
    :param client: connected Elasticsearch client
    :return: ``seq`` of the first hit when sorting by ``seq`` descending,
        or 0 when ``es_func`` returns a falsy result
    """

    # Only documents that have a `seq` field, highest first, one hit.
    # (Alternative form: {'constant_score': {'filter': {'exists': {'field': 'seq'}}}})
    has_seq = {'exists': {'field': 'seq'}}
    hits = es_func('_es_search', client=client, index_str=index_str, query=has_seq,
                   sort_={"seq": {"order": "desc"}}, size_=1)

    seq = hits[0].get('_source').get('seq') if hits else 0

    Log.error(f'当前最大seq:{seq}')
    return seq


"""
@blog: https://blog.csdn.net/zyooooxie
@qq: 153132336
@email: [email protected]
"""


# Script entry point: opens one shared client and sets up the names used by
# the usage examples further down in this block.
if __name__ == '__main__':
    pass

    # Shared client for the examples below.
    es = connect_es_client(gl_es_host_new, gl_es_auth)

    Log.error('')

    # Index name and user value referenced by the examples below.
    test_index_ = 'zyooooxie-data'
    user_ = 'zyooooxie'

    # test_0821(index_str=test_index_)

    # Placeholder document id and a random number used by the examples below.
    index_id = 'xxxxx'
    r_m = random.randrange(99999)

    # query = query_term(field='text', field_field='content', value='你好', index_=test_index_, es_=es)
    # Log.info(query)
    #
    # result = es_func('_es_search', client=es, index_str=test_index_, query=query, size_=10, sort_='seq',
    #                  _source_excludes=['seq'])
    # Log.info(result)
    #
    # result = es_func('_es_search', client=es, index_str=test_index_, query=query, size_=10, sort_=['xxxXXX', 'seq'],
    #                  _source_includes=['xxxXXX', 'seq'])
    # Log.info(result)
    #
    # result = es_func('_es_search', client=es, index_str=test_index_, query=query, size_=10,
    #                  sort_=[{"xxxXXX": {"order": "desc"}}, "seq"],
    #                  _source_includes=['xxxXXX', 'seq',  'text'])
    # Log.info(result)

    # Log.error('')

    # Log.info(es.indices.delete(index=test_index_))

    # get_seq_max(index_str=test_index_, client=es)
    # get_seq_max(index_str='test_index_123', client=es)

    # Log.info('')
    #
    # abc_m = _es_analyze(index_str=test_index_, client=es, text='wrzgKFCgBBxxxXXX')
    # Log.info(abc_m)
    #
    #
    # abc_m = _es_analyze(index_str=test_index_, client=es,
    #                     text=['wrzgKFCgBBxxxXXX', 'wrzgKFCgAAxxxXXX'])
    # Log.info(abc_m)

    # Log.info('')
    #
    # test_es_indices_management(client=es)

    # Log.info('')
    #
    # res_m = term_terms_change(field='r_Id', value='wrzgKFCgAAxxxXXX', index_str=test_index_,
    #                           client=es)
    # Log.info(res_m)
    #
    # res_m = term_terms_change(field='r_Id', value='wrzgKFCgAAxxxXXX', simple_use=True,
    #                           index_str=test_index_, client=es)
    # Log.info(res_m)
    #
    # res_m = term_terms_change(field='r_Id', value='abc123456789', index_str=test_index_, client=es)
    # Log.info(res_m)
    #
    # Log.info('')
    #
    # _es_get(index_str=test_index_, id_str=index_id, client=es)
    # es_func(func_name='_es_get', index_str=test_index_, id_str=index_id)
    #
    # es_func(func_name='_es_get', index_str=test_index_, id_str=index_id + '不存在', client=es)
    #
    # Log.info('')
    #
    # _es_delete(index_str=test_index_, id_str=index_id, client=es)
    # es_func('_es_delete', index_str=test_index_, id_str='index_id' + 'TEST', client=es)
    #
    # Log.info('')
    #
    # _es_create(index_str=test_index_, id_str=index_id, client=es, document={'from': 'xzc', 'text': {'content': 'TEST测试'}})
    # es_func('_es_create', index_str=test_index_, id_str='xie_' + 'TEST' + '2', client=es,
    #         document={'from': 'xzc', 'text': {'content': f'TEST测试-{random.randint(1, 99)}'}})
    #
    # Log.info('')
    #
    # for q_m in ['了了', 'seq:123123']:
    #     _es_search(index_str=test_index_, q=q_m, client=es, size_=500)
    #
    #     Log.info('')
    #
    #     es_func('_es_search', index_str=test_index_, q=q_m, client=es, size_=100, from_=2)
    #
    # Log.info('')
    #
    # b_m = {"query": {"term": {"_id": {"value": index_id}}}}
    # # 传body
    # # The 'body' parameter is deprecated for the 'search' API and will be removed in a future version.
    # _es_search(index_str=test_index_, body=b_m, client=es)
    # es_func('_es_search', index_str=test_index_, body=b_m, client=es)
    # #
    # Log.info('')
    #
    # _es_search(index_str=test_index_, body={"query": {"term": {"_id": {"value": 'index_id'}}}}, client=es)
    # es_func('_es_search', index_str=test_index_, client=es, body={"query": {"term": {"_id": {"value": 'index_id'}}}})
    #
    # Log.info('')
    #
    # q_m = {"term": {"_id": {"value": index_id}}}
    # # 传query
    # _es_search(index_str=test_index_, query=q_m, client=es)
    # es_func('_es_search', index_str=test_index_, client=es, query=q_m)
    #
    # Log.info('')
    #
    # _es_search(index_str=test_index_, query={"term": {"_id": {"value": 'index_id'}}}, client=es)
    # es_func('_es_search', index_str=test_index_, client=es, query={"term": {"_id": {"value": 'index_id'}}})
    #
    # Log.info('')
    #
    # _es_search(index_str=test_index_, body={}, client=es)
    # es_func('_es_search', index_str=test_index_, body={}, client=es)
    # es_func('_es_search', index_str=test_index_, body={}, client=es, sort_='seq:desc')
    #
    # Log.info('')
    # Log.info('')
    #
    # _es_search(index_str=test_index_, body={}, client=es, size_=12)
    # es_func('_es_search', index_str=test_index_, body={}, client=es, size_=12)
    # es_func('_es_search', index_str=test_index_, body={}, client=es, size_=12, sort_='seq:desc')
    #
    # Log.info('')
    #
    # # 传body,sort 有效的值 是'seq:asc'、'seq:desc'
    #
    # es_func('_es_search', client=es, index_str=test_index_, body={}, sort_='seq', size_=15, from_=15)
    # es_func('_es_search', client=es, index_str=test_index_, body={}, sort_='seq:asc', size_=15, from_=15)
    # es_func('_es_search', client=es, index_str=test_index_, body={}, sort_='seq:desc', size_=15, from_=15)
    #
    # Log.info('')
    #
    # # query不可以传{}
    # # _es_search(index_str=test_index_, query={}, client=es, size_=5)
    # es_func('_es_search', index_str=test_index_, query={}, client=es, size_=5)
    # Log.info('')
    #
    # es_func('_es_search', client=es, index_str=test_index_, size_=15, from_=15)
    #
    # Log.info('')
    #
    # _es_search(index_str=test_index_, query={"match": {"seq": 10}}, client=es)
    # _es_search(index_str=test_index_, body={"query": {'match': {"seq": 10}}}, client=es)
    #
    # Log.info('')
    #
    # es_func('_es_search', index_str=test_index_, query={"range": {"seq": {"lte": 4220570, "gte": 3157260}}},
    #         client=es, size_=50)
    #
    # es_func('_es_search', index_str=test_index_, query={"range": {"seq": {"lte": 4220570, "gte": 3157260}}},
    #         client=es, size_=50, sort_={'seq': {'order': 'desc'}})
    #
    # _es_search(index_str=test_index_, query={"match_phrase": {"text.content": '单位应该资源谢谢'}}, client=es)
    # _es_search(index_str=test_index_, query={"bool": {"filter": {"terms": {"seq": [5652, 5, 58587, 5910, 56, 55]}}}},
    #            client=es)
    #
    # Log.info('')
    #
    # _es_search(index_str=test_index_, query={"bool": {
    #     "must": [{"term": {"r_Id": "wrcIqjVAAAXdxxxXXX"}},
    #              {"range": {"xxxXXX": {"lt": 1698508800000, "gt": 1696780800000}}}]}}, client=es)
    #
    # _es_search(index_str=test_index_, query={"bool": {
    #     "must": [{"term": {"r_Id.keyword": "wrcIqjVAAAXdxxxXXX"}},
    #              {"range": {"xxxXXX": {"lt": 1698508800000, "gt": 1696780800000}}}]}}, client=es)
    #
    # _es_search(index_str=test_index_, query={"bool": {
    #     "must": [{"term": {"r_Id": "abc123"}},
    #              {"range": {"xxxXXX": {"lt": 1698508800000, "gt": 1696780800000}}}]}}, client=es)
    #
    # _es_search(index_str=test_index_, query={"bool": {
    #     "must": [{"term": {"r_Id.keyword": "abc123"}},
    #              {"range": {"xxxXXX": {"lt": 1698508800000, "gt": 1696780800000}}}]}}, client=es)
    #
    # Log.info('')
    #
    # _es_search(index_str=test_index_, query={"bool": {
    #     "should": [{"term": {"r_Id": "wrcIqjVAAAXdxxxXXX"}},
    #                {"range": {"xxxXXX": {"lt": 1698508800000, "gt": 1696780800000}}}]}}, client=es)
    #
    # _es_search(index_str=test_index_, query={"term": {"r_Id": "wrcIqjVAAAXdxxxXXX"}},
    #            client=es)
    #
    # _es_search(index_str=test_index_, query={"range": {"xxxXXX": {"lt": 1698508800000, "gt": 1696780800000}}},
    #            client=es)
    #
    # Log.info('')
    #
    # _es_search(index_str=test_index_, query={"bool": {
    #     "must_not": [{"term": {"r_Id.keyword": "wrcIqjVAAAXdxxxXXX"}},
    #                  {"range": {"xxxXXX": {"lt": 1698508800000, "gt": 1696780800000}}}]}}, client=es)
    #
    # _es_search(index_str=test_index_, query={"range": {"xxxXXX": {"lte": 1698508800000}}}, client=es)
    #
    # Log.info('')
    #
    # # 新的id(不存在)
    # id_abc = 'abc' + index_id
    # _es_search(index_str=test_index_, query={"match": {"_id": id_abc}}, client=es)
    #
    # _es_update(index_str=test_index_, id_str=id_abc,
    #            doc={"to{}".format(random.randrange(99999)): "xzc"}, client=es)
    # _es_search(index_str=test_index_, query={"match": {"_id": id_abc}}, client=es)
    #
    # es_func('_es_update', index_str=test_index_, id_str=id_abc,
    #         doc={"to{}".format(random.randrange(99999)): "xzc"}, client=es)
    # _es_search(index_str=test_index_, query={"match": {"_id": id_abc}}, client=es)
    #
    # Log.info('')
    #
    # # 已有的id
    # _es_search(index_str=test_index_, query={"match": {"_id": index_id}}, client=es)
    #
    # _es_update(index_str=test_index_, id_str=index_id, client=es,
    #            doc={"from": "xzc-{}".format(r_m), "r_Id": ""})
    # _es_search(index_str=test_index_, query={"match": {"_id": index_id}}, client=es)
    #
    # es_func('_es_update', index_str=test_index_, id_str=index_id, client=es,
    #         doc={"from": "ZY-{}".format(r_m * 2)})
    # _es_search(index_str=test_index_, query={"match": {"_id": index_id}}, client=es)
    #
    # Log.info('')
    #
    # _es_search(index_str=test_index_, query={"match": {"from": 'xzc'}}, client=es)
    #
    # _es_index(index_str=test_index_, client=es, document={'from': 'xzc', 'text': {'content': 'TEST测试'}})
    # time.sleep(1)
    # _es_search(index_str=test_index_, query={"match": {"from": 'xzc'}}, client=es)
    #
    # es_func('_es_index', index_str=test_index_, client=es, document={'from': 'xzc', 'text': {'content': 'TEST测试'}})
    # time.sleep(1)
    # _es_search(index_str=test_index_, query={"match": {"from": 'xzc'}}, client=es)
    #
    # Log.info('')
    #
    # _es_search(index_str=test_index_, query={"match": {"_id": index_id}}, client=es)
    #
    # _es_index(index_str=test_index_, client=es, id_str=index_id,
    #           document={'seq': 123456 + r_m, 'from': 'xzc2', 'text': {'content': 'TEST测试--'}})
    # time.sleep(1)
    # _es_search(index_str=test_index_, query={"match": {"_id": index_id}}, client=es)
    #
    # es_func('_es_index', index_str=test_index_, client=es, id_str=index_id,
    #         document={'seq': 654321 + r_m, 'from': 'xzc2', 'text': {'content': 'TEST测试--TEST测试'}})
    # time.sleep(1)
    # _es_search(index_str=test_index_, query={"match": {"_id": index_id}}, client=es)
    #
    # Log.info('')
    #
    # id_list_m = ['x1026' + str(i) for i in range(12)]
    #
    # _es_search(index_str=test_index_, client=es, query={'ids': {'values': id_list_m}})
    #
    # Log.info('')
    #
    # _es_delete_by_query(index_str=test_index_, client=es, body={'query': {'ids': {'values': id_list_m}}})
    # _es_search(index_str=test_index_, client=es, query={'ids': {'values': id_list_m}})
    #
    # Log.info('')
    #
    # for i in id_list_m[:5]:
    #     _es_get(index_str=test_index_, client=es, id_str=i)
    #
    #     _es_update(index_str=test_index_, id_str=i, client=es,
    #                doc={"from123321": "zy-{}-123".format(i)})
    #     _es_get(index_str=test_index_, client=es, id_str=i)
    #
    #     _es_delete(index_str=test_index_, client=es, id_str=i)
    #     _es_get(index_str=test_index_, client=es, id_str=i)
    #
    #     _es_create(index_str=test_index_, client=es, id_str=i, document={'hhh': 'Xie_' + str(i) * 3})
    #     _es_get(index_str=test_index_, client=es, doc_type='_all', id_str=i)
    #
    #     _es_index(index_str=test_index_, client=es, id_str=i, document={'hhh': 'x_' + str(i) * 1})
    #     _es_get(index_str=test_index_, client=es, id_str=i)
    #
    #     _es_delete(index_str=test_index_, client=es, id_str=i)
    #     _es_get(index_str=test_index_, client=es,  id_str=i)
    #
    #     _es_index(index_str=test_index_, client=es, id_str=i, document={'hhh': 'XieXie_' + str(i) * 6})
    #     _es_get(index_str=test_index_, client=es, id_str=i)
    #
    #     _es_index(index_str=test_index_, client=es, document={'hhh': 'Xie_xxx' + str(i) * 3})
    #
    #     Log.info('')
    #
    # _es_search(index_str=test_index_, client=es, query={'ids': {'values': id_list_m}}, size_=1000)
    #
    # abc = _es_search(index_str=test_index_, query={"range": {"seq": {'gte': 99999}}}, client=es, size_=10)
    # abc = [h.get('_id') for h in abc]
    # Log.info(abc)

    # abc = random.choice(string.ascii_letters)
    # abc = 'TEST'
    #
    # id_list_m = ['xie_' + abc + str(i) for i in range(0, 10009)]
    # _es_search(index_str=test_index_, client=es, query={'ids': {'values': id_list_m}}, get_more_10000=True)

    # max_seq = get_seq_max(index_str=test_index_, client=es)
    # es_func('_es_bulk', index_str=test_index_, client=es,
    #         id_list=id_list_m,
    #         # id_list=id_list_m,
    #
    #         # body_list=[{'from': 'xie', 'text': {'content': i}} for i in range(10009)],
    #         # op_type='delete',
    #
    #         # op_type='create',
    #         op_type='index',
    #         body_list=[{'hhh': 'xie' + str(i) * 3, 'seq': max_seq + i + 1} for i in range(10005)],
    #         # op_type='update',
    #
    #         # op_type='delete111',
    #         )
    #
    # time.sleep(2)
    # _es_search(index_str=test_index_, client=es, query={'ids': {'values': id_list_m}}, get_more_10000=True)
    # _es_search(index_str=test_index_, client=es, query={'ids': {'values': id_list_m}}, size_=1000)
    #
    # q_m = {'exists': {'field': 'seq'}}
    # res_m = es_func('_es_search', client=es, index_str=test_index_, query=q_m, sort_={"seq": {"order": "desc"}},
    #                 size_=10)
    #
    # _es_search(index_str=test_index_, client=es,
    #            query=term_terms_change(index_str=test_index_, client=es, field='action', value='send'))
    #
    # es_func('_es_search', index_str=test_index_, client=es,
    #         size_=2000,
    #
    #         query=term_terms_change(index_str=test_index_, client=es, field='r_Id',
    #                                 value='wrcIqjVAAAiXXXXXX'),
    #         )

    # _es_search(index_str=test_index_, client=es,
    #            query=term_terms_change(index_str=test_index_, client=es, field='from',
    #                                    value='user_'))

    # Log.info('')
    #
    # q__ = term_terms_change(index_str=test_index_, client=es, field='from', value='123123')
    # _es_search(index_str=test_index_, client=es,
    #            get_more_10000=True,
    #            query=q__)
    #
    #
    # _es_delete_by_query(index_str=test_index_, client=es, body={'query': q__})
    # _es_search(index_str=test_index_, client=es,
    #            get_more_10000=True,
    #            query=q__)
    #
    # Log.info('')
    #
    # es_func('_es_delete_by_query', index_str=test_index_, client=es, body={'query': q__})
    #
    # _es_search(index_str=test_index_, client=es,
    #            get_more_10000=True,
    #            query=q__)
    #
    # _es_search(index_str=test_index_, client=es,
    #            query=term_terms_change(index_str=test_index_, client=es, field='from', value='user_'))
    #
    # _es_search(index_str=test_index_, client=es,
    #            get_more_10000=True,
    #            query=term_terms_change(index_str=test_index_, client=es, field='from', value='user_'))
    #
    # _es_search(index_str=test_index_, client=es,
    #            query={
    #                'bool': {'must': [term_terms_change(index_str=test_index_, client=es, field='from', value=user_),
    #                                  {'exists': {'field': 'r_Id'}}],
    #                         'must_not': term_terms_change(index_str=test_index_, client=es, field='r_Id', value='')
    #                         }},
    #            collapse={'field': 'r_Id.keyword'}, sort_={"seq": {"order": "desc"}}
    #            )
    #
    # _es_search(index_str=test_index_, client=es,
    #            query={'bool': {'must': [term_terms_change(index_str=test_index_, client=es, field='from', value=user_),
    #                                     {'exists': {'field': 'receive'}},
    #                                     {'prefix': {'receive.keyword': 'wm'}}
    #                                     ]}},
    #            collapse={'field': 'receive.keyword'}, sort_={"seq": {"order": "desc"}}
    #            )
    #
    # _es_search(index_str=test_index_, client=es,
    #            query={'bool': {'must': [term_terms_change(index_str=test_index_, client=es, field='from', value=user_),
    #                                     term_terms_change(index_str=test_index_, client=es, field='msgType',
    #                                                       value='text'),
    #                                     {'exists': {'field': 'receive'}}],
    #                            'must_not': {'prefix': {'receive.keyword': 'wm'}}
    #                            }},
    #            collapse={'field': 'receive.keyword'}, sort_={"seq": {"order": "desc"}}
    #            )

    close_es_client(es)


本文链接:https://blog.csdn.net/zyooooxie/article/details/124640467

个人博客 https://blog.csdn.net/zyooooxie

你可能感兴趣的:(数据库学习,python,elasticsearch)