zyooooxie

Python脚本之操作Elasticsearch【二】

本文为博主原创，未经授权，严禁转载及使用。
本文链接：https://blog.csdn.net/zyooooxie/article/details/124640467

之前分享过使用 elasticsearch 库的第【一】篇（https://blog.csdn.net/zyooooxie/article/details/109588072），本文继续分享；

【实际这篇博客推迟发布N个月】

个人博客：https://blog.csdn.net/zyooooxie

【以下所有内容仅为个人项目经历，如有不同，纯属正常】

操作Document

"""
@blog: https://blog.csdn.net/zyooooxie
@qq: 153132336
@email: [email protected]
"""


def test_0821(index_str: str):
    """
    Run a series of read-only Elasticsearch calls against each configured host and log the results.

    For every host a new client is created, the calls are logged, and the client is closed.
    The close now happens in a ``finally`` block, so a failing call no longer leaks the
    transport; the exception itself still propagates to the caller.

    :param index_str: name of the index the count/search/exists/get_source/mget calls target
    :return: None
    """
    exist_id = 'xie-xie-xie-xie-xie'
    no_exist_id = 'xie'

    # To probe a single host only, shrink this list to e.g. [gl_es_host_new].
    hosts = [gl_es_host_new, gl_es_host_new_2, gl_es_host_new_3, gl_es_host_new_4]

    for es_host in hosts:
        Log.info('')
        Log.error(es_host)

        client = Elasticsearch(es_host, sniff_on_start=True, sniff_on_node_failure=True, request_timeout=60,
                               http_auth=gl_es_auth)

        try:
            # Returns whether the cluster is running.
            Log.info(client.ping())

            # Returns basic information about the cluster.
            Log.info(client.info())

            # Returns number of documents matching a query.
            Log.info(client.count(index=index_str))
            Log.info(client.count(index=index_str,
                                  body={'query': term_terms_change(index_str=index_str, client=client,
                                                                   field='xie_f',
                                                                   value='xie_v')}))

            # The filter_path parameter is used to reduce the response returned by elasticsearch.
            Log.info(client.search(index=index_str, filter_path=['hits.hits._id', 'hits.hits._type', 'hits.total']))

            # It also supports the * wildcard character to match any field or part of a field's name
            Log.info(client.search(index=index_str, filter_path=['hits.hits.*']))

            # Returns information about whether a document exists in an index.
            Log.info(client.exists(index=index_str, id=exist_id))
            Log.info(client.exists(index=index_str, id=no_exist_id))

            # Returns the source of a document.
            Log.info(client.get_source(index=index_str, id=exist_id))

            # Allows to get multiple documents in one request.
            Log.info(client.mget(index=index_str, body={'ids': [exist_id, no_exist_id]}))

        finally:
            # Closes the Transport and all internal connections, even when a call above raised.
            client.close()

        Log.info('')


def test_es_indices_management(client: Elasticsearch):
    """
    Create a throw-away index, log its existence/stats/mapping/settings, then delete it again.

    :param client: Elasticsearch client used for all ``indices`` calls
    :return: None
    """

    # Every document in an index has a type. Every type has its own mapping (schema definition).
    # A mapping defines the fields of a type, the data type of each field, and how
    # Elasticsearch handles those fields.

    # Elasticsearch supports the following simple field types:
    # string:  string
    # integer: byte, short, integer, long
    # float:   float, double
    # boolean: boolean
    # date:    date

    # When a document containing a previously unseen field is indexed, Elasticsearch uses
    # dynamic mapping and tries to guess the field type from the basic JSON data type.

    Log.info('********')

    # The two most important index settings:
    # number_of_shards    number of primary shards per index; cannot be changed after creation.
    # number_of_replicas  number of replicas per primary shard; can be changed dynamically later.

    # primary shard: each document is stored in one shard; it is written to the primary shard
    #                first and then copied to the replicas.
    # replica shard: each shard has zero or more replicas, which are copies of the primary shard;
    #                they increase availability and improve performance.

    index_name = 'xie' + str(random.randint(1, 9999))

    index_mappings = {"properties": {f"test_{index_name}": {"type": "text"}}}
    index_settings = {"number_of_shards": 1}

    # The 'body' parameter is deprecated for the 'create' API
    created = client.indices.create(index=index_name, mappings=index_mappings, settings=index_settings)
    Log.info(created)

    # Inspect the fresh index: existence, statistics, mapping and settings, in that order.
    Log.info(client.indices.exists(index=index_name))
    Log.info(client.indices.stats(index=index_name))
    Log.info(client.indices.get_mapping(index=index_name))
    Log.info(client.indices.get_settings(index=index_name))

    # Remove the index and log the existence check again.
    deleted = client.indices.delete(index=index_name)
    Log.info(deleted)
    Log.info(client.indices.exists(index=index_name))

	
def _es_get(index_str: str, client: Elasticsearch, id_str: str, doc_type: str = gl_type, **kwargs):
    """
    Fetch a single document by id, log it and return it.

    The existence of the id is checked first; a missing id is logged as an error and
    ``None`` is returned instead of letting ``client.get`` raise.

    :param index_str: name of the index to read from
    :param client: Elasticsearch client
    :param id_str: document id
    :param doc_type: kept for backward compatibility only; it is not forwarded to any client call here
    :param kwargs: extra keyword arguments passed through to ``client.get``
    :return: the ``client.get`` response, or None when the id does not exist
    """

    # Returns information about whether a document exists in an index.
    if not client.exists(index=index_str, id=id_str):
        Log.error(f'当前id：{id_str} 不存在')
        return None

    res = client.get(index=index_str, id=id_str, **kwargs)
    Log.info(res)

    # Return the document so callers can use it, consistent with the other _es_* helpers.
    return res


def _es_delete(index_str: str, client: Elasticsearch, id_str: str, **kwargs):
    """
    Delete a single document by id after checking that it exists.

    :param index_str: name of the index
    :param client: Elasticsearch client
    :param id_str: id of the document to delete
    :param kwargs: extra keyword arguments passed through to ``client.delete``
    :return: the ``client.delete`` response, or None when the id does not exist
    """

    if client.exists(index=index_str, id=id_str):
        outcome = client.delete(index=index_str, id=id_str, **kwargs)
        Log.info(outcome)

        # The response must report the document as deleted.
        assert outcome.get('result') == 'deleted'

        return outcome

    Log.error(f'不存在：{id_str}')
    return None


def _es_delete_by_query(index_str: str, client: Elasticsearch, body: dict, **kwargs):
    """
    Delete every document matching a query and log the total/deleted counters.

    :param index_str: name of the index
    :param client: Elasticsearch client
    :param body: The search definition using the Query DSL
    :param kwargs: extra keyword arguments passed through to ``client.delete_by_query``
    :return: the ``client.delete_by_query`` response
    """

    # scroll_size: Size on the scroll request powering the delete by query
    # Default: 100
    response = client.delete_by_query(index=index_str, body=body, scroll_size=1000, **kwargs)
    Log.info(response)

    total = response.get("total")
    deleted = response.get("deleted")
    Log.info(f'{total}, {deleted}')

    return response


def _es_create(index_str: str, client: Elasticsearch, id_str: str, document: dict, **kwargs):
    """
    Create a document under an explicit id, refusing to act when the id already exists.

    :param index_str: name of the index
    :param client: Elasticsearch client
    :param id_str: id for the new document
    :param document: the document source
    :param kwargs: extra keyword arguments passed through to ``client.create``
    :return: the ``client.create`` response, or None when the id already exists
    """
    already_there = client.exists(index=index_str, id=id_str)
    if already_there:
        Log.error(f'已创建：{id_str}')
        return None

    created = client.create(index=index_str, id=id_str, document=document, **kwargs)
    Log.info(created)

    # The response must report the document as created.
    assert created.get('result') == 'created'
    return created


def _es_index(index_str: str, client: Elasticsearch, document: dict, id_str: str = None, **kwargs):
    """
    Index a document: overwrite an existing id in full, or add a new document without an explicit id.

    :param index_str: name of the index
    :param client: Elasticsearch client
    :param document: the document source
    :param id_str: id to overwrite; when falsy, a new document is indexed without passing an id
    :param kwargs: extra keyword arguments passed through to ``client.index``
    :return: the ``client.index`` response, or None when ``id_str`` is given but does not exist
    """

    # New usage:        es.index(document={...})
    # Deprecated usage: es.index(body={...})

    if not id_str:
        # No id supplied: index a brand-new document and expect 'created'.
        res = client.index(index=index_str, document=document, **kwargs)
        assert res.get('result') == 'created'

    else:
        # Id supplied: update that id by fully overwriting it, but only if it already exists.
        if not client.exists(index=index_str, id=id_str):
            Log.error(f'当前id：{id_str} 不存在')
            return None

        res = client.index(index=index_str, document=document, id=id_str, **kwargs)
        assert res.get('result') == 'updated'

    Log.info(res)
    return res


def _es_update(index_str: str, client: Elasticsearch, id_str: str, doc: dict, **kwargs):
    """
    Partially update an existing document.

    Enables you to script document updates. The script can update, delete, or skip modifying the document.
    The update API also supports passing a partial document, which is merged into the existing document.
    To fully replace an existing document, use the index API.

    In short: use update() for partial updates and index() for full replacement.

    :param index_str: name of the index
    :param client: Elasticsearch client
    :param id_str: id of the document to update
    :param doc: partial document passed as ``doc`` to ``client.update``
    :param kwargs: extra keyword arguments passed through to ``client.update``
    :return: the ``client.update`` response, or None when the id does not exist
    """

    if not client.exists(index=index_str, id=id_str):
        Log.error(f'当前id：{id_str} 不存在')
        return None

    # The 'body' parameter is deprecated for the 'update' API and will be removed in a future version.
    # Instead use API parameters directly.
    updated = client.update(index=index_str, id=id_str, doc=doc, **kwargs)
    Log.info(updated)

    return updated


def _bulk_actions(index_str: str,
                  id_str: List[str], body: Union[List[dict], List[int]],
                  op_type: str = 'index'):
    """

    :param index_str:
    :param id_str:
    :param body:
    :param op_type: defaults to index
    :return:
    """
    # All bulk helpers accept an instance of {es} class and an iterable action (any iterable, can also be a generator, which is ideal in most cases since it allows you to index large datasets without the need of loading them into memory).

    ib_list = list(zip(id_str, body))

    for id_body in ib_list:

        actions_dict = {"_index": index_str, "_id": id_body[0], '_op_type': op_type}

        # The bulk() api accepts index, create, delete, and update actions.
        # Use the _op_type field to specify an action (_op_type defaults to index)
        if op_type == 'index' or op_type == 'create':
            actions_dict.update(id_body[1])

        elif op_type == 'delete':
            pass

        elif op_type == 'update':
            actions_dict.update(doc=id_body[1])

        else:
            raise Exception('传参有误')

        yield actions_dict


def _es_bulk(index_str: str, client: Elasticsearch,
             body_list: List[dict], id_list: List[str],
             op_type: str, **kwargs):
    """
    Run a bulk operation through ``elasticsearch.helpers.bulk``.

    :param index_str: name of the index every action targets
    :param client: Elasticsearch client
    :param body_list: per-action payloads, paired with ``id_list``
    :param id_list: document ids, paired with ``body_list``
    :param op_type: bulk operation type handed to ``_bulk_actions``
    :param kwargs: extra keyword arguments passed through to ``bulk``
    :return: whatever ``bulk`` returns
    """
    from elasticsearch.helpers import bulk

    # https://elasticsearch-py.readthedocs.io/en/v7.17.0/helpers.html#bulk-helpers
    action_stream = _bulk_actions(index_str=index_str, body=body_list, id_str=id_list, op_type=op_type)

    outcome = bulk(client=client, actions=action_stream, **kwargs)
    Log.info(outcome)

    return outcome


def _es_analyze(index_str: str, client: Elasticsearch, text: str):
    """
    Run the analyze API of an index on ``text`` and return the produced tokens.

    :param index_str: index passed to the analyze call
    :param client: Elasticsearch client
    :param text: prefer a single str; If an array of strings is provided, it is analyzed as a multi-value field.
    :return: list of the ``token`` values from the response; an empty list when the
             response contains no tokens (previously this case raised IndexError)
    """

    # Data in Elasticsearch falls broadly into two classes: exact values and full text.
    # Exact values are exactly what they sound like, e.g. a date or a user ID; strings can be exact
    # values too, e.g. a username or an e-mail address. For exact values, Foo and foo differ, and so
    # do 2014 and 2014-09-15.
    # Full text, on the other hand, is textual data (usually written in a human language), such as
    # the content of a tweet or an e-mail.

    # When a document is indexed, its full-text fields are analyzed into terms that build the inverted index.
    # When searching a full-text field, the query string has to go through the same analysis process
    # so that the search terms have the same form as the terms in the index.

    # Querying a full-text field applies the same analyzer to the query string to produce the term list.
    # Querying an exact-value field does not analyze the query string; the exact value is searched.

    Log.info('********')

    # By default, Elasticsearch changes the values of text fields during analysis.

    # For example, the default standard analyzer changes text field values as follows:
    # 1.Removes most punctuation
    # 2.Divides the remaining content into individual words, called tokens
    # 3.Lowercases the tokens

    Log.info(text)

    # analyzer
    # The analyzer can be decided per field. Each field may have its own analyzer, either configured
    # on the field itself or taken from a higher-level default (type, index or node).

    # If this parameter is not specified, the analyze API uses the analyzer defined in the field's mapping.
    #
    # If no field is specified, the analyze API uses the default analyzer for the index.
    #
    # If no index is specified, or the index does not have a default analyzer, the analyze API uses
    # the standard analyzer.

    res = client.indices.analyze(body={'text': text}, index=index_str)
    Log.info(res)

    # token is the actual term stored in the index.
    # position is the position of the term in the original text; start_offset and end_offset are
    # the character offsets in the original string.
    # A single comprehension covers one, many and zero tokens; the old `tokens_list[0]` branch
    # crashed when the analyzer produced nothing.
    return [entry.get('token') for entry in res.get('tokens') or []]


def query_term(index_: str, es_: Elasticsearch,
               field: str = None, value: Union[str, int] = None,
               field_field: str = None):
    """
    Build, print and return a term query for ``field=value`` or ``field.field_field=value``.

    :param index_: index name, used for analysis and for the printed request line
    :param es_: Elasticsearch client
    :param field: field name
    :param value: value to match
    :param field_field: optional sub-field; when given, the target becomes ``field.field_field``
    :return: the query dict, or None when neither ``field_field`` nor ``field`` is given
    """

    search_dict = gl_search_dict.copy()

    # Work out which field path the query targets; bail out when there is none.
    if field_field:
        target_field = '.'.join([field, field_field])
    elif field:
        target_field = field
    else:
        return None

    built_query = term_terms_change(index_str=index_, client=es_, field=target_field, value=value)
    search_dict.update(query=built_query)

    print_result(index_, search_dict)
    return search_dict.get('query')


def print_result(index_: str, dict_1: dict):
    """
    Print a search request to stdout: a blank line, the request line, the JSON body and a blank line.

    :param index_: index name interpolated into the printed request line
    :param dict_1: request body, serialised with ``json.dumps(..., ensure_ascii=False)``
    :return: None
    """
    # Leading newline reproduces the blank line that precedes the request line.
    print(f'\nGET /{index_}/_doc/_search')

    # ensure_ascii=False keeps non-ASCII characters readable; the extra newline is the trailing blank line.
    print(json.dumps(dict_1, ensure_ascii=False), end='\n\n')


def term_terms_change(index_str: str, client: Elasticsearch, field: str, value: Any,
                      simple_use: bool = False,
                      term_terms: str = 'term', **kwargs
                      ) -> dict:
    """
    Build a ``term``/``terms`` clause, choosing between ``field`` and ``field.keyword``.

    :param index_str: index whose analyzer is used to inspect ``value``
    :param client: Elasticsearch client
    :param field: field name
    :param value: value to match; a falsy value always targets ``field.keyword``
    :param simple_use: when True (and ``value`` is truthy), target ``field`` directly without analysis
    :param term_terms: either 'term' or 'terms'
    :param kwargs: extra top-level keys merged into the returned dict
    :return: ``{term_terms: {<target field>: value}, **kwargs}``
    """
    assert term_terms in ['term', 'terms']

    def _clause(target: str) -> dict:
        # Same shape for every branch; only the target field differs.
        return {term_terms: {target: value}, **kwargs}

    if not value:
        return _clause(field + '.keyword')

    if simple_use:
        return _clause(field)

    # https://www.elastic.co/guide/en/elasticsearch/reference/7.17/query-dsl-term-query.html#avoid-term-query-text-fields

    # The term query does not analyze the search term. The term query only searches for the exact term you provide.
    # This means the term query may return poor or no results when searching text fields.

    tokens = _es_analyze(index_str=index_str, client=client, text=value)
    Log.info(tokens)

    # The plain field is used only when the value is one of the analyzed tokens and is a str;
    # otherwise the clause falls back to the .keyword sub-field.
    survives_analysis = value in tokens and isinstance(value, str)
    if survives_analysis:
        return _clause(field)

    return _clause(field + '.keyword')


def es_func(func_name: str, **kwargs):
    """
    Dispatch to one of this module's ``_es_*`` helpers by name, logging instead of raising on failure.

    When no truthy ``client`` keyword is supplied, a client is opened for the call and closed
    afterwards. Exceptions raised by the helper are logged with their traceback and swallowed,
    in which case None is returned.

    :param func_name: name of the helper to call; must be one of the allowed names listed below
    :param kwargs: keyword arguments forwarded to the helper
    :return: the helper's return value, or None when an exception was logged
    """

    allowed_names = ['_es_get', '_es_delete', '_es_create', '_es_index', '_es_search', '_es_update', '_es_bulk',
                     '_es_delete_by_query']

    # Only a client opened here is closed here; a caller-supplied client is left open.
    owns_client = not kwargs.get('client')

    if owns_client:
        client = connect_es_client(gl_es_host_new, gl_es_auth)
        kwargs['client'] = client

    try:
        assert func_name in allowed_names

        # Look the helper up on this module (globals().get(func_name) is the alternative).
        helper = getattr(sys.modules[__name__], func_name)
        return helper(**kwargs)

    except AssertionError:
        Log.error('断言失败')
        Log.error(traceback.format_exc())

    except BulkIndexError:
        Log.error('Bulk 遇到错误，被中断')
        Log.error(traceback.format_exc())

    except Exception as e:
        Log.error(e.args)
        Log.error(traceback.format_exc())

    finally:
        # Runs on success and on failure alike.
        time.sleep(1)

        Log.info('es_func() 执行结束')

        if owns_client:
            close_es_client(client=client)


def get_seq_max(index_str: str, client: Elasticsearch):
    """
    Look up the largest ``seq`` value among documents that have a ``seq`` field.

    :param index_str: name of the index to search
    :param client: Elasticsearch client
    :return: the ``seq`` of the first hit when sorting by seq descending, or 0 when the search returns nothing
    """

    # Only documents that actually have a seq field are considered.
    # (A constant_score/filter wrapper around the same exists clause is an alternative form.)
    has_seq = {'exists': {'field': 'seq'}}
    hits = es_func('_es_search', client=client, index_str=index_str, query=has_seq,
                   sort_={"seq": {"order": "desc"}}, size_=1)

    # A falsy result (no hits, or an error logged inside es_func) counts as "no seq yet".
    seq = hits[0].get('_source').get('seq') if hits else 0

    Log.error(f'当前最大seq：{seq}')
    return seq

"""
@blog: https://blog.csdn.net/zyooooxie
@qq: 153132336
@email: [email protected]
"""


# Script entry point: opens one client and defines the values used by the
# commented-out usage examples that follow.
if __name__ == '__main__':
    pass

    # One client for the whole script run; it is closed by the close_es_client(es) call at the end.
    es = connect_es_client(gl_es_host_new, gl_es_auth)

    # Empty log line, used as a visual separator in the log output.
    Log.error('')

    # Index name and user value referenced by the examples below.
    test_index_ = 'zyooooxie-data'
    user_ = 'zyooooxie'

    # test_0821(index_str=test_index_)

    # Placeholder document id and a random number used by the examples to build varying field values.
    index_id = 'xxxxx'
    r_m = random.randrange(99999)

    # query = query_term(field='text', field_field='content', value='你好', index_=test_index_, es_=es)
    # Log.info(query)
    #
    # result = es_func('_es_search', client=es, index_str=test_index_, query=query, size_=10, sort_='seq',
    #                  _source_excludes=['seq'])
    # Log.info(result)
    #
    # result = es_func('_es_search', client=es, index_str=test_index_, query=query, size_=10, sort_=['xxxXXX', 'seq'],
    #                  _source_includes=['xxxXXX', 'seq'])
    # Log.info(result)
    #
    # result = es_func('_es_search', client=es, index_str=test_index_, query=query, size_=10,
    #                  sort_=[{"xxxXXX": {"order": "desc"}}, "seq"],
    #                  _source_includes=['xxxXXX', 'seq',  'text'])
    # Log.info(result)

    # Log.error('')

    # Log.info(es.indices.delete(index=test_index_))

    # get_seq_max(index_str=test_index_, client=es)
    # get_seq_max(index_str='test_index_123', client=es)

    # Log.info('')
    #
    # abc_m = _es_analyze(index_str=test_index_, client=es, text='wrzgKFCgBBxxxXXX')
    # Log.info(abc_m)
    #
    #
    # abc_m = _es_analyze(index_str=test_index_, client=es,
    #                     text=['wrzgKFCgBBxxxXXX', 'wrzgKFCgAAxxxXXX'])
    # Log.info(abc_m)

    # Log.info('')
    #
    # test_es_indices_management(client=es)

    # Log.info('')
    #
    # res_m = term_terms_change(field='r_Id', value='wrzgKFCgAAxxxXXX', index_str=test_index_,
    #                           client=es)
    # Log.info(res_m)
    #
    # res_m = term_terms_change(field='r_Id', value='wrzgKFCgAAxxxXXX', simple_use=True,
    #                           index_str=test_index_, client=es)
    # Log.info(res_m)
    #
    # res_m = term_terms_change(field='r_Id', value='abc123456789', index_str=test_index_, client=es)
    # Log.info(res_m)
    #
    # Log.info('')
    #
    # _es_get(index_str=test_index_, id_str=index_id, client=es)
    # es_func(func_name='_es_get', index_str=test_index_, id_str=index_id)
    #
    # es_func(func_name='_es_get', index_str=test_index_, id_str=index_id + '不存在', client=es)
    #
    # Log.info('')
    #
    # _es_delete(index_str=test_index_, id_str=index_id, client=es)
    # es_func('_es_delete', index_str=test_index_, id_str='index_id' + 'TEST', client=es)
    #
    # Log.info('')
    #
    # _es_create(index_str=test_index_, id_str=index_id, client=es, document={'from': 'xzc', 'text': {'content': 'TEST测试'}})
    # es_func('_es_create', index_str=test_index_, id_str='xie_' + 'TEST' + '2', client=es,
    #         document={'from': 'xzc', 'text': {'content': f'TEST测试-{random.randint(1, 99)}'}})
    #
    # Log.info('')
    #
    # for q_m in ['了了', 'seq:123123']:
    #     _es_search(index_str=test_index_, q=q_m, client=es, size_=500)
    #
    #     Log.info('')
    #
    #     es_func('_es_search', index_str=test_index_, q=q_m, client=es, size_=100, from_=2)
    #
    # Log.info('')
    #
    # b_m = {"query": {"term": {"_id": {"value": index_id}}}}
    # # 传body
    # # The 'body' parameter is deprecated for the 'search' API and will be removed in a future version.
    # _es_search(index_str=test_index_, body=b_m, client=es)
    # es_func('_es_search', index_str=test_index_, body=b_m, client=es)
    # #
    # Log.info('')
    #
    # _es_search(index_str=test_index_, body={"query": {"term": {"_id": {"value": 'index_id'}}}}, client=es)
    # es_func('_es_search', index_str=test_index_, client=es, body={"query": {"term": {"_id": {"value": 'index_id'}}}})
    #
    # Log.info('')
    #
    # q_m = {"term": {"_id": {"value": index_id}}}
    # # 传query
    # _es_search(index_str=test_index_, query=q_m, client=es)
    # es_func('_es_search', index_str=test_index_, client=es, query=q_m)
    #
    # Log.info('')
    #
    # _es_search(index_str=test_index_, query={"term": {"_id": {"value": 'index_id'}}}, client=es)
    # es_func('_es_search', index_str=test_index_, client=es, query={"term": {"_id": {"value": 'index_id'}}})
    #
    # Log.info('')
    #
    # _es_search(index_str=test_index_, body={}, client=es)
    # es_func('_es_search', index_str=test_index_, body={}, client=es)
    # es_func('_es_search', index_str=test_index_, body={}, client=es, sort_='seq:desc')
    #
    # Log.info('')
    # Log.info('')
    #
    # _es_search(index_str=test_index_, body={}, client=es, size_=12)
    # es_func('_es_search', index_str=test_index_, body={}, client=es, size_=12)
    # es_func('_es_search', index_str=test_index_, body={}, client=es, size_=12, sort_='seq:desc')
    #
    # Log.info('')
    #
    # # 传body，sort 有效的值 是'seq:asc'、'seq:desc'
    #
    # es_func('_es_search', client=es, index_str=test_index_, body={}, sort_='seq', size_=15, from_=15)
    # es_func('_es_search', client=es, index_str=test_index_, body={}, sort_='seq:asc', size_=15, from_=15)
    # es_func('_es_search', client=es, index_str=test_index_, body={}, sort_='seq:desc', size_=15, from_=15)
    #
    # Log.info('')
    #
    # # query不可以传{}
    # # _es_search(index_str=test_index_, query={}, client=es, size_=5)
    # es_func('_es_search', index_str=test_index_, query={}, client=es, size_=5)
    # Log.info('')
    #
    # es_func('_es_search', client=es, index_str=test_index_, size_=15, from_=15)
    #
    # Log.info('')
    #
    # _es_search(index_str=test_index_, query={"match": {"seq": 10}}, client=es)
    # _es_search(index_str=test_index_, body={"query": {'match': {"seq": 10}}}, client=es)
    #
    # Log.info('')
    #
    # es_func('_es_search', index_str=test_index_, query={"range": {"seq": {"lte": 4220570, "gte": 3157260}}},
    #         client=es, size_=50)
    #
    # es_func('_es_search', index_str=test_index_, query={"range": {"seq": {"lte": 4220570, "gte": 3157260}}},
    #         client=es, size_=50, sort_={'seq': {'order': 'desc'}})
    #
    # _es_search(index_str=test_index_, query={"match_phrase": {"text.content": '单位应该资源谢谢'}}, client=es)
    # _es_search(index_str=test_index_, query={"bool": {"filter": {"terms": {"seq": [5652, 5, 58587, 5910, 56, 55]}}}},
    #            client=es)
    #
    # Log.info('')
    #
    # _es_search(index_str=test_index_, query={"bool": {
    #     "must": [{"term": {"r_Id": "wrcIqjVAAAXdxxxXXX"}},
    #              {"range": {"xxxXXX": {"lt": 1698508800000, "gt": 1696780800000}}}]}}, client=es)
    #
    # _es_search(index_str=test_index_, query={"bool": {
    #     "must": [{"term": {"r_Id.keyword": "wrcIqjVAAAXdxxxXXX"}},
    #              {"range": {"xxxXXX": {"lt": 1698508800000, "gt": 1696780800000}}}]}}, client=es)
    #
    # _es_search(index_str=test_index_, query={"bool": {
    #     "must": [{"term": {"r_Id": "abc123"}},
    #              {"range": {"xxxXXX": {"lt": 1698508800000, "gt": 1696780800000}}}]}}, client=es)
    #
    # _es_search(index_str=test_index_, query={"bool": {
    #     "must": [{"term": {"r_Id.keyword": "abc123"}},
    #              {"range": {"xxxXXX": {"lt": 1698508800000, "gt": 1696780800000}}}]}}, client=es)
    #
    # Log.info('')
    #
    # _es_search(index_str=test_index_, query={"bool": {
    #     "should": [{"term": {"r_Id": "wrcIqjVAAAXdxxxXXX"}},
    #                {"range": {"xxxXXX": {"lt": 1698508800000, "gt": 1696780800000}}}]}}, client=es)
    #
    # _es_search(index_str=test_index_, query={"term": {"r_Id": "wrcIqjVAAAXdxxxXXX"}},
    #            client=es)
    #
    # _es_search(index_str=test_index_, query={"range": {"xxxXXX": {"lt": 1698508800000, "gt": 1696780800000}}},
    #            client=es)
    #
    # Log.info('')
    #
    # _es_search(index_str=test_index_, query={"bool": {
    #     "must_not": [{"term": {"r_Id.keyword": "wrcIqjVAAAXdxxxXXX"}},
    #                  {"range": {"xxxXXX": {"lt": 1698508800000, "gt": 1696780800000}}}]}}, client=es)
    #
    # _es_search(index_str=test_index_, query={"range": {"xxxXXX": {"lte": 1698508800000}}}, client=es)
    #
    # Log.info('')
    #
    # # 新的id(不存在)
    # id_abc = 'abc' + index_id
    # _es_search(index_str=test_index_, query={"match": {"_id": id_abc}}, client=es)
    #
    # _es_update(index_str=test_index_, id_str=id_abc,
    #            doc={"to{}".format(random.randrange(99999)): "xzc"}, client=es)
    # _es_search(index_str=test_index_, query={"match": {"_id": id_abc}}, client=es)
    #
    # es_func('_es_update', index_str=test_index_, id_str=id_abc,
    #         doc={"to{}".format(random.randrange(99999)): "xzc"}, client=es)
    # _es_search(index_str=test_index_, query={"match": {"_id": id_abc}}, client=es)
    #
    # Log.info('')
    #
    # # 已有的id
    # _es_search(index_str=test_index_, query={"match": {"_id": index_id}}, client=es)
    #
    # _es_update(index_str=test_index_, id_str=index_id, client=es,
    #            doc={"from": "xzc-{}".format(r_m), "r_Id": ""})
    # _es_search(index_str=test_index_, query={"match": {"_id": index_id}}, client=es)
    #
    # es_func('_es_update', index_str=test_index_, id_str=index_id, client=es,
    #         doc={"from": "ZY-{}".format(r_m * 2)})
    # _es_search(index_str=test_index_, query={"match": {"_id": index_id}}, client=es)
    #
    # Log.info('')
    #
    # _es_search(index_str=test_index_, query={"match": {"from": 'xzc'}}, client=es)
    #
    # _es_index(index_str=test_index_, client=es, document={'from': 'xzc', 'text': {'content': 'TEST测试'}})
    # time.sleep(1)
    # _es_search(index_str=test_index_, query={"match": {"from": 'xzc'}}, client=es)
    #
    # es_func('_es_index', index_str=test_index_, client=es, document={'from': 'xzc', 'text': {'content': 'TEST测试'}})
    # time.sleep(1)
    # _es_search(index_str=test_index_, query={"match": {"from": 'xzc'}}, client=es)
    #
    # Log.info('')
    #
    # _es_search(index_str=test_index_, query={"match": {"_id": index_id}}, client=es)
    #
    # _es_index(index_str=test_index_, client=es, id_str=index_id,
    #           document={'seq': 123456 + r_m, 'from': 'xzc2', 'text': {'content': 'TEST测试--'}})
    # time.sleep(1)
    # _es_search(index_str=test_index_, query={"match": {"_id": index_id}}, client=es)
    #
    # es_func('_es_index', index_str=test_index_, client=es, id_str=index_id,
    #         document={'seq': 654321 + r_m, 'from': 'xzc2', 'text': {'content': 'TEST测试--TEST测试'}})
    # time.sleep(1)
    # _es_search(index_str=test_index_, query={"match": {"_id": index_id}}, client=es)
    #
    # Log.info('')
    #
    # id_list_m = ['x1026' + str(i) for i in range(12)]
    #
    # _es_search(index_str=test_index_, client=es, query={'ids': {'values': id_list_m}})
    #
    # Log.info('')
    #
    # _es_delete_by_query(index_str=test_index_, client=es, body={'query': {'ids': {'values': id_list_m}}})
    # _es_search(index_str=test_index_, client=es, query={'ids': {'values': id_list_m}})
    #
    # Log.info('')
    #
    # for i in id_list_m[:5]:
    #     _es_get(index_str=test_index_, client=es, id_str=i)
    #
    #     _es_update(index_str=test_index_, id_str=i, client=es,
    #                doc={"from123321": "zy-{}-123".format(i)})
    #     _es_get(index_str=test_index_, client=es, id_str=i)
    #
    #     _es_delete(index_str=test_index_, client=es, id_str=i)
    #     _es_get(index_str=test_index_, client=es, id_str=i)
    #
    #     _es_create(index_str=test_index_, client=es, id_str=i, document={'hhh': 'Xie_' + str(i) * 3})
    #     _es_get(index_str=test_index_, client=es, doc_type='_all', id_str=i)
    #
    #     _es_index(index_str=test_index_, client=es, id_str=i, document={'hhh': 'x_' + str(i) * 1})
    #     _es_get(index_str=test_index_, client=es, id_str=i)
    #
    #     _es_delete(index_str=test_index_, client=es, id_str=i)
    #     _es_get(index_str=test_index_, client=es,  id_str=i)
    #
    #     _es_index(index_str=test_index_, client=es, id_str=i, document={'hhh': 'XieXie_' + str(i) * 6})
    #     _es_get(index_str=test_index_, client=es, id_str=i)
    #
    #     _es_index(index_str=test_index_, client=es, document={'hhh': 'Xie_xxx' + str(i) * 3})
    #
    #     Log.info('')
    #
    # _es_search(index_str=test_index_, client=es, query={'ids': {'values': id_list_m}}, size_=1000)
    #
    # abc = _es_search(index_str=test_index_, query={"range": {"seq": {'gte': 99999}}}, client=es, size_=10)
    # abc = [h.get('_id') for h in abc]
    # Log.info(abc)

    # abc = random.choice(string.ascii_letters)
    # abc = 'TEST'
    #
    # id_list_m = ['xie_' + abc + str(i) for i in range(0, 10009)]
    # _es_search(index_str=test_index_, client=es, query={'ids': {'values': id_list_m}}, get_more_10000=True)

    # max_seq = get_seq_max(index_str=test_index_, client=es)
    # es_func('_es_bulk', index_str=test_index_, client=es,
    #         id_list=id_list_m,
    #         # id_list=id_list_m,
    #
    #         # body_list=[{'from': 'xie', 'text': {'content': i}} for i in range(10009)],
    #         # op_type='delete',
    #
    #         # op_type='create',
    #         op_type='index',
    #         body_list=[{'hhh': 'xie' + str(i) * 3, 'seq': max_seq + i + 1} for i in range(10005)],
    #         # op_type='update',
    #
    #         # op_type='delete111',
    #         )
    #
    # time.sleep(2)
    # _es_search(index_str=test_index_, client=es, query={'ids': {'values': id_list_m}}, get_more_10000=True)
    # _es_search(index_str=test_index_, client=es, query={'ids': {'values': id_list_m}}, size_=1000)
    #
    # q_m = {'exists': {'field': 'seq'}}
    # res_m = es_func('_es_search', client=es, index_str=test_index_, query=q_m, sort_={"seq": {"order": "desc"}},
    #                 size_=10)
    #
    # _es_search(index_str=test_index_, client=es,
    #            query=term_terms_change(index_str=test_index_, client=es, field='action', value='send'))
    #
    # es_func('_es_search', index_str=test_index_, client=es,
    #         size_=2000,
    #
    #         query=term_terms_change(index_str=test_index_, client=es, field='r_Id',
    #                                 value='wrcIqjVAAAiXXXXXX'),
    #         )

    # _es_search(index_str=test_index_, client=es,
    #            query=term_terms_change(index_str=test_index_, client=es, field='from',
    #                                    value='user_'))

    # Log.info('')
    #
    # q__ = term_terms_change(index_str=test_index_, client=es, field='from', value='123123')
    # _es_search(index_str=test_index_, client=es,
    #            get_more_10000=True,
    #            query=q__)
    #
    #
    # _es_delete_by_query(index_str=test_index_, client=es, body={'query': q__})
    # _es_search(index_str=test_index_, client=es,
    #            get_more_10000=True,
    #            query=q__)
    #
    # Log.info('')
    #
    # es_func('_es_delete_by_query', index_str=test_index_, client=es, body={'query': q__})
    #
    # _es_search(index_str=test_index_, client=es,
    #            get_more_10000=True,
    #            query=q__)
    #
    # _es_search(index_str=test_index_, client=es,
    #            query=term_terms_change(index_str=test_index_, client=es, field='from', value='user_'))
    #
    # _es_search(index_str=test_index_, client=es,
    #            get_more_10000=True,
    #            query=term_terms_change(index_str=test_index_, client=es, field='from', value='user_'))
    #
    # _es_search(index_str=test_index_, client=es,
    #            query={
    #                'bool': {'must': [term_terms_change(index_str=test_index_, client=es, field='from', value=user_),
    #                                  {'exists': {'field': 'r_Id'}}],
    #                         'must_not': term_terms_change(index_str=test_index_, client=es, field='r_Id', value='')
    #                         }},
    #            collapse={'field': 'r_Id.keyword'}, sort_={"seq": {"order": "desc"}}
    #            )
    #
    # _es_search(index_str=test_index_, client=es,
    #            query={'bool': {'must': [term_terms_change(index_str=test_index_, client=es, field='from', value=user_),
    #                                     {'exists': {'field': 'receive'}},
    #                                     {'prefix': {'receive.keyword': 'wm'}}
    #                                     ]}},
    #            collapse={'field': 'receive.keyword'}, sort_={"seq": {"order": "desc"}}
    #            )
    #
    # _es_search(index_str=test_index_, client=es,
    #            query={'bool': {'must': [term_terms_change(index_str=test_index_, client=es, field='from', value=user_),
    #                                     term_terms_change(index_str=test_index_, client=es, field='msgType',
    #                                                       value='text'),
    #                                     {'exists': {'field': 'receive'}}],
    #                            'must_not': {'prefix': {'receive.keyword': 'wm'}}
    #                            }},
    #            collapse={'field': 'receive.keyword'}, sort_={"seq": {"order": "desc"}}
    #            )

    close_es_client(es)

本文链接：https://blog.csdn.net/zyooooxie/article/details/124640467

个人博客 https://blog.csdn.net/zyooooxie

你可能感兴趣的:(数据库学习,python,elasticsearch)

Ubuntu 手动安装 Open WebUI 完整指南老大白菜 python ubuntu linux 运维
Ubuntu手动安装OpenWebUI完整指南前提条件在安装OpenWebUI之前，请确保您的系统满足以下要求：Ubuntu22.04LTS或更高版本Python3.10+Node.js18+Git至少4GB内存足够的磁盘空间（推荐20GB以上）安装步骤1.更新系统包sudoaptupdatesudoaptupgrade-y2.安装必要的依赖#安装Python和Node.jssudoaptinst
cmd命令行把bat或exe执行为windows服务产品媛Gloria Deng
cmd命令行把bat或exe执行为windows服务：在bin目录下执行以下命令sccreate[服务名]binPath=[bin目录下的.bat/.exe]start=auto createelasticsearchbinPath=elasticsearch.batstart=auto
Python中try-except-else-finally语句用于处理异常上趣工作室 python python 开发语言
在Python中，try-except-else-finally语句用于处理异常和无论是否发生异常都需要执行的代码块。下面是每个部分的用法：try：在try块中编写可能引发异常的代码。如果没有异常发生，程序将继续执行try块后面的代码；如果发生异常，程序将跳到适当的except块。except：在except块中处理特定类型的异常。可以指定一个或多个异常类型，以及相应的处理代码。如果发生指定类型的
.net如何调用python 轮胎技术Tyretek python 开发语言 pycharm ide
.NET可以通过调用Python的执行文件或者Python库来调用Python代码。一种常用的方法是在.NET中使用Process类调用Python的执行文件。这样做的好处是你可以将Python代码打包成独立的文件，不需要在.NET中引用任何Python相关的库。下面是一个示例，假设你有一个Python文件"test.py"，内容如下：defgreet(name):print("Hello,"+n
vb调用python函数_vb.net / C# 调用 python weixin_39522170 vb调用python函数
1.IronPython简介IronPython是一种在.NET及Mono上的Python实现，由微软的JimHugunin所发起，是一个开源的项目，基于微软的DLR引擎；托管于微软的开源网站CodePlex(www.codeplex.com)。2.安装IronPython安装下载下来的安装包(要先装VS)。3.创建项目添加引用：浏览到IronPython的安装目录中，添加对IronPython.
Python 爬虫实战：从喜马拉雅爬取有声书播放量，挖掘热门音频内容西攻城狮北 python 爬虫音视频实战案例
目录引言一、项目背景与需求分析1.1喜马拉雅平台的特点1.2数据爬取目标二、技术选型与工具准备2.1技术选型2.2工具准备三、爬取有声书播放量数据3.1获取音频列表3.2获取音频详情四、数据存储五、数据处理与分析5.1数据清洗5.2数据分析六、可视化展示七、总结与展望引言喜马拉雅作为国内知名的音频分享平台，拥有海量的有声书、广播剧、音乐等内容。通过爬取喜马拉雅上的有声书播放量数据，我们可以分析哪些
Ubuntu交叉编译 arm板子上的TVM 陈有爱 TVM ubuntu 人工智能
目录X86Ubuntu的TVM安装LLVM下载tvm配置config.cmake编译源码python安装测试是否安装成功可以在安装一些库，用于RPCTracker和auto-tuning交叉编译801arm的TVM交叉编译链下载配置config.cmake编译源码编译的时候可能会遇到错误ONNX模型转换为TVM模型创建pre.py，将onnx模型编译成tvm.so文件测试TVM模型修改demo程序
【Python入门基础】——第1篇：从入门到精通：Python简介与环境搭建详解猿享天开 python从入门到精通 python 开发语言
第1篇：Python简介与环境搭建目录什么是Python？Python的历史与特点安装Python解释器配置开发环境选择合适的集成开发环境（IDE）使用文本编辑器运行第一个Python程序常见问题及解决方法总结什么是Python？Python是一种高级、通用、解释型的编程语言，由GuidovanRossum于1991年首次发布。Python以其简洁易读的语法、广泛的应用领域和强大的社区支持，成为全
python与excel整合全教程刘同学Python学习日记 python excel 开发语言
Python与Excel的整合非常强大，尤其适合处理大数据、自动化表格操作以及进行高级数据分析。以下是一个全教程，涵盖常用的Python库及其应用：1.准备工作安装必要的库：使用以下命令安装常用库：pipinstallopenpyxlpandasxlrdxlsxwriterpywin32openpyxl:用于操作Excel的.xlsx文件（推荐）。pandas:强大的数据分析工具，支持读取和写入E
高效目录操作：如何使用 os.listdir 函数列出文件和文件夹刘同学Python学习日记学习记录 os库 python 学习
在Python中，os.listdir()是一个用于列出指定目录下所有文件和子目录名称的函数。它来自于os模块，该模块提供了与操作系统进行交互的多种功能。importos#列出当前目录下的所有文件和子目录entries=os.listdir('.')print(entries)在这个示例中：os.listdir('.')将返回当前工作目录（用.表示）的所有文件和目录的名称列表。entries变量将
【AI中数学-数理统计-综合实例-包括python实现】揭开数据的面纱：真实样本数据的探索与可视化云博士的AI课堂 AI中的数学人工智能 python 数理统计数据预处理数据探索数据可视化机器学习
第五章：数理统计-综合实例1.揭开数据的面纱：真实样本数据的探索与可视化在人工智能（AI）应用中，数据是构建算法和模型的基石，而数理统计则为我们提供了理解和处理这些数据的工具。数据探索和可视化是数理统计中至关重要的步骤，它们不仅能帮助我们理解数据的分布、关系和趋势，还能够为后续的建模工作提供依据。本节将通过五个实际案例，展示如何使用数理统计和可视化技术对真实样本数据进行探索。每个案例都包括具体的描
Python.NET 安装与使用教程卫伊祺Ralph
Python.NET安装与使用教程项目地址:https://gitcode.com/gh_mirrors/py/pythonnet本教程将指导你了解并安装Python.NET——这是一个让Python程序员能够无缝集成.NET框架的开源库。1.项目目录结构及介绍在克隆或下载pythonnet的源代码仓库后，你会看到以下基本目录结构：pythonnet/├──LICENSE#许可文件├──MANIF
Apple M1 ARM MacBook 安装 Apache TVM FF-Studio arm开发 apache
一、前置准备AppleSiliconMacBook本文以AppleM1/M2为例，M3及后续版本同理。已安装HomebrewmacOS上的包管理器，可前往Homebrew官网查看安装指引。已安装Anaconda或Miniforge确保Conda是ARM版本（通过condainfo|grepplatform验证应为osx-arm64）。二、创建并激活Conda环境在终端创建环境（Python3.8为
python学习专栏 zhousenshan python新赛道 python
推荐学习资料《15分钟轻松学Python》教程目录-CSDN博客每天40分玩转Django教程目录-CSDN博客Pycharm社区版搭建Django环境及Django简单项目、操控mysql数据库-CSDN博客这个开源有关于事务方面高级内容介绍：django-vue-lyadmin:django-vue-lyadmin前端采用vue3+elementplus,后端采用PythonDjangoDRF
[笔记] 如何在win上安装fbprophet库（Anaconda-Spyder） WangMH_CHN 笔记
fbprophet库是Facebook开发的一个用于时间序列分析的库，该库的运行需要用到C++编译，因此最开始使用python安装的时候会出现很多问题。本文总结了整个安装过程，记录在此。首先，先阐述初始配置情况：我习惯在Anaconda上使用Spyder来写代码，win10系统，系统基础的环境是python3.11。但是fbprophet只支持py2.7、3.5~3.8，因此需要配置一
python文件：py,ipynb, pyi, pyc, pyd, pyo都是什么文件？ m 哆哆.ღ python python 开发语言
python：py,ipynb,pyi,pyc,pyd,pyo都是什么文件？1python文件类型介绍1.1.py文件：源代码.py文件是Python最基本的源代码文件格式，用于存储纯文本形式的Python代码。它是开发者编写程序的主要场所，包含函数、类、变量定义以及执行逻辑。Python解释器直接读取并执行.py文件中的指令。例如，创建一个简单的hello.py文件，内容如下：print("He
【Python进阶】Python中的电子邮件处理：SMTP、IMAP和MIME m 哆哆.ღ python python 服务器网络
1、电子邮件概述1.1电子邮件的工作原理1.1.1邮件服务器与客户端电子邮件的运作基于客户端-服务器架构，用户通常通过邮件客户端软件（如Outlook、Thunderbird等）或者网页版邮件服务（如Gmail、YahooMail等）撰写、发送和接收邮件。邮件客户端负责与邮件服务器进行通信，邮件服务器则承担着存储、转发和管理邮件的任务。当用户编写一封电子邮件后，邮件首先被客户端软件打包并通过SMT
Python 数据分析 - 初识 Pandas 一名技术极客 #Python 进阶爬虫 python 数据分析 pandas
Python数据分析-初识Pandas简介SeriesDataFrame创建基本操作添加删除简介Pandas基于NumPy开发，它提供了快速、灵活、明确的数据结构，旨在简单、直观地处理数据。Pandas适用于处理以下类型的数据：有序和无序的时间序列数据带行列标签的矩阵数据，包括同构或异构型数据与SQL或Excel表类似的，含异构列的表格数据任意其它形式的观测、统计数据集，数据转入Pandas数据结
Python中使用SQLite 昂热校长
开发十年，就只剩下这套Java开发体系了>>>SQLite：SQLite是一种数据库，Python中集成了SQLite3，所以在Python中使用SQLite，可以直接导入SQLite包，不需要做额外的配置。更多的SQLite简介和相关知识可以查看专门的教程：http://www.runoob.com/sqlite/sqlite-tutorial.htmlPython中使用SQLite:可以直接像
Python自动摘要与文本摘录 CrMylive. python easyui 开发语言
前言随着互联网时代的到来，信息爆炸的问题越来越严重，人们需要处理的信息量也越来越大。在这种情况下，文本摘要和摘录技术变得越来越重要。文本摘要和摘录技术可以自动从大量的文本中提取出重要的信息，为人们快速掌握信息提供了有效的途径。本文将介绍Python自动摘要与文本摘录的相关技术，包括文本摘要和摘录的定义、方法、应用场景等方面。本文将从以下几个方面进行讲解：文本摘要和摘录的定义与概述文本摘要的方法和技
pycharm、anaconda安装tensorflow问题努力的南波万 pycharm tensorflow neo4j
(pythonconda01)C:\Users\lvd13>condainstalltensorflowChannels:-defaultsPlatform:win-64Collectingpackagemetadata(repodata.json):doneSolvingenvironment:|warninglibmambaAddedemptydependencyforproblemtypeS
2025年美赛数学建模 MCM Problem B: Managing Sustainable Tourism 问题 B：可持续旅游管理代码解析 2025年数学建模美赛 2025年美赛MCM/ICM 数学建模旅游 2025美赛 2025年数学建模美赛 python代码 matlab 可持续旅游管理
目录代码框架：遗传算法优化可持续旅游模型python代码代码解析：matlab代码代码解析：代码框架：遗传算法优化可持续旅游模型python代码importnumpyasnpimportrandomimportmatplotlib.pyplotasplt#定义遗传算法的参数POP_SIZE=100#种群大小GENS=500#迭代代数MUTATION_RATE=0.01#变异率CROSSOVER_R
python 操作sqlite COSummer python python sqlite
importsqlite3if__name__=='__main__':cx=sqlite3.connect("C:/Users/503061752/Desktop/AutoTest.sdb")cu=cx.cursor()cu.execute("select*fromwaiting_time")res=cu.fetchall()forcurresinres:print(curres)以上代码实现的
Python 实现文本摘要功能热爱技术的小胡 python
互联网时代信息爆炸式增长，人们面对越来越多的信息无法一一阅读，而文本自动摘要技术可以一定程度上缓解这个问题。摘要就是一篇文章的核心部分信息，文本自动摘要技术分抽取式摘要和生成式摘要，前者是在原文中挑选一定比例的句子拼凑成一个摘要，后者更接近人为的总结式简写一篇文章。目前越来越多的研究者使用深度神经网络来研究生成式摘要技术，但是难度也挺大，效果有限。本文的方法是使用基于启发式规则的算法实现了一个抽取
使用Python实现文本自动摘要 UIEdit python 人工智能开发语言
使用Python实现文本自动摘要在互联网时代，信息量不断增加，但人类阅读速度是有限的，如何高效地阅读和获取信息成为一项重要的任务。文本自动摘要技术可以帮助我们快速获取一篇文章的核心内容，从而提高阅读效率。文本自动摘要分为抽取式摘要和生成式摘要两种方法。其中抽取式摘要是从原文中选择一定比例的句子拼凑成一个摘要，而生成式摘要是通过对原文进行解析、理解、推理等方式生成摘要。下面我们将介绍如何使用Pyth
2025年01月30日Github流行趋势油泼辣子多加 GitHub每日趋势 github
项目名称：Janus项目地址url：https://github.com/deepseek-ai/Janus项目语言：Python历史star数：11942今日star数：2187项目维护者：learningpro,hills-code,TheOneTrueGuy,mowentian,soloice项目简介：Janus系列：统一多模态理解和生成模型项目名称：DeepSeek-Coder项目地址ur
快手NS sig3签名算法（2025年1月） sh_moranliunian 蜘蛛侠网络爬虫后端 python 爬虫算法
kuaishou/__NS_sig3.js源码见文章最后。python中调用示例importjsonimportsysimportrequestsimportosimportexecjsimporthashlibimportdatetimefromCookieUtilimportCookieUtilfromfake_useragentimportUserAgentnormal_js=execjs.
通过爬虫方式实现视频号助手发布视频 sh_moranliunian 蜘蛛侠 python 网络协议爬虫网络爬虫后端
1、将真实的cookie贴到解压后目录中cookie.txt文件里，修改python代码里的user_agent和video_path,cover_path等变量的值，最后运行python脚本即可；2、运行之前根据import提示安装一些常见依赖，比如requests等；3、2025年1月份最新版；代码如下：importjsonimporttimeimportrequestsimportosimp
python的with中的变量是不是局部作用域？Python上下文管理器-with使用? 梦境之冢 python 开发实战问题解决 python 开发语言
一、Python中的with语句并不创建新的作用域。‌在with语句中定义的变量，其作用域并不局限于with语句块内部，而是在整个作用域内都是可见的。这意味着在with语句块外部也可以访问这些变量，不会因为with语句的存在而创建新的作用域。例如，以下代码中的变量a在with语句块外部也可以访问：withopen('test.txt','w')asfout:a=12line='testline\n
【零散技术】MAC 安装多版本node Odoo穆尘前端 macos
时间是我们最宝贵的财富,珍惜手上的每个时分不同前端项目运行的node版本不一致，会导致无法运行，就像Odoo也需要依据版本使用对应的python环境。python可以用conda随时切换版本，那么Node可以吗？答案是肯定的。1、安装n（类似于conda的工具，单一字符还是很特别）npminstall-gn2、安装nodesudo-En14.21.3版本参照表Node.jsVersionRelea
mongodb3.03开启认证 21jhf mongodb
下载了最新mongodb3.03版本，当使用--auth 参数命令行开启mongodb用户认证时遇到很多问题，现总结如下：（百度上搜到的基本都是老版本的，看到db.addUser的就是，请忽略） Windows下我做了一个bat文件，用来启动mongodb，命令行如下： mongod --dbpath db\data --port 27017 --directoryperdb --logp
【Spark103】Task not serializable bit1129 Serializable
Task not serializable是Spark开发过程最令人头疼的问题之一，这里记录下出现这个问题的两个实例，一个是自己遇到的，另一个是stackoverflow上看到。等有时间了再仔细探究出现Task not serializable的各种原因以及出现问题后如何快速定位问题的所在，至少目前阶段碰到此类问题，没有什么章法 1. package spark.exampl
你所熟知的 LRU(最近最少使用) dalan_123 java
关于LRU这个名词在很多地方或听说，或使用，接下来看下lru缓存回收的实现 1、大体的想法 a、查询出最近最晚使用的项 b、给最近的使用的项做标记通过使用链表就可以完成这两个操作，关于最近最少使用的项只需要返回链表的尾部；标记最近使用的项，只需要将该项移除并放置到头部，那么难点就出现你如何能够快速在链表定位对应的该项？这时候多
Javascript 跨域周凡杨 JavaScript jsonp 跨域 cross-domain
linux下安装apache服务器 g21121 apache
安装apache 下载windows版本apache，下载地址：http://httpd.apache.org/download.cgi 1.windows下安装apache Windows下安装apache比较简单，注意选择路径和端口即可，这里就不再赘述了。 2.linux下安装apache：下载之后上传到linux的相关目录，这里指定为/home/apach
FineReport的JS编辑框和URL地址栏语法简介老A不折腾 finereport web报表报表软件语法总结
JS编辑框： 1.FineReport的js。作为一款BS产品，browser端的JavaScript是必不可少的。 FineReport中的js是已经调用了finereport.js的。大家知道，预览报表时，报表servlet会将cpt模板转为html，在这个html的head头部中会引入FineReport的js，这个finereport.js中包含了许多内置的fun
根据STATUS信息对MySQL进行优化墙头上一根草 status
mysql 查看当前正在执行的操作，即正在执行的sql语句的方法为: show processlist 命令 mysql> show global status;可以列出MySQL服务器运行各种状态值，我个人较喜欢的用法是show status like '查询值%';一、慢查询mysql> show variab
我的spring学习笔记7-Spring的Bean配置文件给Bean定义别名 aijuans Spring 3
本文介绍如何给Spring的Bean配置文件的Bean定义别名？原始的 <bean id="business" class="onlyfun.caterpillar.device.Business"> <property name="writer"> <ref b
高性能mysql 之性能剖析 annan211 性能 mysql mysql 性能剖析剖析
1 定义性能优化 mysql服务器性能，此处定义为响应时间。在解释性能优化之前，先来消除一个误解，很多人认为，性能优化就是降低cpu的利用率或者减少对资源的使用。这是一个陷阱。资源是用来消耗并用来工作的，所以有时候消耗更多的资源能够加快查询速度，保持cpu忙碌，这是必要的。很多时候发现编译进了新版本的InnoDB之后，cpu利用率上升得很厉害，这并不
主外键和索引唯一性约束百合不是茶索引唯一性约束主外键约束联机删除
目标;第一步;创建两张表用户表和文章表第二步;发表文章 1,建表; ---用户表 BlogUsers --userID唯一的 --userName --pwd --sex create
线程的调度 bijian1013 java 多线程 thread 线程的调度 java多线程
1. Java提供一个线程调度程序来监控程序中启动后进入可运行状态的所有线程。线程调度程序按照线程的优先级决定应调度哪些线程来执行。 2. 多数线程的调度是抢占式的（即我想中断程序运行就中断，不需要和将被中断的程序协商） a)
查看日志常用命令 bijian1013 linux 命令 unix
一.日志查找方法，可以用通配符查某台主机上的所有服务器grep "关键字" /wls/applogs/custom-*/error.log 二.查看日志常用命令1.grep '关键字' error.log：在error.log中搜索'关键字'2.grep -C10 '关键字' error.log：显示关键字前后10行记录3.grep '关键字' error.l
【持久化框架MyBatis3一】MyBatis版HelloWorld bit1129 helloworld
MyBatis这个系列的文章，主要参考《Java Persistence with MyBatis 3》。样例数据本文以MySQL数据库为例，建立一个STUDENTS表，插入两条数据，然后进行单表的增删改查 CREATE TABLE STUDENTS ( stud_id int(11) NOT NULL AUTO_INCREMENT,
【Hadoop十五】Hadoop Counter bit1129 hadoop
1. 只有Map任务的Map Reduce Job File System Counters FILE: Number of bytes read=3629530 FILE: Number of bytes written=98312 FILE: Number of read operations=0 FILE: Number of lar
解决Tomcat数据连接池无法释放 ronin47 tomcat 连接池　优化
近段时间，公司的检测中心报表系统(SMC)的开发人员时不时找到我，说用户老是出现无法登录的情况。前些日子因为手头上有Jboss集群的测试工作，发现用户不能登录时，都是在Tomcat中将这个项目Reload一下就好了，不过只是治标而已，因为大概几个小时之后又会再次出现无法登录的情况。今天上午，开发人员小毛又找到我，要我协助将这个问题根治一下，拖太久用户难保不投诉。简单分析了一
java-75-二叉树两结点的最低共同父结点 bylijinnan java
import java.util.LinkedList; import java.util.List; import ljn.help.*; public class BTreeLowestParentOfTwoNodes { public static void main(String[] args) { /* * node data is stored in
行业垂直搜索引擎网页抓取项目 carlwu Lucene Nutch Heritrix Solr
公司有一个搜索引擎项目，希望各路高人有空来帮忙指导，谢谢！这是详细需求：（1）通过提供的网站地址(大概100-200个网站)，网页抓取程序能不断抓取网页和其它类型的文件（如Excel、PDF、Word、ppt及zip类型），并且程序能够根据事先提供的规则，过滤掉不相干的下载内容。（2）程序能够搜索这些抓取的内容，并能对这些抓取文件按照油田名进行分类，然后放到服务器不同的目录中。
[通讯与服务]在总带宽资源没有大幅增加之前,不适宜大幅度降低资费 comsci 资源
降低通讯服务资费，就意味着有更多的用户进入，就意味着通讯服务提供商要接待和服务更多的用户，在总体运维成本没有由于技术升级而大幅下降的情况下，这种降低资费的行为将导致每个用户的平均带宽不断下降，而享受到的服务质量也在下降，这对用户和服务商都是不利的……
Java时区转换及时间格式 Cwind java
本文介绍Java API 中 Date, Calendar, TimeZone和DateFormat的使用，以及不同时区时间相互转化的方法和原理。问题描述：向处于不同时区的服务器发请求时需要考虑时区转换的问题。譬如，服务器位于东八区（北京时间，GMT+8:00），而身处东四区的用户想要查询当天的销售记录。则需把东四区的“今天”这个时间范围转换为服务器所在时区的时间范围。
readonly,只读，不可用 dashuaifu js jsp disable readOnly readOnly
readOnly 和 readonly 不同，在做js开发时一定要注意函数大小写和jsp黄线的警告！！！我就经历过这么一件事：使用readOnly在某些浏览器或同一浏览器不同版本有的可以实现“只读”功能，有的就不行，而且函数readOnly有黄线警告！！！就这样被折磨了不短时间！！！（期间使用过disable函数，但是发现disable函数之后后台接收不到前台的数据！！！）
LABjs、RequireJS、SeaJS 介绍 dcj3sjt126com js Web
LABjs 的核心是 LAB（Loading and Blocking）：Loading 指异步并行加载，Blocking 是指同步等待执行。LABjs 通过优雅的语法（script 和 wait）实现了这两大特性，核心价值是性能优化。LABjs 是一个文件加载器。RequireJS 和 SeaJS 则是模块加载器，倡导的是一种模块化开发理念，核心价值是让 JavaScript 的模块化开发变得更
[应用结构]入口脚本 dcj3sjt126com PHP yii2
入口脚本入口脚本是应用启动流程中的第一环，一个应用（不管是网页应用还是控制台应用）只有一个入口脚本。终端用户的请求通过入口脚本实例化应用并将请求转发到应用。 Web 应用的入口脚本必须放在终端用户能够访问的目录下，通常命名为 index.php，也可以使用 Web 服务器能定位到的其他名称。控制台应用的入口脚本一般在应用根目录下命名为 yii（后缀为.php），该文
hadoop shell命令 eksliang hadoop hadoop shell
cat chgrp chmod chown copyFromLocal copyToLocal cp du dus expunge get getmerge ls lsr mkdir movefromLocal mv put rm rmr setrep stat tail test text
MultiStateView不同的状态下显示不同的界面 gundumw100 android
只要将指定的view放在该控件里面，可以让该view在不同的状态下显示不同的界面，这对ListView很有用，比如加载界面，空白界面，错误界面。而且这些界面由你指定布局，非常灵活。 PS：ListView虽然可以设置一个EmptyView，但使用起来不方便，不灵活，有点累赘。 <com.kennyc.view.MultiStateView xmlns:android="
jQuery实现页面内锚点平滑跳转 ini JavaScript html jquery html5 css
平时我们做导航滚动到内容都是通过锚点来做，刷的一下就直接跳到内容了，没有一丝的滚动效果，而且 url 链接最后会有“小尾巴”，就像#keleyi，今天我就介绍一款 jquery 做的滚动的特效，既可以设置滚动速度，又可以在 url 链接上没有“小尾巴”。效果体验：http://keleyi.com/keleyi/phtml/jqtexiao/37.htm HTML文件代码：
kafka offset迁移 kane_xie kafka
在早前的kafka版本中（0.8.0），offset是被存储在zookeeper中的。到当前版本（0.8.2）为止，kafka同时支持offset存储在zookeeper和offset manager（broker）中。从官方的说明来看，未来offset的zookeeper存储将会被弃用。因此现有的基于kafka的项目如果今后计划保持更新的话，可以考虑在合适
android > 搭建 cordova 环境 mft8899 android
1 , 安装 node.js http://nodejs.org node -v 查看版本 2, 安装 npm 可以先从 https://github.com/isaacs/npm/tags 下载源码解压到
java封装的比较器，比较是否全相同，获取不同字段名字 qifeifei
非常实用的java比较器，贴上代码： import java.util.HashSet; import java.util.List; import java.util.Set; import net.sf.json.JSONArray; import net.sf.json.JSONObject; import net.sf.json.JsonConfig; i
记录一些函数用法 .Aky. 位运算 PHP 数据库函数 IP
高手们照旧忽略。想弄个全天朝IP段数据库，找了个今天最新更新的国内所有运营商IP段，copy到文件，用文件函数，字符串函数把玩下。分割出startIp和endIp这样格式写入.txt文件，直接用phpmyadmin导入.csv文件的形式导入。（生命在于折腾，也许你们觉得我傻X，直接下载人家弄好的导入不就可以，做自己的菜鸟，让别人去说吧）当然用到了ip2long()函数把字符串转为整型数
sublime text 3 rust wudixiaotie Sublime Text
1.sublime text 3 => install package => Rust 2.cd ~/.config/sublime-text-3/Packages 3.mkdir rust 4.git clone https://github.com/sp0/rust-style 5.cd rust-style 6.cargo build --release 7.ctrl