python requests elasticsearch auth reindex alias 通过requests用户名密码验证实现es异步重建并指定别名

在使用 Elasticsearch 7.x 的过程中,发现索引默认的主分片数和副本数都为 1。随着数据量不断增多,单个分片导致索引的写入效率越来越低,因此决定对业务层和数仓的索引进行重建(增加分片数)。

# -*- coding: utf-8 -*-
# @Time    : 2019/9/21 13:48
# @Author  : Cocktail_py

import logging
import traceback

import requests
from elasticsearch import Elasticsearch
from requests.auth import HTTPBasicAuth

# Root logger: INFO and above, each record prefixed with timestamp and level.
_LOG_FORMAT = "%(asctime)s - %(levelname)s - %(message)s"
logging.basicConfig(level=logging.INFO, format=_LOG_FORMAT)

# Cluster nodes are given as one comma-separated "host:port" string and split
# into the list form passed to the client.
_ES_NODES = "host1:19200,host2:19200,host3:19200".split(",")

# Shared client for the script: basic-auth credentials, a 180 timeout value,
# and up to 10 retries with retry-on-timeout enabled.
es = Elasticsearch(
    _ES_NODES,
    http_auth=("username", "password"),
    timeout=180,
    max_retries=10,
    retry_on_timeout=True,
)

# Rebuild every data-warehouse index matching ``dw_gofish_*``: create a new
# ``<index>_nw`` index that reuses the old mapping with bulk-load friendly
# settings, alias old and new together, then start an asynchronous
# server-side reindex from the old index into the new one.

# Settings applied to every rebuilt index (identical for all of them, so the
# dict is built once instead of once per loop iteration).
_REBUILD_SETTINGS = {
    # Start with -1 (refresh disabled) for the copy; afterwards set it
    # according to the business scenario (30s or longer).
    "refresh_interval": "-1",
    "translog": {
        "flush_threshold_size": "2gb",
        "sync_interval": "120s",
        "durability": "async"
    },
    "index": {
        # Shard count: recommended to be >= the number of nodes, with each
        # shard holding no more than about 30G of data on average.
        "number_of_shards": "10",
        # Replicas start at 0; change to 1 once the reindex has finished.
        "number_of_replicas": "0"
    }
}

for index, mapping in es.indices.get_mapping(index="dw_gofish_*").items():
    # The wildcard also matches ``*_nw`` indices created by an earlier run of
    # this script; skip them so a re-run does not produce ``*_nw_nw`` copies.
    if index.endswith("_nw"):
        continue

    # Name of the NEW (destination) index.
    index_nw = "{}_nw".format(index)

    # Request body: the old index's mapping plus the rebuild settings.
    body = dict(mapping, settings=_REBUILD_SETTINGS)

    # Create the new index.
    try:
        result = es.indices.create(index=index_nw, body=body)
    except Exception:
        logging.exception("creating index %s failed", index_nw)
        # Only carry on when the destination already exists (e.g. left over
        # from a previous run). Otherwise the alias call below would fail, or
        # _reindex would auto-create the destination with default settings,
        # defeating the purpose of the rebuild.
        if not es.indices.exists(index=index_nw):
            continue
    else:
        logging.info(result)

    # Put both the old and the new index behind one alias.
    # NOTE(review): while both indices carry the alias, reads through it
    # presumably see the documents of both -- confirm this is intended and
    # drop the old index from the alias once the reindex has completed.
    es.indices.put_alias(index=[index, index_nw],
                         name="{}_alias".format(index))

    # Asynchronous reindex: wait_for_completion=false makes the request return
    # without waiting for the copy to finish.
    result = requests.post(
        "http://ip1:19200/_reindex?refresh&wait_for_completion=false",
        json={
            "source": {
                "index": index,
                "size": 10000
            },
            "dest": {
                "index": index_nw
            }
        },
        headers={'Content-Type': 'application/json'},
        # Basic-auth username and password.
        auth=HTTPBasicAuth("username", "password"),
        # Fail instead of hanging forever if the node never answers.
        timeout=180)
    if result.ok:
        logging.info(result.text)
    else:
        logging.error("reindex request for %s failed with HTTP %s: %s",
                      index, result.status_code, result.text)

查看已存在的索引及其文档数量、占用存储空间大小等信息

# pri(主分片数)、rep(副分片数)、docs.count(索引现有文档数)、docs.deleted(索引标记为删除的文档数)、store.size(索引总大小)、pri.store.size(索引主分片大小)
GET _cat/indices/index_name?v

查看索引settings设置

GET /index_name/_settings

查看reindex进度

GET _tasks?detailed=true&actions=*reindex
# 删除所有滚动
import requests
from requests.auth import HTTPBasicAuth
# Send a DELETE to the cluster's ``_search/scroll/_all`` endpoint (clears all
# scrolls) and print the response object.
_scroll_all_url = "http://xxx.xxx.xxxx:19200/_search/scroll/_all"
_basic_auth = HTTPBasicAuth("username", "password")  # username / password
result = requests.delete(
    _scroll_all_url,
    auth=_basic_auth,
    headers={'Content-Type': 'application/json'},
)
print(result)

你可能感兴趣的:(Elasticsearch,elasticsearch)