ES reindex 实战

机器配置

Data nodes:i3.4xlarge.search * 3     16C 122G
master nodes:c5.2xlarge.search * 3    8C 16G

案例1-kafka

kafka consumer tps = 18k
bulk request batch size = 100 (程序单线程等待100个消息到达后提交,因此系统的吞吐量与batch无关)
数据量(cat命令):

green  open   admin_operate_log_v1_2021       1   0        432            0    183.9kb        183.9kb
green  open   admin_operate_log_v1_2022       1   0   38855695            0     10.9gb         10.9gb
green  open   admin_operate_log_v1_2023       1   0   15996192            0      4.5gb          4.5gb

索引配置

"settings": {
        "index": {
            "refresh_interval": "-1",
            "number_of_shards": "1",
            "number_of_replicas": "0"
        }
    }

ES负载

cpu: 写入期间<=49%,写入完成后短暂峰值:68-72%
内存:73%左右
ES reindex 实战_第1张图片

性能

耗时:55-60分钟左右

案例2-reindex

数据量

health status index uuid pri rep docs.count docs.deleted store.size pri.store.size
green open user_operation_log_v1_202211 F7qtS9A3QxCFlIiZ8vCB0Q 2 1 6471447 0 5.3gb 2.6gb
green open user_operation_log_v1_202210 9MWtw90pTLes7ZzsDo5zxw 2 1 5479698 0 4.3gb 2.1gb
green open admin_operate_log_v1_2021 cxmLz54CR2C2eQkWcftGdw 1 0 432 0 183.9kb 183.9kb
green open admin_operate_log_v1_2022 mD9B3fY3T5moY2iMoSJ6rA 1 0 38855695 0 10.9gb 10.9gb
green open admin_operate_log_v1_2023 Q8umgdzcR1idQncdPnUCbA 1 0 15996192 0 4.5gb 4.5gb
green open user_operation_log_v1_202212 9Bo4HkB1T_G8oiOiJVcDow 2 1 5714707 0 4.6gb 2.3gb
green open user_operation_log_v1_202208 8jMbqQPIQT-GI67lo72Xbg 2 1 5070471 0 5gb 2.5gb
green open user_operation_log_v1_202207 ppYBaM3NQkaPBkZowXqbdA 2 1 5253290 0 5.1gb 2.5gb
green open user_operation_log_v1_202206 A12fRvkhTOCcI12QPCzbxw 2 1 5499146 0 5.6gb 2.8gb
green open user_operation_log_v1_202305 ltM-SbzaQkaEmqVmV84iEQ 2 1 1659596 50 1.1gb 583mb
green open user_operation_log_v1_202304 0W9kvl-jTlWizYltRq4yGQ 2 1 7589405 44 4.7gb 2.3gb
green open user_operation_log_v1_202205 KzLscm-gRSq-v7aps96M4Q 2 1 7343020 0 7.5gb 3.7gb
green open user_operation_log_v1_202209 DDMTU6mVTn-uF1NZPJ-x4Q 2 1 4699022 0 4.3gb 2.1gb
green open user_operation_log_v1_202303 f13sJ9wyRxagWYOCK0t07w 2 1 7698296 0 5gb 2.5gb
green open token_sale_v1 K7FFK24FSWi1oKlXrRuBug 3 1 1157 22 550.2kb 275.1kb
green open user_operation_log_v1_202204 grbxxC5iQie3ADnnyE5Erw 2 1 8288019 0 8.3gb 4.1gb
green open user_operation_log_v1_202302 oDOUznF5QKGgTmetRv2uRA 2 1 6797596 0 4.4gb 2.2gb
green open user_operation_log_v1_202203 aJLU9fhzT3uCNf2wVdb3Gw 2 1 9291234 0 11.3gb 5.6gb
green open .kibana_1 ErpcRNskT_-GLNVpeQd-1A 1 1 1 0 10.1kb 5kb
green open user_operation_log_v1_202301 BqpOEoDTTO2ue-uDfjUoCA 2 1 6103653 152925 4.2gb 2.1gb
green open user_operation_log_v1_202202 d9VaDToWSs2qbdyxJyDvdg 2 1 9415513 0 12gb 6gb
green open user_operation_log_v1_202201 FahUyKbeTYK_zYYc_UhofQ 2 1 11594662 0 15.4gb 7.7gb

索引模板配置与reindex脚本

# Elasticsearch endpoint and the v1 source indices that will be reindexed.
es_host="https://my_es_host"
indices=(
  user_operation_log_v1_202301
  user_operation_log_v1_202302
)

# Index template body for every index whose name matches
# "user_operation_log_v2*":
#   - dynamic mapping is off, so only the listed fields are mapped;
#   - refresh is disabled and replicas are set to 0 in the template settings;
#   - note that the normalizer named "caseSensitive" applies the "lowercase"
#     filter, i.e. values of the fields using it are lowercased;
#   - matching indices are attached to the "user_operation_log_v2" alias.
template_body='{
  "order": 0,
  "index_patterns": [
    "user_operation_log_v2*"
  ],
  "mappings": {
    "dynamic": "false",
    "properties": {
      "user_id": {
        "type": "long"
      },
      "operation": {
        "type": "keyword",
        "normalizer": "caseSensitive"
      },
      "real_ip": {
        "type": "keyword"
      },
      "full_ip": {
        "type": "keyword"
      },
      "client_type": {
        "type": "keyword",
        "normalizer": "caseSensitive"
      },
      "version_code": {
        "type": "keyword",
        "index": false
      },
      "apikey": {
        "type": "keyword",
        "index": false
      },
      "user_agent": {
        "type": "keyword",
        "index": false
      },
      "request_time": {
        "type": "long"
      },
      "response_time": {
        "type": "long",
        "index": false
      },
      "request": {
        "type": "text"
      },
      "response": {
        "type": "text"
      },
      "response_status": {
        "type": "keyword",
        "normalizer": "caseSensitive"
      }
    }
  },
  "settings": {
    "index": {
      "refresh_interval" : "-1",
      "number_of_shards": "2",
      "number_of_replicas": "0"
    },
    "analysis": {
      "normalizer": {
        "caseSensitive": {
          "filter": "lowercase",
          "type": "custom"
        }
      }
    }
  },
  "aliases": {
    "user_operation_log_v2": {}
  }
}'

# Register the template under the name "user_operation_log_template_v2".
curl -L -X POST "${es_host}/_template/user_operation_log_template_v2" \
  -H 'Content-Type: application/json' \
  --data-raw "${template_body}"

# Submit one reindex request per entry of the `indices` array, copying each
# source index into a target whose name is the source name with the first
# "v1" replaced by "v2". Reads `es_host` and `indices` from the enclosing script.
#
# NOTE: the request is sent with wait_for_completion=false, so curl returns as
# soon as the task has been accepted. All tasks are therefore launched
# back-to-back, and the "cost" printed below is the time spent submitting the
# request, not the duration of the reindex itself (check the task API for that).
for source_index in "${indices[@]}"; do
  target_index=${source_index/v1/v2}
  message="source = ${source_index}, target = ${target_index}"
  echo "${message}"
  start=$(date +"%s")
  # Index names are spliced into the JSON inside double quotes so that they are
  # never subject to word splitting or globbing.
  curl -HContent-Type:application/json -XPOST "${es_host}/_reindex?slices=2&requests_per_second=-1&wait_for_completion=false&pretty" -d'{
    "source": {
      "index": "'"${source_index}"'"
      ,"size": 5000
    },
    "dest": {
      "index": "'"${target_index}"'"
    }
  }'
  end=$(date +"%s")
  echo "${message} cost $(( end - start )) seconds"
done

ES负载

ES reindex 实战_第2张图片

性能

耗时:10分钟左右

第一次提交2个索引reindex

{
  "completed": true,
  "task": {
    "node": "b8mpzb7ARLau_iJVU4fkmg",
    "id": 246335488,
    "type": "transport",
    "action": "indices:data/write/reindex",
    "status": {
      "total": 9415513,
      "updated": 0,
      "created": 9415513,
      "deleted": 0,
      "batches": 1884,
      "version_conflicts": 0,
      "noops": 0,
      "retries": {
        "bulk": 0,
        "search": 0
      },
      "throttled_millis": 0,
      "requests_per_second": -1.0,
      "throttled_until_millis": 0,
      "slices": [
        {
          "slice_id": 0,
          "total": 4708452,
          "updated": 0,
          "created": 4708452,
          "deleted": 0,
          "batches": 942,
          "version_conflicts": 0,
          "noops": 0,
          "retries": {
            "bulk": 0,
            "search": 0
          },
          "throttled_millis": 0,
          "requests_per_second": -1.0,
          "throttled_until_millis": 0
        },
        {
          "slice_id": 1,
          "total": 4707061,
          "updated": 0,
          "created": 4707061,
          "deleted": 0,
          "batches": 942,
          "version_conflicts": 0,
          "noops": 0,
          "retries": {
            "bulk": 0,
            "search": 0
          },
          "throttled_millis": 0,
          "requests_per_second": -1.0,
          "throttled_until_millis": 0
        }
      ]
    },
    "description": "reindex from [user_operation_log_v1_202202] to [user_operation_log_v2_202202][_doc]",
    "start_time_in_millis": 1683784030867,
    "running_time_in_nanos": 447756932234,
    "cancellable": true,
    "headers": {
      
    }
  },
  "response": {
    "took": 447740,
    "timed_out": false,
    "total": 9415513,
    "updated": 0,
    "created": 9415513,
    "deleted": 0,
    "batches": 1884,
    "version_conflicts": 0,
    "noops": 0,
    "retries": {
      "bulk": 0,
      "search": 0
    },
    "throttled": "0s",
    "throttled_millis": 0,
    "requests_per_second": -1.0,
    "throttled_until": "0s",
    "throttled_until_millis": 0,
    "slices": [
      {
        "slice_id": 0,
        "total": 4708452,
        "updated": 0,
        "created": 4708452,
        "deleted": 0,
        "batches": 942,
        "version_conflicts": 0,
        "noops": 0,
        "retries": {
          "bulk": 0,
          "search": 0
        },
        "throttled": "0s",
        "throttled_millis": 0,
        "requests_per_second": -1.0,
        "throttled_until": "0s",
        "throttled_until_millis": 0
      },
      {
        "slice_id": 1,
        "total": 4707061,
        "updated": 0,
        "created": 4707061,
        "deleted": 0,
        "batches": 942,
        "version_conflicts": 0,
        "noops": 0,
        "retries": {
          "bulk": 0,
          "search": 0
        },
        "throttled": "0s",
        "throttled_millis": 0,
        "requests_per_second": -1.0,
        "throttled_until": "0s",
        "throttled_until_millis": 0
      }
    ],
    "failures": [
      
    ]
  }
}
-
{
  "completed": true,
  "task": {
    "node": "BrXZu22XQfi94P1l5ErOwA",
    "id": 168840526,
    "type": "transport",
    "action": "indices:data/write/reindex",
    "status": {
      "total": 9291234,
      "updated": 0,
      "created": 9291234,
      "deleted": 0,
      "batches": 1859,
      "version_conflicts": 0,
      "noops": 0,
      "retries": {
        "bulk": 0,
        "search": 0
      },
      "throttled_millis": 0,
      "requests_per_second": -1.0,
      "throttled_until_millis": 0,
      "slices": [
        {
          "slice_id": 0,
          "total": 4642972,
          "updated": 0,
          "created": 4642972,
          "deleted": 0,
          "batches": 929,
          "version_conflicts": 0,
          "noops": 0,
          "retries": {
            "bulk": 0,
            "search": 0
          },
          "throttled_millis": 0,
          "requests_per_second": -1.0,
          "throttled_until_millis": 0
        },
        {
          "slice_id": 1,
          "total": 4648262,
          "updated": 0,
          "created": 4648262,
          "deleted": 0,
          "batches": 930,
          "version_conflicts": 0,
          "noops": 0,
          "retries": {
            "bulk": 0,
            "search": 0
          },
          "throttled_millis": 0,
          "requests_per_second": -1.0,
          "throttled_until_millis": 0
        }
      ]
    },
    "description": "reindex from [user_operation_log_v1_202203] to [user_operation_log_v2_202203][_doc]",
    "start_time_in_millis": 1683784030916,
    "running_time_in_nanos": 437625719457,
    "cancellable": true,
    "headers": {
      
    }
  },
  "response": {
    "took": 437607,
    "timed_out": false,
    "total": 9291234,
    "updated": 0,
    "created": 9291234,
    "deleted": 0,
    "batches": 1859,
    "version_conflicts": 0,
    "noops": 0,
    "retries": {
      "bulk": 0,
      "search": 0
    },
    "throttled": "0s",
    "throttled_millis": 0,
    "requests_per_second": -1.0,
    "throttled_until": "0s",
    "throttled_until_millis": 0,
    "slices": [
      {
        "slice_id": 0,
        "total": 4642972,
        "updated": 0,
        "created": 4642972,
        "deleted": 0,
        "batches": 929,
        "version_conflicts": 0,
        "noops": 0,
        "retries": {
          "bulk": 0,
          "search": 0
        },
        "throttled": "0s",
        "throttled_millis": 0,
        "requests_per_second": -1.0,
        "throttled_until": "0s",
        "throttled_until_millis": 0
      },
      {
        "slice_id": 1,
        "total": 4648262,
        "updated": 0,
        "created": 4648262,
        "deleted": 0,
        "batches": 930,
        "version_conflicts": 0,
        "noops": 0,
        "retries": {
          "bulk": 0,
          "search": 0
        },
        "throttled": "0s",
        "throttled_millis": 0,
        "requests_per_second": -1.0,
        "throttled_until": "0s",
        "throttled_until_millis": 0
      }
    ],
    "failures": [
      
    ]
  }
}

第二次提交14个索引reindex

202204-202305
日志过多省略,与第一次提交差别不大

总结

案例1

kafka:tps=18k
文档量:54851887
文档大小:15.4GB
ES节点:单个
耗时:55-60分钟
负载:cpu=50%左右,内存=70%左右

案例2

根据第二次提交的reindex分析(注:下面的文档量与文档大小是上文列表中全部17个 user_operation_log_v1 索引的合计,大小按主分片统计;仅第二次提交的14个索引文档量合计为83667366)
文档量:113968775
文档大小:53.6GB
ES节点:3个
耗时:10分钟
负载:cpu=80%左右,内存=80%左右

正如开头描述的,案例1中程序以单线程方式工作,瓶颈明显,并发与吞吐量都较低;优点是限制了单个业务索引(由多个按月拆分的物理索引组成)的资源占用,避免拖垮系统。

reindex风险

14个索引reindex的异步任务启动时间如下,可以看到几乎是同时启动的。因此批量操作时要把控并行的数量,slices参数以及size参数的配置也都要事先规划好,避免将服务器跑垮。

1683785201349 2023-05-11 14:06:41
1683785201371 2023-05-11 14:06:41
1683785201395
1683785201418
1683785201440
1683785201463
1683785201485
1683785201508
1683785201531
1683785201551
1683785201572
1683785201598
1683785201623
1683785201645

你可能感兴趣的:(elasticsearch,5G,大数据,搜索引擎)