集群运行状况API返回关于集群运行状况的简单状态。您还可以使用该API仅获取指定数据流和索引的健康状态。对于数据流,API 会检索该数据流的后备索引(backing indices)的运行状况。
集群健康状态分为:绿色、黄色和红色。在shard级别,红色状态表示该shard未在集群中分配,黄色状态表示主shard已分配,但副本未分配,绿色状态表示所有shard都已分配。索引级别状态由最差分片状态决定。集群状态由最差索引状态决定。
GET /_cluster/health/<target>
该API的主要优点之一是能够等待,直到集群达到某个较高的健康水平。例如,下面的请求将最多等待60秒,直到集群达到绿色状态(如果在60秒之内达到绿色状态,它将在该点立即返回):
# 当前状态为黄色
root@ubuntu-x64_02:/opt# curl -X GET "http://192.168.88.12:9201/_cluster/health?pretty"
{
"cluster_name" : "my-cluster",
"status" : "yellow", # <----- 状态为 yellow
"timed_out" : false,
"number_of_nodes" : 2,
"number_of_data_nodes" : 2,
"active_primary_shards" : 19,
"active_shards" : 38,
"relocating_shards" : 0,
"initializing_shards" : 0,
"unassigned_shards" : 3,
"delayed_unassigned_shards" : 0,
"number_of_pending_tasks" : 0,
"number_of_in_flight_fetch" : 0,
"task_max_waiting_in_queue_millis" : 0,
"active_shards_percent_as_number" : 92.6829268292683
}
# 默认 level 为cluster, 上面查询等同于
root@ubuntu-x64_02:/opt# curl -X GET "http://192.168.88.12:9201/_cluster/health/?level=cluster&pretty"
# 集群在60秒内未达到 green, 请求超时后返回 (timed_out 为 true, status 仍为 yellow)
root@ubuntu-x64_02:/opt# time curl -X GET "http://192.168.88.12:9201/_cluster/health?wait_for_status=green&timeout=60s&pretty"
{
"cluster_name" : "my-cluster",
"status" : "yellow",
"timed_out" : true,
"number_of_nodes" : 2,
"number_of_data_nodes" : 2,
"active_primary_shards" : 19,
"active_shards" : 38,
"relocating_shards" : 0,
"initializing_shards" : 0,
"unassigned_shards" : 3,
"delayed_unassigned_shards" : 0,
"number_of_pending_tasks" : 0,
"number_of_in_flight_fetch" : 0,
"task_max_waiting_in_queue_millis" : 0,
"active_shards_percent_as_number" : 92.6829268292683
}
real 1m0.014s
user 0m0.000s
sys 0m0.012s
# 如果在60秒之前达到绿色状态,它将在该点返回(如:30.120s )
root@ubuntu-x64_02:/opt# time curl -X GET "http://192.168.88.12:9201/_cluster/health?wait_for_status=green&timeout=60s&pretty"
{
"cluster_name" : "my-cluster",
"status" : "green",
"timed_out" : false,
"number_of_nodes" : 3,
"number_of_data_nodes" : 3,
"active_primary_shards" : 19,
"active_shards" : 41,
"relocating_shards" : 0,
"initializing_shards" : 0,
"unassigned_shards" : 0,
"delayed_unassigned_shards" : 0,
"number_of_pending_tasks" : 1,
"number_of_in_flight_fetch" : 0,
"task_max_waiting_in_queue_millis" : 0,
"active_shards_percent_as_number" : 100.0
}
real 0m30.120s
user 0m0.000s
sys 0m0.004s
以下是获取 shards 级别 集群健康状况的示例 :
root@ubuntu-x64_02:/opt# curl -X GET "http://192.168.88.12:9201/_cluster/health/_all?level=shards&pretty"
{
"cluster_name" : "my-cluster",
"status" : "green",
"timed_out" : false,
"number_of_nodes" : 3,
"number_of_data_nodes" : 3,
"active_primary_shards" : 19,
"active_shards" : 41,
"relocating_shards" : 0,
"initializing_shards" : 0,
"unassigned_shards" : 0,
"delayed_unassigned_shards" : 0,
"number_of_pending_tasks" : 0,
"number_of_in_flight_fetch" : 0,
"task_max_waiting_in_queue_millis" : 0,
"active_shards_percent_as_number" : 100.0,
"indices" : {
".ds-ilm-history-5-2023.05.04-000004" : {
"status" : "green",
"number_of_shards" : 1,
"number_of_replicas" : 1,
"active_primary_shards" : 1,
"active_shards" : 2,
"relocating_shards" : 0,
"initializing_shards" : 0,
"unassigned_shards" : 0,
"shards" : {
"0" : {
"status" : "green",
"primary_active" : true,
"active_shards" : 2,
"relocating_shards" : 0,
"initializing_shards" : 0,
"unassigned_shards" : 0
}
}
},
..........
返回有关集群状态的元数据。
GET /_cluster/state/<metrics>/<target>
路径参数
集群状态包含关于集群中所有索引的信息,包括它们的映射,以及模板和其他元数据。这意味着它有时会非常大。为了避免需要处理所有这些信息,你可以只请求你需要的部分集群状态:
下面的例子只返回名为 my-index-000001 的数据流或索引的 metadata 和 routing_table 数据:
root@ubuntu-x64_02:/opt# curl -X GET "http://192.168.88.12:9201/_cluster/state/metadata,routing_table/my-index-000001?pretty"
{
"cluster_name" : "my-cluster",
"cluster_uuid" : "hhRd0sm5SXqDi3oceLw3oQ",
"metadata" : {
......
"cluster_coordination" : {
......
},
"templates" : { },
"indices" : {
"my-index-000001" : {
"version" : 17,
......
"routing_num_shards" : 1024,
"state" : "open",
"settings" : {
"index" : {
......
}
},
"mappings" : {
"_doc" : {
"properties" : {
"employee-id" : {
"index" : false,
"type" : "keyword"
},
......
}
}
},
......
}
},
"index-graveyard" : {
"tombstones" : [ ]
}
},
"routing_table" : {
"indices" : {
"my-index-000001" : {
"shards" : {
"0" : [
{
"state" : "STARTED",
"primary" : true,
"node" : "vPD3eY-FSP6PcX1xY3SEyg",
"relocating_node" : null,
"shard" : 0,
"index" : "my-index-000001",
"allocation_id" : {
"id" : "qOrfDS_aQCqf4pxR8xP3pA"
}
},
{
"state" : "STARTED",
"primary" : false,
"node" : "QPS7tRDBRnCbpv0Dfx_0Lg",
"relocating_node" : null,
"shard" : 0,
"index" : "my-index-000001",
"allocation_id" : {
"id" : "lEzlrVv7SNKvqZasj_hRcA"
}
}
]
}
}
}
}
}
此示例仅返回 blocks 元数据:
root@ubuntu-x64_02:/opt# curl -X GET "http://192.168.88.12:9201/_cluster/state/blocks/my-index-000001?pretty"
{
"cluster_name" : "my-cluster",
"cluster_uuid" : "hhRd0sm5SXqDi3oceLw3oQ",
"blocks" : { }
}
提供对分片当前分配的解释。
GET _cluster/allocation/explain
POST _cluster/allocation/explain
集群分配解释API的目的是为集群中的分片分配提供解释。
当试图诊断为什么分片未分配,或者为什么分片没有按预期迁移而继续保留在当前节点上时,这个API非常有用。
未分配的主分片, 下面的请求获取未分配主分片的分配解释。
# 创建索引 my-index-000002, 并通过 index.routing.allocation.include 限制其只能分配到名称 (name) 为 nonexistent_node 的节点上 (该节点并不存在)
root@ubuntu-x64_02:/opt# curl -X PUT "http://192.168.88.12:9201/my-index-000002/?pretty" -H 'Content-Type: application/json' -d'
> {
> "settings" : {
> "index" : {
> "routing" : {
> "allocation" : {
> "include" : {
> "name" : "nonexistent_node"
> }
> }
> },
> "number_of_shards" : "1",
> "number_of_replicas" : "1"
> }
> }
> }
> '
{
"acknowledged" : true,
"shards_acknowledged" : false,
"index" : "my-index-000002"
}
查看索引 my-index-000002 的主分片未分配的原因解释: 无法分配, 因为不允许分配到任何节点 (见返回结果中的 allocate_explanation 字段)
root@ubuntu-x64_02:/opt# curl -X GET "http://192.168.88.12:9201/_cluster/allocation/explain?pretty" -H 'Content-Type: application/json' -d'
> {
> "index": "my-index-000002",
> "shard": 0,
> "primary": true
> }
> '
{
"index" : "my-index-000002",
"shard" : 0,
"primary" : true,
"current_state" : "unassigned", # <------ 分片的当前状态。
"unassigned_info" : {
"reason" : "INDEX_CREATED", # <------------ 分片最初变得未分配的原因。
"at" : "2023-12-26T05:54:44.326Z",
"last_allocation_status" : "no"
},
"can_allocate" : "no", # <---------------- 是否分配分片
"allocate_explanation" : "cannot allocate because allocation is not permitted to any of the nodes",
"node_allocation_decisions" : [
{
"node_id" : "QPS7tRDBRnCbpv0Dfx_0Lg",
"node_name" : "node-3",
"transport_address" : "192.168.88.12:9303",
"node_attributes" : {
"ml.machine_memory" : "6248390656",
"ml.max_open_jobs" : "512",
"xpack.installed" : "true",
"ml.max_jvm_size" : "1073741824",
"transform.node" : "true"
},
"node_decision" : "no", # <------------ 是否将分片分配给特定节点。
"weight_ranking" : 1,
"deciders" : [
{
"decider" : "filter", # <------------ 导致no节点决策的决策者。
"decision" : "NO",
# explanation 字段解释该决策器 (decider) 返回 no 决策的原因, 并提供有用的提示, 指出导致该决策的设置。
"explanation" : "node does not match index setting [index.routing.allocation.include] filters [name:\"nonexistent_node\"]"
}
]
},
{
"node_id" : "vPD3eY-FSP6PcX1xY3SEyg",
"node_name" : "node-1",
"transport_address" : "192.168.88.12:9301",
"node_attributes" : {
"ml.machine_memory" : "6248390656",
"ml.max_open_jobs" : "512",
"xpack.installed" : "true",
"ml.max_jvm_size" : "1073741824",
"transform.node" : "true"
},
"node_decision" : "no",
"weight_ranking" : 2,
"deciders" : [
{
"decider" : "filter",
"decision" : "NO",
"explanation" : "node does not match index setting [index.routing.allocation.include] filters [name:\"nonexistent_node\"]"
}
]
},
{
"node_id" : "32zPm9fvTPGxPMvNSsCZrw",
"node_name" : "node-2",
"transport_address" : "192.168.88.12:9302",
"node_attributes" : {
"ml.machine_memory" : "6248390656",
"ml.max_open_jobs" : "512",
"xpack.installed" : "true",
"ml.max_jvm_size" : "1073741824",
"transform.node" : "true"
},
"node_decision" : "no",
"weight_ranking" : 3,
"deciders" : [
{
"decider" : "filter",
"decision" : "NO",
"explanation" : "node does not match index setting [index.routing.allocation.include] filters [name:\"nonexistent_node\"]"
}
]
}
]
}
将 name 修改为已存在的节点名称 (例如 node-1), 则主分片可以分配成功:
root@ubuntu-x64_02:/opt# curl -X PUT "http://192.168.88.12:9201/my-index-000002/_settings" -H 'Content-Type: application/json' -d'
> {
> "index" : {
> "routing" : {
> "allocation" : {
> "include" : {
> "name" : "node-1"
> }
> }
> }
> }
> }
> '
{"acknowledged":true}
# 此时状态为 yellow: 黄色状态表示主shard已分配, 但副本未分配 (include 过滤器只允许分配到 node-1, 而副本不能与其主分片分配在同一节点上)
root@ubuntu-x64_02:/opt# curl -X GET "http://192.168.88.12:9201/_cluster/health/my-index-000002?level=shards&pretty"
{
"cluster_name" : "my-cluster",
"status" : "yellow",
"timed_out" : false,
"number_of_nodes" : 3,
"number_of_data_nodes" : 3,
"active_primary_shards" : 1,
"active_shards" : 1,
"relocating_shards" : 0,
"initializing_shards" : 0,
"unassigned_shards" : 1,
"delayed_unassigned_shards" : 0,
"number_of_pending_tasks" : 0,
"number_of_in_flight_fetch" : 0,
"task_max_waiting_in_queue_millis" : 0,
"active_shards_percent_as_number" : 97.67441860465115,
"indices" : {
"my-index-000002" : {
"status" : "yellow",
"number_of_shards" : 1,
"number_of_replicas" : 1,
"active_primary_shards" : 1,
"active_shards" : 1,
"relocating_shards" : 0,
"initializing_shards" : 0,
"unassigned_shards" : 1,
"shards" : {
"0" : {
"status" : "yellow",
"primary_active" : true,
"active_shards" : 1,
"relocating_shards" : 0,
"initializing_shards" : 0,
"unassigned_shards" : 1
}
}
}
}
}
此时,集群状态为黄色, 集群状态由最差索引状态控制。
root@ubuntu-x64_02:/opt# curl -X GET "http://192.168.88.12:9201/_cluster/health?pretty"
{
"cluster_name" : "my-cluster",
"status" : "yellow", # <----------- 集群状态由最差索引状态控制
"timed_out" : false,
"number_of_nodes" : 3,
"number_of_data_nodes" : 3,
"active_primary_shards" : 20,
"active_shards" : 42,
"relocating_shards" : 0,
"initializing_shards" : 0,
"unassigned_shards" : 1,
"delayed_unassigned_shards" : 0,
"number_of_pending_tasks" : 0,
"number_of_in_flight_fetch" : 0,
"task_max_waiting_in_queue_millis" : 0,
"active_shards_percent_as_number" : 97.67441860465115
}