https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations.html
语法:
"aggs" : { #和query同级的关键词
"<aggregation_name>" : { #自定义的聚合名字
"<aggregation_type>" : { #聚合的定义: 不同的type+body
<aggregation_body>
}
[,"meta" : { [<meta_data_body>] } ]?
[,"aggregations" : { [<sub_aggregation>]+ } ]? #子聚合查询
}
[,"<aggregation_name_2>" : { ... } ]* #可以包含多个同级的聚合查询
}
SELECT AVG(price) FROM products
#Metric聚合的DSL类比实现:
{
"aggs":{
"avg_price":{
"avg":{
"field":"price"
}
}
}
}
#"size": 0 只返回统计结果
POST /employees/_search
{
"size": 0,
"aggs": {
"max_salary": {
"max": {
"field": "salary"
}
},
"min_salary": {
"min": {
"field": "salary"
}
},
"avg_salary": {
"avg": {
"field": "salary"
}
}
}
}
response
{
"took" : 0,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 20,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"max_salary" : {
"value" : 50000.0
},
"avg_salary" : {
"value" : 24700.0
},
"min_salary" : {
"value" : 9000.0
}
}
}
POST /employees/_search
{
"size": 0,
"aggs": {
"stats_salary": {
"stats": {
"field": "salary"
}
}
}
}
response
{
"took" : 0,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 20,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"stats_salary" : {
"count" : 20,
"min" : 9000.0,
"max" : 50000.0,
"avg" : 24700.0,
"sum" : 494000.0
}
}
}
POST /employees/_search
{
"size": 0,
"aggs": {
"cardinate": {
"cardinality": {
"field": "job.keyword"
}
}
}
}
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 20,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"cardinate" : {
"value" : 7
}
}
}
SELECT size, COUNT(*) FROM products GROUP BY size
#bucket聚合的DSL类比实现:
{
"aggs": {
"by_size": {
"terms": {
"field": "size"
}
}
}
}
按照一定的规则,将文档分配到不同的桶中,从而达到分类的目的。ES提供的一些常见的 Bucket Aggregation
聚合可配置属性有:field(指定聚合字段)、size(指定返回的分桶数量)、order(指定分桶的排序方式)
默认情况下,Bucket聚合会统计Bucket内的文档数量,记为_count,并且按照_count降序排序。我们可以指定order属性,自定义聚合的排序方式:
# 对 keyword 进行聚合
GET /employees/_search
{
"size": 0,
"aggs": {
"jobs": {
"terms": {
"field":"job.keyword",
"size": 10,
"order": {
"_count": "desc"
}
}
}
}
}
response
{
"took" : 0,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 20,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"jobs" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "Java Programmer",
"doc_count" : 7
},
{
"key" : "Javascript Programmer",
"doc_count" : 4
},
{
"key" : "QA",
"doc_count" : 3
},
{
"key" : "DBA",
"doc_count" : 2
},
{
"key" : "Web Designer",
"doc_count" : 2
},
{
"key" : "Dev Manager",
"doc_count" : 1
},
{
"key" : "Product Manager",
"doc_count" : 1
}
]
}
}
}
#只对 salary 大于等于 10000 元的文档聚合
GET /employees/_search
{
"query": {
"range": {
"salary": {
"gte": 10000
}
}
},
"size": 0,
"aggs": {
"jobs": {
"terms": {
"field":"job.keyword",
"size": 10,
"order": {
"_count": "desc"
}
}
}
}
}
POST /employees/_search
{
"size": 0,
"aggs": {
"jobs": {
"terms": {
"field":"job"
}
}
}
}
response
{
"error" : {
"root_cause" : [
{
"type" : "illegal_argument_exception",
"reason" : "Text fields are not optimised for operations that require per-document field data like aggregations and sorting, so these operations are disabled by default. Please use a keyword field instead. Alternatively, set fielddata=true on [job] in order to load field data by uninverting the inverted index. Note that this can use significant memory."
}
],
"type" : "search_phase_execution_exception",
"reason" : "all shards failed",
"phase" : "query",
"grouped" : true,
"failed_shards" : [
{
"shard" : 0,
"index" : "employees",
"node" : "rEYg9XpfS_uCtGpHpeoSCw",
"reason" : {
"type" : "illegal_argument_exception",
"reason" : "Text fields are not optimised for operations that require per-document field data like aggregations and sorting, so these operations are disabled by default. Please use a keyword field instead. Alternatively, set fielddata=true on [job] in order to load field data by uninverting the inverted index. Note that this can use significant memory."
}
}
],
"caused_by" : {
"type" : "illegal_argument_exception",
"reason" : "Text fields are not optimised for operations that require per-document field data like aggregations and sorting, so these operations are disabled by default. Please use a keyword field instead. Alternatively, set fielddata=true on [job] in order to load field data by uninverting the inverted index. Note that this can use significant memory.",
"caused_by" : {
"type" : "illegal_argument_exception",
"reason" : "Text fields are not optimised for operations that require per-document field data like aggregations and sorting, so these operations are disabled by default. Please use a keyword field instead. Alternatively, set fielddata=true on [job] in order to load field data by uninverting the inverted index. Note that this can use significant memory."
}
}
},
"status" : 400
}
解决办法:对 Text 字段打开 fielddata,支持terms aggregation
PUT /employees/_mapping
{
"properties" : {
"job":{
"type": "text",
"fielddata": true
}
}
}
# text 字段(已开启 fielddata):按分词后的 term 统计基数
POST /employees/_search
{
"size": 0,
"aggs": {
"cardinate": {
"cardinality": {
"field": "job"
}
}
}
}
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 20,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"cardinate" : {
"value" : 10
}
}
}
# keyword 字段:按完整的字段值统计基数
POST /employees/_search
{
"size": 0,
"aggs": {
"cardinate": {
"cardinality": {
"field": "job.keyword"
}
}
}
}
response
{
"took" : 0,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 20,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"cardinate" : {
"value" : 7
}
}
}
POST employees/_search
{
"size": 0,
"aggs": {
"salary_range": {
"range": {
"field":"salary",
"ranges":[
{
"to":10000
},
{
"from":10000,
"to":20000
},
{
"key":">20000",
"from":20000
}
]
}
}
}
}
response
{
"took" : 3,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 20,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"salary_range" : {
"buckets" : [
{
"key" : "*-10000.0",
"to" : 10000.0,
"doc_count" : 1
},
{
"key" : "10000.0-20000.0",
"from" : 10000.0,
"to" : 20000.0,
"doc_count" : 4
},
{
"key" : ">20000",
"from" : 20000.0,
"doc_count" : 15
}
]
}
}
}
#工资0到10万,以 5000一个区间进行分桶
POST employees/_search
{
"size": 0,
"aggs": {
"salary_histrogram": {
"histogram": {
"field":"salary",
"interval":5000,
"extended_bounds":{
"min":0,
"max":100000
}
}
}
}
}
response
{
"took" : 5,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 20,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"salary_histrogram" : {
"buckets" : [
{
"key" : 0.0,
"doc_count" : 0
},
{
"key" : 5000.0,
"doc_count" : 1
},
{
"key" : 10000.0,
"doc_count" : 0
},
{
"key" : 15000.0,
"doc_count" : 4
},
{
"key" : 20000.0,
"doc_count" : 6
},
{
"key" : 25000.0,
"doc_count" : 3
},
{
"key" : 30000.0,
"doc_count" : 3
},
{
"key" : 35000.0,
"doc_count" : 2
},
{
"key" : 40000.0,
"doc_count" : 0
},
{
"key" : 45000.0,
"doc_count" : 0
},
{
"key" : 50000.0,
"doc_count" : 1
},
{
"key" : 55000.0,
"doc_count" : 0
},
{
"key" : 60000.0,
"doc_count" : 0
},
{
"key" : 65000.0,
"doc_count" : 0
},
{
"key" : 70000.0,
"doc_count" : 0
},
{
"key" : 75000.0,
"doc_count" : 0
},
{
"key" : 80000.0,
"doc_count" : 0
},
{
"key" : 85000.0,
"doc_count" : 0
},
{
"key" : 90000.0,
"doc_count" : 0
},
{
"key" : 95000.0,
"doc_count" : 0
},
{
"key" : 100000.0,
"doc_count" : 0
}
]
}
}
}
# 指定size,不同工种中,年纪最大的3个员工的具体信息
POST /employees/_search
{
"size": 0,
"aggs": {
"jobs": {
"terms": {
"field":"job.keyword"
},
"aggs":{
"old_employee":{
"top_hits":{
"size":3,
"sort":[
{
"age":{
"order":"desc"
}
}
]
}
}
}
}
}
}
response
{
"took" : 5,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 20,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"jobs" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "Java Programmer",
"doc_count" : 7,
"old_employee" : {
"hits" : {
"total" : {
"value" : 7,
"relation" : "eq"
},
"max_score" : null,
"hits" : [
{
"_index" : "employees",
"_type" : "_doc",
"_id" : "11",
"_score" : null,
"_source" : {
"name" : "Jenny",
"age" : 36,
"job" : "Java Programmer",
"gender" : "female",
"salary" : 38000
},
"sort" : [
36
]
},
{
"_index" : "employees",
"_type" : "_doc",
"_id" : "15",
"_score" : null,
"_source" : {
"name" : "King",
"age" : 33,
"job" : "Java Programmer",
"gender" : "male",
"salary" : 28000
},
"sort" : [
33
]
},
{
"_index" : "employees",
"_type" : "_doc",
"_id" : "9",
"_score" : null,
"_source" : {
"name" : "Gregory",
"age" : 32,
"job" : "Java Programmer",
"gender" : "male",
"salary" : 22000
},
"sort" : [
32
]
}
]
}
}
},
{
"key" : "Javascript Programmer",
"doc_count" : 4,
"old_employee" : {
"hits" : {
"total" : {
"value" : 4,
"relation" : "eq"
},
"max_score" : null,
"hits" : [
{
"_index" : "employees",
"_type" : "_doc",
"_id" : "14",
"_score" : null,
"_source" : {
"name" : "Marshall",
"age" : 32,
"job" : "Javascript Programmer",
"gender" : "male",
"salary" : 25000
},
"sort" : [
32
]
},
{
"_index" : "employees",
"_type" : "_doc",
"_id" : "18",
"_score" : null,
"_source" : {
"name" : "Catherine",
"age" : 29,
"job" : "Javascript Programmer",
"gender" : "female",
"salary" : 20000
},
"sort" : [
29
]
},
{
"_index" : "employees",
"_type" : "_doc",
"_id" : "17",
"_score" : null,
"_source" : {
"name" : "Goodwin",
"age" : 25,
"job" : "Javascript Programmer",
"gender" : "male",
"salary" : 16000
},
"sort" : [
25
]
}
]
}
}
},
{
"key" : "QA",
"doc_count" : 3,
"old_employee" : {
"hits" : {
"total" : {
"value" : 3,
"relation" : "eq"
},
"max_score" : null,
"hits" : [
{
"_index" : "employees",
"_type" : "_doc",
"_id" : "6",
"_score" : null,
"_source" : {
"name" : "Lucy",
"age" : 31,
"job" : "QA",
"gender" : "female",
"salary" : 25000
},
"sort" : [
31
]
},
{
"_index" : "employees",
"_type" : "_doc",
"_id" : "7",
"_score" : null,
"_source" : {
"name" : "Byrd",
"age" : 27,
"job" : "QA",
"gender" : "male",
"salary" : 20000
},
"sort" : [
27
]
},
{
"_index" : "employees",
"_type" : "_doc",
"_id" : "5",
"_score" : null,
"_source" : {
"name" : "Rose",
"age" : 25,
"job" : "QA",
"gender" : "female",
"salary" : 18000
},
"sort" : [
25
]
}
]
}
}
},
{
"key" : "DBA",
"doc_count" : 2,
"old_employee" : {
"hits" : {
"total" : {
"value" : 2,
"relation" : "eq"
},
"max_score" : null,
"hits" : [
{
"_index" : "employees",
"_type" : "_doc",
"_id" : "19",
"_score" : null,
"_source" : {
"name" : "Boone",
"age" : 30,
"job" : "DBA",
"gender" : "male",
"salary" : 30000
},
"sort" : [
30
]
},
{
"_index" : "employees",
"_type" : "_doc",
"_id" : "20",
"_score" : null,
"_source" : {
"name" : "Kathy",
"age" : 29,
"job" : "DBA",
"gender" : "female",
"salary" : 20000
},
"sort" : [
29
]
}
]
}
}
},
{
"key" : "Web Designer",
"doc_count" : 2,
"old_employee" : {
"hits" : {
"total" : {
"value" : 2,
"relation" : "eq"
},
"max_score" : null,
"hits" : [
{
"_index" : "employees",
"_type" : "_doc",
"_id" : "4",
"_score" : null,
"_source" : {
"name" : "Rivera",
"age" : 26,
"job" : "Web Designer",
"gender" : "female",
"salary" : 22000
},
"sort" : [
26
]
},
{
"_index" : "employees",
"_type" : "_doc",
"_id" : "3",
"_score" : null,
"_source" : {
"name" : "Tran",
"age" : 25,
"job" : "Web Designer",
"gender" : "male",
"salary" : 18000
},
"sort" : [
25
]
}
]
}
}
},
{
"key" : "Dev Manager",
"doc_count" : 1,
"old_employee" : {
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : null,
"hits" : [
{
"_index" : "employees",
"_type" : "_doc",
"_id" : "2",
"_score" : null,
"_source" : {
"name" : "Underwood",
"age" : 41,
"job" : "Dev Manager",
"gender" : "male",
"salary" : 50000
},
"sort" : [
41
]
}
]
}
}
},
{
"key" : "Product Manager",
"doc_count" : 1,
"old_employee" : {
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : null,
"hits" : [
{
"_index" : "employees",
"_type" : "_doc",
"_id" : "1",
"_score" : null,
"_source" : {
"name" : "Emma",
"age" : 32,
"job" : "Product Manager",
"gender" : "female",
"salary" : 35000
},
"sort" : [
32
]
}
]
}
}
}
]
}
}
}
POST employees/_search
{
"size": 0,
"aggs": {
"Job_salary_stats": {
"terms": {
"field": "job.keyword"
},
"aggs": {
"salary": {
"stats": {
"field": "salary"
}
}
}
}
}
}
response
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 20,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"Job_salary_stats" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "Java Programmer",
"doc_count" : 7,
"salary" : {
"count" : 7,
"min" : 9000.0,
"max" : 38000.0,
"avg" : 25571.428571428572,
"sum" : 179000.0
}
},
{
"key" : "Javascript Programmer",
"doc_count" : 4,
"salary" : {
"count" : 4,
"min" : 16000.0,
"max" : 25000.0,
"avg" : 19250.0,
"sum" : 77000.0
}
},
{
"key" : "QA",
"doc_count" : 3,
"salary" : {
"count" : 3,
"min" : 18000.0,
"max" : 25000.0,
"avg" : 21000.0,
"sum" : 63000.0
}
},
{
"key" : "DBA",
"doc_count" : 2,
"salary" : {
"count" : 2,
"min" : 20000.0,
"max" : 30000.0,
"avg" : 25000.0,
"sum" : 50000.0
}
},
{
"key" : "Web Designer",
"doc_count" : 2,
"salary" : {
"count" : 2,
"min" : 18000.0,
"max" : 22000.0,
"avg" : 20000.0,
"sum" : 40000.0
}
},
{
"key" : "Dev Manager",
"doc_count" : 1,
"salary" : {
"count" : 1,
"min" : 50000.0,
"max" : 50000.0,
"avg" : 50000.0,
"sum" : 50000.0
}
},
{
"key" : "Product Manager",
"doc_count" : 1,
"salary" : {
"count" : 1,
"min" : 35000.0,
"max" : 35000.0,
"avg" : 35000.0,
"sum" : 35000.0
}
}
]
}
}
}
# 多次嵌套。根据工作类型分桶,然后按照性别分桶,计算工资的统计信息
POST employees/_search
{
"size": 0,
"aggs": {
"Job_gender_stats": {
"terms": {
"field": "job.keyword"
},
"aggs": {
"gender_stats": {
"terms": {
"field": "gender"
},
"aggs": {
"salary_stats": {
"stats": {
"field": "salary"
}
}
}
}
}
}
}
}
response
{
"took" : 2,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 20,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"Job_gender_stats" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "Java Programmer",
"doc_count" : 7,
"gender_stats" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "male",
"doc_count" : 5,
"salary_stats" : {
"count" : 5,
"min" : 9000.0,
"max" : 32000.0,
"avg" : 22200.0,
"sum" : 111000.0
}
},
{
"key" : "female",
"doc_count" : 2,
"salary_stats" : {
"count" : 2,
"min" : 30000.0,
"max" : 38000.0,
"avg" : 34000.0,
"sum" : 68000.0
}
}
]
}
},
{
"key" : "Javascript Programmer",
"doc_count" : 4,
"gender_stats" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "male",
"doc_count" : 3,
"salary_stats" : {
"count" : 3,
"min" : 16000.0,
"max" : 25000.0,
"avg" : 19000.0,
"sum" : 57000.0
}
},
{
"key" : "female",
"doc_count" : 1,
"salary_stats" : {
"count" : 1,
"min" : 20000.0,
"max" : 20000.0,
"avg" : 20000.0,
"sum" : 20000.0
}
}
]
}
},
{
"key" : "QA",
"doc_count" : 3,
"gender_stats" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "female",
"doc_count" : 2,
"salary_stats" : {
"count" : 2,
"min" : 18000.0,
"max" : 25000.0,
"avg" : 21500.0,
"sum" : 43000.0
}
},
{
"key" : "male",
"doc_count" : 1,
"salary_stats" : {
"count" : 1,
"min" : 20000.0,
"max" : 20000.0,
"avg" : 20000.0,
"sum" : 20000.0
}
}
]
}
},
{
"key" : "DBA",
"doc_count" : 2,
"gender_stats" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "female",
"doc_count" : 1,
"salary_stats" : {
"count" : 1,
"min" : 20000.0,
"max" : 20000.0,
"avg" : 20000.0,
"sum" : 20000.0
}
},
{
"key" : "male",
"doc_count" : 1,
"salary_stats" : {
"count" : 1,
"min" : 30000.0,
"max" : 30000.0,
"avg" : 30000.0,
"sum" : 30000.0
}
}
]
}
},
{
"key" : "Web Designer",
"doc_count" : 2,
"gender_stats" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "female",
"doc_count" : 1,
"salary_stats" : {
"count" : 1,
"min" : 22000.0,
"max" : 22000.0,
"avg" : 22000.0,
"sum" : 22000.0
}
},
{
"key" : "male",
"doc_count" : 1,
"salary_stats" : {
"count" : 1,
"min" : 18000.0,
"max" : 18000.0,
"avg" : 18000.0,
"sum" : 18000.0
}
}
]
}
},
{
"key" : "Dev Manager",
"doc_count" : 1,
"gender_stats" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "male",
"doc_count" : 1,
"salary_stats" : {
"count" : 1,
"min" : 50000.0,
"max" : 50000.0,
"avg" : 50000.0,
"sum" : 50000.0
}
}
]
}
},
{
"key" : "Product Manager",
"doc_count" : 1,
"gender_stats" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "female",
"doc_count" : 1,
"salary_stats" : {
"count" : 1,
"min" : 35000.0,
"max" : 35000.0,
"avg" : 35000.0,
"sum" : 35000.0
}
}
]
}
}
]
}
}
}
支持对聚合分析的结果,再次进行聚合分析。
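Pipeline 的分析结果会输出到原结果中,根据位置的不同,分为两类:Sibling(结果和现有分析结果同级,例如 min_bucket、stats_bucket、percentiles_bucket)和 Parent(结果内嵌到现有的聚合分析结果之中,例如 cumulative_sum)。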
# 平均工资最低的工种
POST employees/_search
{
"size": 0,
"aggs": {
"jobs": {
"terms": {
"field": "job.keyword",
"size": 5
},
"aggs": {
"avg_salary": {
"avg": {
"field": "salary"
}
}
}
},
"min_salary_by_job":{
"min_bucket": {
"buckets_path": "jobs>avg_salary"
}
}
}
}
response
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 20,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"jobs" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 2,
"buckets" : [
{
"key" : "Java Programmer",
"doc_count" : 7,
"avg_salary" : {
"value" : 25571.428571428572
}
},
{
"key" : "Javascript Programmer",
"doc_count" : 4,
"avg_salary" : {
"value" : 19250.0
}
},
{
"key" : "QA",
"doc_count" : 3,
"avg_salary" : {
"value" : 21000.0
}
},
{
"key" : "DBA",
"doc_count" : 2,
"avg_salary" : {
"value" : 25000.0
}
},
{
"key" : "Web Designer",
"doc_count" : 2,
"avg_salary" : {
"value" : 20000.0
}
}
]
},
"min_salary_by_job" : {
"value" : 19250.0,
"keys" : [
"Javascript Programmer"
]
}
}
}
# 平均工资的统计分析
POST employees/_search
{
"size": 0,
"aggs": {
"jobs": {
"terms": {
"field": "job.keyword",
"size": 5
},
"aggs": {
"avg_salary": {
"avg": {
"field": "salary"
}
}
}
},
"stats_salary_by_job":{
"stats_bucket": {
"buckets_path": "jobs>avg_salary"
}
}
}
}
response
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 20,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"jobs" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 2,
"buckets" : [
{
"key" : "Java Programmer",
"doc_count" : 7,
"avg_salary" : {
"value" : 25571.428571428572
}
},
{
"key" : "Javascript Programmer",
"doc_count" : 4,
"avg_salary" : {
"value" : 19250.0
}
},
{
"key" : "QA",
"doc_count" : 3,
"avg_salary" : {
"value" : 21000.0
}
},
{
"key" : "DBA",
"doc_count" : 2,
"avg_salary" : {
"value" : 25000.0
}
},
{
"key" : "Web Designer",
"doc_count" : 2,
"avg_salary" : {
"value" : 20000.0
}
}
]
},
"stats_salary_by_job" : {
"count" : 5,
"min" : 19250.0,
"max" : 25571.428571428572,
"avg" : 22164.285714285717,
"sum" : 110821.42857142858
}
}
}
# 平均工资的百分位数
POST employees/_search
{
"size": 0,
"aggs": {
"jobs": {
"terms": {
"field": "job.keyword",
"size": 5
},
"aggs": {
"avg_salary": {
"avg": {
"field": "salary"
}
}
}
},
"percentiles_salary_by_job":{
"percentiles_bucket": {
"buckets_path": "jobs>avg_salary"
}
}
}
}
response
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 20,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"jobs" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 2,
"buckets" : [
{
"key" : "Java Programmer",
"doc_count" : 7,
"avg_salary" : {
"value" : 25571.428571428572
}
},
{
"key" : "Javascript Programmer",
"doc_count" : 4,
"avg_salary" : {
"value" : 19250.0
}
},
{
"key" : "QA",
"doc_count" : 3,
"avg_salary" : {
"value" : 21000.0
}
},
{
"key" : "DBA",
"doc_count" : 2,
"avg_salary" : {
"value" : 25000.0
}
},
{
"key" : "Web Designer",
"doc_count" : 2,
"avg_salary" : {
"value" : 20000.0
}
}
]
},
"percentiles_salary_by_job" : {
"values" : {
"1.0" : 19250.0,
"5.0" : 19250.0,
"25.0" : 20000.0,
"50.0" : 21000.0,
"75.0" : 25000.0,
"95.0" : 25571.428571428572,
"99.0" : 25571.428571428572
}
}
}
}
#cumulative_sum 累计求和
POST employees/_search
{
"size": 0,
"aggs": {
"age": {
"histogram": {
"field": "age",
"min_doc_count": 0,
"interval": 1
},
"aggs": {
"avg_salary": {
"avg": {
"field": "salary"
}
},
"cumulative_salary":{
"cumulative_sum": {
"buckets_path": "avg_salary"
}
}
}
}
}
}
response
{
"took" : 2,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 20,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"age" : {
"buckets" : [
{
"key" : 20.0,
"doc_count" : 1,
"avg_salary" : {
"value" : 9000.0
},
"cumulative_salary" : {
"value" : 9000.0
}
},
{
"key" : 21.0,
"doc_count" : 1,
"avg_salary" : {
"value" : 16000.0
},
"cumulative_salary" : {
"value" : 25000.0
}
},
{
"key" : 22.0,
"doc_count" : 0,
"avg_salary" : {
"value" : null
},
"cumulative_salary" : {
"value" : 25000.0
}
},
{
"key" : 23.0,
"doc_count" : 0,
"avg_salary" : {
"value" : null
},
"cumulative_salary" : {
"value" : 25000.0
}
},
{
"key" : 24.0,
"doc_count" : 0,
"avg_salary" : {
"value" : null
},
"cumulative_salary" : {
"value" : 25000.0
}
},
{
"key" : 25.0,
"doc_count" : 3,
"avg_salary" : {
"value" : 17333.333333333332
},
"cumulative_salary" : {
"value" : 42333.33333333333
}
},
{
"key" : 26.0,
"doc_count" : 1,
"avg_salary" : {
"value" : 22000.0
},
"cumulative_salary" : {
"value" : 64333.33333333333
}
},
{
"key" : 27.0,
"doc_count" : 2,
"avg_salary" : {
"value" : 20000.0
},
"cumulative_salary" : {
"value" : 84333.33333333333
}
},
{
"key" : 28.0,
"doc_count" : 0,
"avg_salary" : {
"value" : null
},
"cumulative_salary" : {
"value" : 84333.33333333333
}
},
{
"key" : 29.0,
"doc_count" : 2,
"avg_salary" : {
"value" : 20000.0
},
"cumulative_salary" : {
"value" : 104333.33333333333
}
},
{
"key" : 30.0,
"doc_count" : 2,
"avg_salary" : {
"value" : 30000.0
},
"cumulative_salary" : {
"value" : 134333.3333333333
}
},
{
"key" : 31.0,
"doc_count" : 2,
"avg_salary" : {
"value" : 28500.0
},
"cumulative_salary" : {
"value" : 162833.3333333333
}
},
{
"key" : 32.0,
"doc_count" : 3,
"avg_salary" : {
"value" : 27333.333333333332
},
"cumulative_salary" : {
"value" : 190166.66666666666
}
},
{
"key" : 33.0,
"doc_count" : 1,
"avg_salary" : {
"value" : 28000.0
},
"cumulative_salary" : {
"value" : 218166.66666666666
}
},
{
"key" : 34.0,
"doc_count" : 0,
"avg_salary" : {
"value" : null
},
"cumulative_salary" : {
"value" : 218166.66666666666
}
},
{
"key" : 35.0,
"doc_count" : 0,
"avg_salary" : {
"value" : null
},
"cumulative_salary" : {
"value" : 218166.66666666666
}
},
{
"key" : 36.0,
"doc_count" : 1,
"avg_salary" : {
"value" : 38000.0
},
"cumulative_salary" : {
"value" : 256166.66666666666
}
},
{
"key" : 37.0,
"doc_count" : 0,
"avg_salary" : {
"value" : null
},
"cumulative_salary" : {
"value" : 256166.66666666666
}
},
{
"key" : 38.0,
"doc_count" : 0,
"avg_salary" : {
"value" : null
},
"cumulative_salary" : {
"value" : 256166.66666666666
}
},
{
"key" : 39.0,
"doc_count" : 0,
"avg_salary" : {
"value" : null
},
"cumulative_salary" : {
"value" : 256166.66666666666
}
},
{
"key" : 40.0,
"doc_count" : 0,
"avg_salary" : {
"value" : null
},
"cumulative_salary" : {
"value" : 256166.66666666666
}
},
{
"key" : 41.0,
"doc_count" : 1,
"avg_salary" : {
"value" : 50000.0
},
"cumulative_salary" : {
"value" : 306166.6666666666
}
}
]
}
}
}
ES聚合分析的默认作用范围是query的查询结果集,同时ES还支持以下方式改变聚合的作用范围:
#Query
POST employees/_search
{
"size": 0,
"query": {
"range": {
"age": {
"gte": 20
}
}
},
"aggs": {
"jobs": {
"terms": {
"field":"job.keyword"
}
}
}
}
#Filter
POST employees/_search
{
"size": 0,
"aggs": {
"older_person": {
"filter":{
"range":{
"age":{
"from":35
}
}
},
"aggs":{
"jobs":{
"terms": {
"field":"job.keyword"
}
}
}},
"all_jobs": {
"terms": {
"field":"job.keyword"
}
}
}
}
#post_filter. 一条语句,找出所有的job类型。还能找到聚合后符合条件的结果
POST employees/_search
{
"aggs": {
"jobs": {
"terms": {
"field": "job.keyword"
}
}
},
"post_filter": {
"match": {
"job.keyword": "Dev Manager"
}
}
}
#global
POST employees/_search
{
"size": 0,
"query": {
"range": {
"age": {
"gte": 40
}
}
},
"aggs": {
"jobs": {
"terms": {
"field":"job.keyword"
}
},
"all":{
"global":{},
"aggs":{
"salary_avg":{
"avg":{
"field":"salary"
}
}
}
}
}
}
指定order,按照count和key进行排序:
#排序 order
#count and key
POST employees/_search
{
"size": 0,
"query": {
"range": {
"age": {
"gte": 20
}
}
},
"aggs": {
"jobs": {
"terms": {
"field":"job.keyword",
"order":[
{"_count":"asc"},
{"_key":"desc"}
]
}
}
}
}
#排序 order
#按子聚合 avg_salary 的结果排序
POST employees/_search
{
"size": 0,
"aggs": {
"jobs": {
"terms": {
"field":"job.keyword",
"order":[ {
"avg_salary":"desc"
}]
},
"aggs": {
"avg_salary": {
"avg": {
"field":"salary"
}
}
}
}
}
}
#排序 order
#按子聚合 stats_salary 的 min 值排序
POST employees/_search
{
"size": 0,
"aggs": {
"jobs": {
"terms": {
"field":"job.keyword",
"order":[ {
"stats_salary.min":"desc"
}]
},
"aggs": {
"stats_salary": {
"stats": {
"field":"salary"
}
}
}
}
}
}
ElasticSearch在对海量数据进行聚合分析的时候会损失搜索的精准度来满足实时性的需求
在这里插入图片描述
不精准的原因:数据分散到多个分片,聚合时每个分片各自取 Top X 再汇总,导致结果不精准。ES 也可以不在每个分片取 Top X,而是做全量聚合,但这势必会带来很大的性能问题。
解决方案之一:将主分片数设置为 1。注意:7.x 版本的主分片数已经默认为 1。
适用场景:数据量小的小集群规模业务场景
官方推荐:size*1.5+10
设置 shard_size 为比较大的值,官方推荐:size*1.5+10。shard_size 值越大,结果越趋近于精准聚合结果值。此外,还可以通过show_term_doc_count_error参数显示最差情况下的错误值,用于辅助确定 shard_size 大小。
适用场景:数据量大、分片数多的集群业务场景
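在Terms Aggregation的返回中有两个特殊的数值:doc_count_error_upper_bound(被遗漏的 term 分桶所包含的文档数的最大可能值)和 sum_other_doc_count(除了返回结果中的分桶之外,其余 term 的文档总数)。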
DELETE my_flights
PUT my_flights
{
"settings": {
"number_of_shards": 20
},
"mappings" : {
"properties" : {
"AvgTicketPrice" : {
"type" : "float"
},
"Cancelled" : {
"type" : "boolean"
},
"Carrier" : {
"type" : "keyword"
},
"Dest" : {
"type" : "keyword"
},
"DestAirportID" : {
"type" : "keyword"
},
"DestCityName" : {
"type" : "keyword"
},
"DestCountry" : {
"type" : "keyword"
},
"DestLocation" : {
"type" : "geo_point"
},
"DestRegion" : {
"type" : "keyword"
},
"DestWeather" : {
"type" : "keyword"
},
"DistanceKilometers" : {
"type" : "float"
},
"DistanceMiles" : {
"type" : "float"
},
"FlightDelay" : {
"type" : "boolean"
},
"FlightDelayMin" : {
"type" : "integer"
},
"FlightDelayType" : {
"type" : "keyword"
},
"FlightNum" : {
"type" : "keyword"
},
"FlightTimeHour" : {
"type" : "keyword"
},
"FlightTimeMin" : {
"type" : "float"
},
"Origin" : {
"type" : "keyword"
},
"OriginAirportID" : {
"type" : "keyword"
},
"OriginCityName" : {
"type" : "keyword"
},
"OriginCountry" : {
"type" : "keyword"
},
"OriginLocation" : {
"type" : "geo_point"
},
"OriginRegion" : {
"type" : "keyword"
},
"OriginWeather" : {
"type" : "keyword"
},
"dayOfWeek" : {
"type" : "integer"
},
"timestamp" : {
"type" : "date"
}
}
}
}
POST _reindex
{
"source": {
"index": "kibana_sample_data_flights"
},
"dest": {
"index": "my_flights"
}
}
GET my_flights/_count
GET kibana_sample_data_flights/_search
{
"size": 0,
"aggs": {
"weather": {
"terms": {
"field":"OriginWeather",
"size":5,
"show_term_doc_count_error":true
}
}
}
}
GET my_flights/_search
{
"size": 0,
"aggs": {
"weather": {
"terms": {
"field":"OriginWeather",
"size":5,
"shard_size":10,
"show_term_doc_count_error":true
}
}
}
}
将 size 设置为 2 的 31 次方减 1(2147483647,即 Java int 的最大值,约等于单个分片可容纳的文档数上限),来解决精度问题。
原因:1.x版本,size等于 0 代表全部,高版本取消 0 值,所以设置了最大值(大于业务的全量值)。
全量带来的弊端就是:如果分片数据量极大,这样做会耗费巨大的CPU 资源来排序,而且可能会阻塞网络。
适用场景:对聚合精准度要求极高的业务场景,由于性能问题,不推荐使用。
适用场景:数据量非常大、聚合精度要求高、响应速度快的业务场景。
适用场景:高基数聚合 。高基数聚合场景中的高基数含义:一个字段包含很大比例的唯一值。
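global ordinals 中文翻译成全局序号,是一种数据结构,应用场景如下:基于 keyword、ip 等字段的分桶聚合(如 terms、composite);基于 text 字段的分桶聚合(需要开启 fielddata);基于父子文档 join 类型的 has_child 查询和 parent 聚合。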
global ordinals 使用一个数值代表字段中的字符串值,然后为每一个数值分配一个 bucket(分桶)。
global ordinals 的本质是:启用 eager_global_ordinals 时,会在刷新(refresh)分片时构建全局序号。这将构建全局序号的成本从搜索阶段转移到了数据索引化(写入)阶段。
创建索引的同时开启:eager_global_ordinals
PUT /my-index
{
"mappings": {
"properties": {
"tags": {
"type": "keyword",
"eager_global_ordinals": true
}
}
}
}
注意:开启 eager_global_ordinals 会影响写入性能,因为每次刷新时都会创建新的全局序号。为了最大程度地减少由于频繁刷新建立全局序号而导致的额外开销,请调大刷新间隔 refresh_interval
PUT my-index/_settings
{
"index": {
"refresh_interval": "30s"
}
}
Index sorting (索引排序)可用于在插入时对索引进行预排序,而不是在查询时再对索引进行排序,这将提高范围查询(range query)和排序操作的性能。
在 Elasticsearch 中创建新索引时,可以配置如何对每个分片内的段进行排序。
这是 Elasticsearch 6.X 之后版本才有的特性。
PUT /my_index
{
"settings": {
"index":{
"sort.field": "create_time",
"sort.order": "desc"
}
},
"mappings": {
"properties": {
"create_time":{
"type": "date"
}
}
}
}
注意:预排序将增加 Elasticsearch 写入的成本。在某些用户特定场景下,开启索引预排序会导致大约 40%-50% 的写性能下降。也就是说,如果用户场景更关注写性能的业务,开启索引预排序不是一个很好的选择。
节点查询缓存(Node query cache)可用于有效缓存过滤器(filter)操作的结果。如果多次执行同一 filter 操作,这将很有效,但是即便更改过滤器中的某一个值,也将意味着需要计算新的过滤器结果。
例如,由于 “now” 值一直在变化,因此无法缓存在过滤器上下文中使用 “now” 的查询。
那怎么使用缓存呢?通过对 now 应用 datemath 格式,将其取整到分钟/小时等粒度,可以使此类请求更具可缓存性,以便可以对筛选结果进行缓存。
PUT /my_index/_doc/1
{
"create_time":"2022-05-11T16:30:55.328Z"
}
#下面的示例无法使用缓存
GET /my_index/_search
{
"query":{
"constant_score": {
"filter": {
"range": {
"create_time": {
"gte": "now-1h",
"lte": "now"
}
}
}
}
}
}
# 下面的示例就可以使用节点查询缓存。
GET /my_index/_search
{
"query":{
"constant_score": {
"filter": {
"range": {
"create_time": {
"gte": "now-1h/m",
"lte": "now/m"
}
}
}
}
}
}
上述示例中的“now-1h/m” 就是 datemath 的格式。
如果当前时间 now 是:16:31:29,那么 range query 将匹配 create_time 介于:15:31:00 和 16:31:59 之间的时间数据。同理,聚合的前半部分 query 中如果有基于时间查询,或者后半部分 aggs 部分中有基于时间聚合的,建议都使用 datemath 方式做缓存处理以优化性能。
聚合语句中,设置:size:0,就会使用分片请求缓存缓存结果。size = 0 的含义是:只返回聚合结果,不返回查询结果。
GET /es_db/_search
{
"size": 0,
"aggs": {
"remark_agg": {
"terms": {
"field": "remark.keyword"
}
}
}
}
Elasticsearch 查询条件中同时有多个条件聚合,默认情况下聚合不是并行运行的。当为每个聚合提供自己的查询并执行 msearch 时,性能会有显著提升。因此,在 CPU 资源不是瓶颈的前提下,如果想缩短响应时间,可以将多个聚合拆分为多个查询,借助:msearch 实现并行聚合。
#常规的多条件聚合实现
GET /employees/_search
{
"size": 0,
"aggs": {
"job_agg": {
"terms": {
"field": "job.keyword"
}
},
"max_salary":{
"max": {
"field": "salary"
}
}
}
}
# msearch 拆分多个语句的聚合实现
GET _msearch
{"index":"employees"}
{"size":0,"aggs":{"job_agg":{"terms":{"field": "job.keyword"}}}}
{"index":"employees"}
{"size":0,"aggs":{"max_salary":{"max":{"field": "salary"}}}}
DELETE /employees
#创建索引库
PUT /employees
{
"mappings": {
"properties": {
"age":{
"type": "integer"
},
"gender":{
"type": "keyword"
},
"job":{
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 50
}
}
},
"name":{
"type": "keyword"
},
"salary":{
"type": "integer"
}
}
}
}
PUT /employees/_bulk
{ "index" : { "_id" : "1" } }
{ "name" : "Emma","age":32,"job":"Product Manager","gender":"female","salary":35000 }
{ "index" : { "_id" : "2" } }
{ "name" : "Underwood","age":41,"job":"Dev Manager","gender":"male","salary": 50000}
{ "index" : { "_id" : "3" } }
{ "name" : "Tran","age":25,"job":"Web Designer","gender":"male","salary":18000 }
{ "index" : { "_id" : "4" } }
{ "name" : "Rivera","age":26,"job":"Web Designer","gender":"female","salary": 22000}
{ "index" : { "_id" : "5" } }
{ "name" : "Rose","age":25,"job":"QA","gender":"female","salary":18000 }
{ "index" : { "_id" : "6" } }
{ "name" : "Lucy","age":31,"job":"QA","gender":"female","salary": 25000}
{ "index" : { "_id" : "7" } }
{ "name" : "Byrd","age":27,"job":"QA","gender":"male","salary":20000 }
{ "index" : { "_id" : "8" } }
{ "name" : "Foster","age":27,"job":"Java Programmer","gender":"male","salary": 20000}
{ "index" : { "_id" : "9" } }
{ "name" : "Gregory","age":32,"job":"Java Programmer","gender":"male","salary":22000 }
{ "index" : { "_id" : "10" } }
{ "name" : "Bryant","age":20,"job":"Java Programmer","gender":"male","salary": 9000}
{ "index" : { "_id" : "11" } }
{ "name" : "Jenny","age":36,"job":"Java Programmer","gender":"female","salary":38000 }
{ "index" : { "_id" : "12" } }
{ "name" : "Mcdonald","age":31,"job":"Java Programmer","gender":"male","salary": 32000}
{ "index" : { "_id" : "13" } }
{ "name" : "Jonthna","age":30,"job":"Java Programmer","gender":"female","salary":30000 }
{ "index" : { "_id" : "14" } }
{ "name" : "Marshall","age":32,"job":"Javascript Programmer","gender":"male","salary": 25000}
{ "index" : { "_id" : "15" } }
{ "name" : "King","age":33,"job":"Java Programmer","gender":"male","salary":28000 }
{ "index" : { "_id" : "16" } }
{ "name" : "Mccarthy","age":21,"job":"Javascript Programmer","gender":"male","salary": 16000}
{ "index" : { "_id" : "17" } }
{ "name" : "Goodwin","age":25,"job":"Javascript Programmer","gender":"male","salary": 16000}
{ "index" : { "_id" : "18" } }
{ "name" : "Catherine","age":29,"job":"Javascript Programmer","gender":"female","salary": 20000}
{ "index" : { "_id" : "19" } }
{ "name" : "Boone","age":30,"job":"DBA","gender":"male","salary": 30000}
{ "index" : { "_id" : "20" } }
{ "name" : "Kathy","age":29,"job":"DBA","gender":"female","salary": 20000}