elastic search中的bucket和metric

  • 概念

bucket:按照某个字段进行bucket划分,那个字段的值相同的那些数据,就会被划分到一个bucket中;

metric:对一个bucket执行的某种聚合分析的操作,比如说求平均值,求最大值,求最小值

对这两个与sql语句进行类比:

select count(*) from access_log group by user_id

bucket:group by user_id --> 那些user_id相同的数据,就会被划分到一个bucket中
metric:count(*),对每个user_id bucket中所有的数据,计算一个数量

  • 聚合数据分析一:
PUT /tvs
{
	"mappings": {
		"sales": {
			"properties": {
				"price": {
					"type": "long"
				},
				"color": {
					"type": "keyword"
				},
				"brand": {
					"type": "keyword"
				},
				"sold_date": {
					"type": "date"
				}
			}
		}
	}
}

POST /tvs/sales/_bulk
{ "index": {}}
{ "price" : 1000, "color" : "红色", "brand" : "长虹", "sold_date" : "2016-10-28" }
{ "index": {}}
{ "price" : 2000, "color" : "红色", "brand" : "长虹", "sold_date" : "2016-11-05" }
{ "index": {}}
{ "price" : 3000, "color" : "绿色", "brand" : "小米", "sold_date" : "2016-05-18" }
{ "index": {}}
{ "price" : 1500, "color" : "蓝色", "brand" : "TCL", "sold_date" : "2016-07-02" }
{ "index": {}}
{ "price" : 1200, "color" : "绿色", "brand" : "TCL", "sold_date" : "2016-08-19" }
{ "index": {}}
{ "price" : 2000, "color" : "红色", "brand" : "长虹", "sold_date" : "2016-11-05" }
{ "index": {}}
{ "price" : 8000, "color" : "红色", "brand" : "三星", "sold_date" : "2017-01-01" }
{ "index": {}}
{ "price" : 2500, "color" : "蓝色", "brand" : "小米", "sold_date" : "2017-02-12" }

2、统计哪种颜色的电视销量最高

GET /tvs/sales/_search
{
    "size" : 0,
    "aggs" : { 
        "popular_colors" : { 
            "terms" : { 
              "field" : "color"
            }
        }
    }
}

size:只获取聚合结果,而不要执行聚合的原始数据
aggs:固定语法,要对一份数据执行分组聚合操作

popular_colors:就是对每个aggs,都要起一个名字,这个名字是随机的
terms:根据字段的值进行分组
field:根据指定的字段的值进行分组

返回结果:

{
  "took" : 3,
  "timed_out" : false,
  "_shards" : {
    "total" : 5,
    "successful" : 5,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : 8,
    "max_score" : 0.0,
    "hits" : [ ]
  },
  "aggregations" : {
    "popular_color" : {
      "doc_count_error_upper_bound" : 0,
      "sum_other_doc_count" : 0,
      "buckets" : [
        {
          "key" : "红色",
          "doc_count" : 4
        },
        {
          "key" : "绿色",
          "doc_count" : 2
        },
        {
          "key" : "蓝色",
          "doc_count" : 2
        }
      ]
    }
  }
}

默认的排序规则:按照doc_count降序排序

3、统计每种颜色电视的平均价格

GET /tvs/sales/_search
{
  "size": 0,
  "aggs": {
    "popular_color": {
      "terms": {
        "field": "color",
        "size": 10
      },
      "aggs": {
        "ave_price": {
          "avg": {
            "field": "price"
          }
        }
      }
    }
  }
}

3、多层次下钻分析

下钻分析:已经分了一个组了,比如说颜色的分组,然后还要继续对这个分组内的数据,再分组,比如一个颜色内,还可以分成多个不同的品牌的组,最后对每个最小粒度的分组执行聚合分析操作,这就叫做下钻分析
例子:从颜色到品牌进行下钻分析,每种颜色的平均价格,以及找到每种颜色每个品牌的平均价格

GET /tvs/sales/_search
{
  "size": 0,
  "aggs": {
    "popular_color": {
      "terms": {
        "field": "color",
        "size": 10
      },
      "aggs": {
        "ave_price": {
          "avg": {
            "field": "price"
          }
        },
        "group_by_brand": {
          "terms": {
            "field": "brand",
            "size": 10
          },
          "aggs":{
            "brand_avg_price":{
              "avg": {
                "field": "price"
              }
            }
        }
        }
      }
    }
  }
}

4、统计每种颜色电视的最大最小价格

GET /tvs/sales/_search
{
  "size": 0,
  "aggs": {
    "group_by_color": {
      "terms": {
        "field": "color",
        "size": 10
      },
      "aggs": {
        "max_price": {
          "max": {
            "field": "price"
          }
        },
        "min_price":{
          "min": {
            "field": "price"
          }
        },
        "sum_price":{
          "sum": {
            "field": "price"
          }
        }
      }
    }
  }
}

4、使用histogram按价格区间统计电视销量和销售额

histogram:类似于terms,也是进行bucket分组操作,接收一个field,按照这个field的值的各个范围区间,进行bucket分组操作

"histogram":{ 
  "field": "price",
  "interval": 2000
},

interval:2000,划分范围,0~2000,2000~4000,4000~6000,6000~8000,8000~10000,buckets

GET /tvs/sales/_search
{
  "size": 0,
  "aggs": {
    "price": {
      "histogram": {
        "field": "price",
        "interval": 2000
      },
      "aggs": {
        "revenue": {
          "sum": {
            "field": "price"
          }
        }
      }
    }
  }
}

5、使用date_histogram统计每月电视的销量

按照我们指定的某个date类型的日期field,以及日期interval,按照一定的日期间隔,去划分bucket;

GET /tvs/sales/_search
{
  "size": 0,
  "aggs": {
    "group_by_soldDate": {
      "date_histogram": {
        "field": "sold_date",
        "interval": "month",
        "format": "yyyy-MM-dd",
        "min_doc_count": 0,
        "extended_bounds": {
          "min": "2017-01-01",
          "max": "2017-12-31"
        }
      }
    }
  }
}


min_doc_count:即使某个日期interval,2017-01-01~2017-01-31中,一条数据都没有,那么这个区间也是要返回的,不然默认是会过滤掉这个区间的
extended_bounds,min,max:划分bucket的时候,会限定在这个起始日期,和截止日期内

6、统计每季度每个品牌的销售额

GET /tvs/sales/_search
{
  "size": 0,
  "aggs": {
    "group_by_quarter": {
      "date_histogram": {
        "field": "sold_date",
        "interval": "quarter",
        "format": "yyyy-MM-dd", 
        "min_doc_count": 0,
        "extended_bounds": {
          "min": "2016-01-01",
          "max": "2017-12-31"
        }
      },
      "aggs":{
      "group_by_brand":{
        "terms": {
          "field": "brand"
        },
        "aggs": {
          "per_brand_price": {
            "sum": {
              "field": "price"
            }
          }
        }
      },
      "total_sum_quarter":{
        "sum": {
          "field": "price"
        }
      }
    }
    }
  }
}

7、统计指定品牌下每种颜色的销量

GET /tvs/sales/_search
{
  "size": 0, 
  "query": {
    "term": {
      "brand": {
        "value": "小米"
      }
    }
  },
  "aggs": {
    "group_by_color": {
      "terms": {
        "field": "color"
      }
    }
  }
}

7、_global bucket:单个品牌与所有品牌销量对比

GET /tvs/sales/_search
{
  "size": 0,
  "query": {
    "term": {
      "brand": {
        "value": "长虹"
      }
    }
  },
  "aggs": {
    "changhong_avg_price": {
      "avg": {
        "field": "price"
      }
    },
    "all":{
      "global": {},
      "aggs": {
        "all_brand_ave_price":{
          "avg": {
            "field": "price"
          }
        }
      }
    }
  }
}

8、过滤+聚合,统计价格大于1200的平均价格

GET /tvs/sales/_search
{
  "size": 0,
  "query": {
    "constant_score": {
      "filter": {
        "range": {
          "price": {
            "gte": 1200
          }
        }
      },
      "boost": 1
    }
  },
  "aggs": {
    "avg_price": {
      "avg": {
        "field": "price"
      }
    }
  }
}

9、统计品牌最近一个月的平均价格

GET /tvs/sales/_search
{
  "size": 0, 
  "query": {
    "term": {
      "brand": {
        "value": "长虹"
      }
    }
  },
  "aggs": {
    "recent_150d": {
      "filter": {
        "range": {
          "sold_date": {
            "gte": "now-3000d"
          }
        }
      },
      "aggs": {
        "recent_3000d_ave_price": {
          "avg": {
            "field": "price"
          }
        }
      }
    }
  }
}

10、统计出来每个颜色的电视的销售额,需要按照销售额降序排序

GET /tvs/sales/_search
{
  "size": 0, 
  "aggs": {
    "group_by_color": {
      "terms": {
        "field": "color",
        "order": {
          "avg_price": "desc"
        }
      },
      "aggs": {
        "avg_price": {
          "avg": {
            "field": "price"
          }
        }
      }
    }
}
}

11、按下钻最深层次的metric排序

GET /tvs/sales/_search 
{
  "size": 0,
  "aggs": {
    "group_by_color": {
      "terms": {
        "field": "color"
      },
      "aggs": {
        "group_by_brand": {
          "terms": {
            "field": "brand",
            "order": {
              "avg_price": "desc"
            }
          },
          "aggs": {
            "avg_price": {
              "avg": {
                "field": "price"
              }
            }
          }
        }
      }
    }
  }
}

 

你可能感兴趣的:(elastic,search)