BAT大牛亲授 基于ElasticSearch的搜房网实战

download:BAT大牛亲授 基于ElasticSearch的搜房网实战

Elasticsearch 统计代码例子
aggs
avg 平均数
最近15分钟的平均访问时间,upstream_time_ms是每次访问时间,单位毫秒

{
"query": {

"filtered": {
  "filter": {
    "range": {
      "@timestamp": {
        "gt": "now-15m",
        "lt": "now"
      }
    }
  }
}

},
"aggs": {

"execute_time": {
  "avg": {
    "field": "upstream_time_ms"
  }
}

}
}
//当然你也可以直接将过滤器写在aggs里面
{
"size": 0,
"aggs": {

"filtered_aggs": {
  "filter": {
    "range": {
      "@timestamp": {
        "gt": "now-15m",
        "lt": "now"
      }
    }
  },
  "aggs": {
    "execute_time": {
      "avg": {
        "field": "upstream_time_ms"
      }
    }
  }
}

}
}
cardinality 基数,比如计算uv
你可能注意到了size:0,如果你只需要统计数据,不需要数据本身,就设置它,这不是我投机取巧,官方文档也是这么干的。

{
"size": 0,
"aggs": {

"filtered_aggs": {
  "filter": {
    "range": {
      "@timestamp": {
        "gt": "now-15m",
        "lt": "now"
      }
    }
  },
  "aggs": {
    "ipv": {
      "cardinality": {
        "field": "ip"
      }
    }
  }
}

}
}
percentiles 基于百分比统计
最近15分钟,99.9%的请求的执行时间不超过多少

{
"size": 0,
"query": {

"filtered": {
  "filter": {
    "range": {
      "@timestamp": {
        "gt": "now-15m",
        "lt": "now"
      }
    }
  }
}

},
"aggs": {

"execute_time": {
  "percentiles": {
    "field": "upstream_time_ms",
    "percents": [
      90,
      95,
      99.9
    ]
  }
}

}
}
//返回值,0.1%的请求超过了159ms
{
"took": 620,
"timed_out": false,
"_shards": {

"total": 5,
"successful": 5,
"failed": 0

},
"hits": {

"total": 679400,
"max_score": 0,
"hits": []

},
"aggregations": {

"execute_time": {
  "values": {
    "90.0": 24.727003484320534,
    "95.0": 72.6200981699678,
    "99.9": 159.01065773524886 //99.9%的数据落在159ms以内,159是系统计算出来的
  }
}

}
}
percentile_ranks 指定一个范围,统计有多少比例的数据落在这个范围内
{
"size": 0,
"query": {

"filtered": {
  "filter": {
    "range": {
      "@timestamp": {
        "gt": "now-15m",
        "lt": "now"
      }
    }
  }
}

},
"aggs": {

"execute_time": {
  "percentile_ranks": {
    "field": "upstream_time_ms",
    "values": [
      50,
      160
    ]
  }
}

}
}
//返回值
{
"took": 666,
"timed_out": false,
"_shards": {

"total": 5,
"successful": 5,
"failed": 0

},
"hits": {

"total": 681014,
"max_score": 0,
"hits": []

},
"aggregations": {

"execute_time": {
  "values": {
    "50.0": 94.14716385885366,
    "160.0": 99.91130872493076 //约99.9%的数据落在了160ms以内;这次160是我指定的,99.9%是系统计算出来的
  }
}

}
}
统计最近15分钟,不同链接的请求时间大小
{
"size": 0,
"query": {

"filtered": {
  "filter": {
    "range": {
      "@timestamp": {
        "gt": "now-15m",
        "lt": "now"
      }
    }
  }
}

},
"aggs": {

"execute_time": {
  "terms": {
    "field": "uri"
  },
  "aggs": {
    "avg_time": {
      "avg": {
        "field": "upstream_time_ms"
      }
    }
  }
}

}
}
//返回,看起来url1 比 url2慢一点(avg_time),不过url1的请求量比较大 (doc_count)
{
"took": 1655,
"timed_out": false,
"_shards": {

"total": 5,
"successful": 5,
"failed": 0

},
"hits": {

"total": 710802,
"max_score": 0,
"hits": []

},
"aggregations": {

"execute_time": {
  "doc_count_error_upper_bound": 10,
  "sum_other_doc_count": 347175,
  "buckets": [
    {
      "key": "/url1",
      "doc_count": 362688,
      "avg_time": {
        "value": 6.601660380271749
      }
    },
    {
      "key": "/url2",
      "doc_count": 939,
      "avg_time": {
        "value": 5.313099041533547
      }
    }
  ]
}

}
}
找出url响应最慢的前2名
{
"size": 0,
"query": {

"filtered": {
  "filter": {
    "range": {
      "@timestamp": {
        "gt": "now-15m",
        "lt": "now"
      }
    }
  }
}

},
"aggs": {

"execute_time": {
  "terms": {
    "size": 2,
    "field": "uri",
    "order": {
      "avg_time": "desc"
    }
  },
  "aggs": {
    "avg_time": {
      "avg": {
        "field": "upstream_time_ms"
      }
    }
  }
}

}
}
//返回值
{
"took": 1622,
"timed_out": false,
"_shards": {

"total": 5,
"successful": 5,
"failed": 0

},
"hits": {

"total": 748712,
"max_score": 0,
"hits": []

},
"aggregations": {

"execute_time": {
  "doc_count_error_upper_bound": -1,
  "sum_other_doc_count": 748710,
  "buckets": [
    {
      "key": "url_shit",
      "doc_count": 123,
      "avg_time": {
        "value": 8884
      }
    },
    {
      "key": "url_shit2",
      "doc_count": 456,
      "avg_time": {
        "value": 8588
      }
    }
  ]
}

}
}
value_count 文档数量
相当于
select count(*) from table group by uri,为了达到这个目的,只需要把上文中的 avg 换成 value_count。不过使用 avg 的时候,结果中的 doc_count 其实达到了同样的效果。

怎样取数据画个图?比如:最近2分钟,每20秒的时间窗口中,平均响应时间是多少
{
"size": 0,
"query": {

"filtered": {
  "filter": {
    "range": {
      "@timestamp": {
        "gt": "now-2m",
        "lt": "now"
      }
    }
  }
}

},
"aggs": {

"execute_time": {
  "date_histogram": {
    "field": "@timestamp",
    "interval": "20s"
  },
  "aggs": {
    "avg_time": {
      "avg": {
        "field": "upstream_time_ms"
      }
    }
  }
}

}
}
pv 分时统计图(每小时一统计)
周期大小对性能影响不大

{
"size":0,
"fields":false,
"aggs": {

"execute_time": {
  "date_histogram": {
    "field": "@timestamp",
    "interval": "1h"
  }
}

}
}

你可能感兴趣的:(elasticsearch)