prometheus-nginxlog-exporter构建Nginx日志监控

lunimous 可以开启 dashborad
rgw 通过 prometheus-nginxlog-exporter 可以解析 Nginx 日志,还有一些正则可以去处理,然后通过 relabel 的配置,让其在指标数据上打上标签。

civetweb 没有在日志记录请求返回的时间,所以考虑用 Nginx 来替换,为了能给 Ceph RGW 加一个请求相关的监控,所以需要去解析日志,在 prometheus-nginxlog-exporter 中,通过配置文件,可以把标签和端口等设置好,然后让 prometheus 加上这个 endpoint 就可以拉出来数据了。

listen {
  port = 4040
}

enable_experimental = true

namespace "nginx" {
  source = {
    files = [
      "/var/log/nginx/access.log"
    ]
  }

  format = "$remote_addr - $remote_user [$time_local] \"$request\" $status $body_bytes_sent \"$request_time\""

  labels {
    app = "default"
  }

  #relabel "request" {
  #  from = "request"
  #}

  relabel "bucket" {
    from = "request"
    split = 2
    match "^/general__lingqu/.*" {
      replacement = "general__lingqu"
    }
  }
}

Nginx 日志。

xx.xx.xxx.x - - [26/Jun/2020:01:23:37 +0800] "GET /general__lingqu/kp/out/390280/n1590907415729/107916/20200601220441/_SUCCESS HTTP/1.1" 200 0 "0.001"
xx.xx.xxx.x - - [26/Jun/2020:01:23:43 +0800] "GET /general__lingqu/kp/out/390280/n1590907415729/107916/20200601220441/part-00000 HTTP/1.1" 200 1165212977 "6.006"
xx.xx.xxx.x - - [26/Jun/2020:01:23:49 +0800] "GET /general__lingqu/kp/out/390280/n1590907415729/107916/20200601220441/part-00001 HTTP/1.1" 200 1180678766 "6.130"
xx.xx.xxx.x - - [26/Jun/2020:01:23:55 +0800] "GET /general__lingqu/kp/out/390280/n1590907415729/107916/20200601220441/part-00002 HTTP/1.1" 200 1181307932 "6.049"
xx.xx.xxx.x - - [26/Jun/2020:01:24:01 +0800] "GET /general__lingqu/kp/out/390280/n1590907415729/107916/20200601220441/part-00003 HTTP/1.1" 200 1207342395 "6.138"
xx.xx.xxx.x - - [26/Jun/2020:01:24:08 +0800] "GET /general__lingqu/kp/out/390280/n1590907415729/107916/20200601220441/part-00004 HTTP/1.1" 200 1198727695 "6.080"
xx.xx.xxx.x - - [26/Jun/2020:01:24:14 +0800] "GET /general__lingqu/kp/out/390280/n1590907415729/107916/20200601220441/part-00005 HTTP/1.1" 200 1206446826 "6.137"
xx.xx.xxx.x - - [26/Jun/2020:01:24:20 +0800] "GET /general__lingqu/kp/out/390280/n1590907415729/107916/20200601220441/part-00006 HTTP/1.1" 200 1204772146 "6.144"
xx.xx.xxx.x - - [26/Jun/2020:01:24:26 +0800] "GET /general__lingqu/kp/out/390280/n1590907415729/107916/20200601220441/part-00007 HTTP/1.1" 200 1165911854 "5.985"
xx.xx.xxx.x - - [26/Jun/2020:01:24:32 +0800] "GET /general__lingqu/kp/out/390280/n1590907415729/107916/20200601220441/part-00008 HTTP/1.1" 200 1179420268 "6.077"

prometheus-nginxlog-exporter 采集到的指标。

# HELP nginx_http_response_count_total Amount of processed HTTP requests
# TYPE nginx_http_response_count_total counter
nginx_http_response_count_total{app="default",bucket="",method="GET",status="200"} 1
nginx_http_response_count_total{app="default",bucket="general__lingqu",method="DELETE",status="204"} 2
nginx_http_response_count_total{app="default",bucket="general__lingqu",method="GET",status="200"} 214
nginx_http_response_count_total{app="default",bucket="general__lingqu",method="HEAD",status="200"} 8474
nginx_http_response_count_total{app="default",bucket="general__lingqu",method="PUT",status="200"} 5
# HELP nginx_http_response_size_bytes Total amount of transferred bytes
# TYPE nginx_http_response_size_bytes counter
nginx_http_response_size_bytes{app="default",bucket="",method="GET",status="200"} 338
nginx_http_response_size_bytes{app="default",bucket="general__lingqu",method="DELETE",status="204"} 0
nginx_http_response_size_bytes{app="default",bucket="general__lingqu",method="GET",status="200"} 2.1549611919e+10
nginx_http_response_size_bytes{app="default",bucket="general__lingqu",method="HEAD",status="200"} 0
nginx_http_response_size_bytes{app="default",bucket="general__lingqu",method="PUT",status="200"} 0
# HELP nginx_http_response_time_seconds Time needed by NGINX to handle requests
# TYPE nginx_http_response_time_seconds summary
nginx_http_response_time_seconds{app="default",bucket="",method="GET",status="200",quantile="0.5"} NaN
nginx_http_response_time_seconds{app="default",bucket="",method="GET",status="200",quantile="0.9"} NaN
nginx_http_response_time_seconds{app="default",bucket="",method="GET",status="200",quantile="0.99"} NaN
nginx_http_response_time_seconds_sum{app="default",bucket="",method="GET",status="200"} 0.002
nginx_http_response_time_seconds_count{app="default",bucket="",method="GET",status="200"} 1
nginx_http_response_time_seconds{app="default",bucket="general__lingqu",method="DELETE",status="204",quantile="0.5"} 0.002
nginx_http_response_time_seconds{app="default",bucket="general__lingqu",method="DELETE",status="204",quantile="0.9"} 0.009
nginx_http_response_time_seconds{app="default",bucket="general__lingqu",method="DELETE",status="204",quantile="0.99"} 0.009
nginx_http_response_time_seconds_sum{app="default",bucket="general__lingqu",method="DELETE",status="204"} 0.011
nginx_http_response_time_seconds_count{app="default",bucket="general__lingqu",method="DELETE",status="204"} 2
nginx_http_response_time_seconds{app="default",bucket="general__lingqu",method="GET",status="200",quantile="0.5"} 0.014
nginx_http_response_time_seconds{app="default",bucket="general__lingqu",method="GET",status="200",quantile="0.9"} 0.112
nginx_http_response_time_seconds{app="default",bucket="general__lingqu",method="GET",status="200",quantile="0.99"} 7.037
nginx_http_response_time_seconds_sum{app="default",bucket="general__lingqu",method="GET",status="200"} 115.89000000000006
nginx_http_response_time_seconds_count{app="default",bucket="general__lingqu",method="GET",status="200"} 214
nginx_http_response_time_seconds{app="default",bucket="general__lingqu",method="HEAD",status="200",quantile="0.5"} 0.002
nginx_http_response_time_seconds{app="default",bucket="general__lingqu",method="HEAD",status="200",quantile="0.9"} 0.002
nginx_http_response_time_seconds{app="default",bucket="general__lingqu",method="HEAD",status="200",quantile="0.99"} 0.024
nginx_http_response_time_seconds_sum{app="default",bucket="general__lingqu",method="HEAD",status="200"} 22.99799999999861
nginx_http_response_time_seconds_count{app="default",bucket="general__lingqu",method="HEAD",status="200"} 8474
nginx_http_response_time_seconds{app="default",bucket="general__lingqu",method="PUT",status="200",quantile="0.5"} NaN
nginx_http_response_time_seconds{app="default",bucket="general__lingqu",method="PUT",status="200",quantile="0.9"} NaN
nginx_http_response_time_seconds{app="default",bucket="general__lingqu",method="PUT",status="200",quantile="0.99"} NaN
nginx_http_response_time_seconds_sum{app="default",bucket="general__lingqu",method="PUT",status="200"} 0.839
nginx_http_response_time_seconds_count{app="default",bucket="general__lingqu",method="PUT",status="200"} 5

Reference

  1. https://github.com/martin-helmich/prometheus-nginxlog-exporter
  2. https://www.martin-helmich.de/en/blog/monitoring-nginx.html

你可能感兴趣的:(Ceph)