# Prometheus 2 / Alertmanager alerting rules demo

groups:

  # Group "targets": availability alerts for probed endpoints.
  - name: targets
    rules:

    # Fires once probe_success has been 0 for 10s on a target.
    # NOTE(review): probe_success is presumably the blackbox_exporter probe
    # result metric — confirm against the scrape configuration.
    - alert: EndpointDown
      expr: probe_success == 0
      for: 10s
      labels:
        severity: "critical"
      annotations:
        summary: "Endpoint {{ $labels.instance }} down"
  # Group "host-cpu": host-level resource alerts.
  - name: host-cpu
    rules:

    # Fires when node_load1 (the 1-minute load average, per the description
    # text) stays above 1.5 for 30s. $value in the description is the load
    # value that triggered the alert.
    - alert: high_cpu_load
      expr: node_load1 > 1.5
      for: 30s
      labels:
        severity: warning
      annotations:
        summary: "Server under high load"
        description: "Docker host is under high load, the avg load 1m is at {{ $value }}. Reported by instance {{ $labels.instance }} of job {{ $labels.job }}."

    # Fires when used memory (total minus free, buffers and cached), as a
    # percentage of total memory, stays above 85 for 30s.
    # NOTE(review): sum() without a by() clause aggregates across all series
    # and drops the instance/job labels, so those two template fields will
    # likely render empty — confirm whether a per-instance expression is wanted.
    - alert: high_memory_load
      expr: (sum(node_memory_MemTotal_bytes) - sum(node_memory_MemFree_bytes + node_memory_Buffers_bytes + node_memory_Cached_bytes) ) / sum(node_memory_MemTotal_bytes) * 100 > 85
      for: 30s
      labels:
        severity: warning
      annotations:
        summary: "Server memory is almost full"
        description: "Docker host memory usage is {{ humanize $value }}%. Reported by instance {{ $labels.instance }} of job {{ $labels.job }}."

    # Fires when the used share of a filesystem (size minus free, divided by
    # size, as a percentage) stays above 85 for 30s.
    # Only series with fstype="aufs" are matched; hosts using a different
    # filesystem type will never trigger this rule.
    - alert: high_storage_load
      expr: (node_filesystem_size_bytes{fstype="aufs"} - node_filesystem_free_bytes{fstype="aufs"}) / node_filesystem_size_bytes{fstype="aufs"} * 100 > 85
      for: 30s
      labels:
        severity: warning
      annotations:
        summary: "Server storage is almost full"
        description: "Docker host storage usage is {{ humanize $value }}%. Reported by instance {{ $labels.instance }} of job {{ $labels.job }}."

# You may also be interested in: Prometheus 2 / Alertmanager rules demo