Kubernetes deployment manifests: prometheus + grafana + kube-state-metrics + node-exporter

Prometheus


# Source: istio/charts/prometheus/templates/serviceaccount.yaml
apiVersion: v1
kind: ServiceAccount
metadata:
  name: prometheus
  namespace: monitor
  labels:
    app: prometheus

---


# Source: istio/charts/prometheus/templates/clusterrole.yaml
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: prometheus-istio-system
  labels:
    app: prometheus
rules:
- apiGroups: [""]
  resources:
  - nodes
  - services
  - endpoints
  - pods
  - nodes/proxy
  verbs: ["get", "list", "watch"]
- apiGroups: [""]
  resources:
  - configmaps
  verbs: ["get"]
- nonResourceURLs: ["/metrics"]
  verbs: ["get"]

---
# Source: istio/charts/prometheus/templates/clusterrolebindings.yaml
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: prometheus-istio-system
  labels:
    app: prometheus
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: prometheus-istio-system
subjects:
- kind: ServiceAccount
  name: prometheus
  namespace: monitor

---
kind: ConfigMap
apiVersion: v1
metadata:
  name: prometheus
  namespace: monitor
  labels:
    name: prometheus
data:
  prometheus.yml: |-
    global:
      scrape_interval: 10s
      evaluation_interval: 10s
    scrape_configs:
      - job_name: 'kubernetes-etcd'
        kubernetes_sd_configs:
        - role: endpoints
        scheme: https
        tls_config:
          cert_file: /etc/etcd-certs/server.crt
          key_file: /etc/etcd-certs/server.key
          insecure_skip_verify: true
        bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
        relabel_configs:
        - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_service_annotation_prometheus_io_scrape]
          action: keep
          regex: kube-system;etcd-cluster;true

      - job_name: 'kubernetes-apiservers'
        kubernetes_sd_configs:
        - role: endpoints
        scheme: https
        tls_config:
          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
        bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
        relabel_configs:
        - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
          action: keep
          regex: default;kubernetes;https

      - job_name: 'kubernetes-controller-manager'
        kubernetes_sd_configs:
        - role: endpoints
        scheme: https
        tls_config:
          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
        bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
        relabel_configs:
        - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_service_annotation_prometheus_io_scrape]
          action: keep
          regex: kube-system;controller-manager;true

      - job_name: 'kubernetes-scheduler'
        kubernetes_sd_configs:
        - role: endpoints
        scheme: http
        tls_config:
          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
        bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
        relabel_configs:
        - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_service_annotation_prometheus_io_scrape]
          action: keep
          regex: kube-system;kube-scheduler;true

      - job_name: 'kubernetes-proxy'
        kubernetes_sd_configs:
        - role: endpoints
        scheme: https
        tls_config:
          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
        bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
        relabel_configs:
        - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_service_annotation_prometheus_io_scrape]
          action: keep
          regex: kube-system;kube-proxy;true

      - job_name: 'kubernetes-kubelet'
        kubernetes_sd_configs:
        - role: node
        scheme: https
        tls_config:
          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
        bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
        relabel_configs:
        - action: labelmap
          regex: __meta_kubernetes_node_label_(.+)
        - target_label: __address__
          # Rewrite the address to the in-cluster DNS name of the kubernetes API service.
          replacement: kubernetes.default.svc:443
        - source_labels: [__meta_kubernetes_node_name]
          regex: (.+)
          target_label: __metrics_path__
          replacement: /api/v1/nodes/${1}/proxy/metrics
          
      # Scrape config for Kubelet cAdvisor.
      #
      # This is required for Kubernetes 1.7.3 and later, where cAdvisor metrics
      # (those whose names begin with 'container_') have been removed from the
      # Kubelet metrics endpoint.  This job scrapes the cAdvisor endpoint to
      # retrieve those metrics.
      #
      # In Kubernetes 1.7.0-1.7.2, these metrics are only exposed on the cAdvisor
      # HTTP endpoint; use "replacement: /api/v1/nodes/${1}:4194/proxy/metrics"
      # in that case (and ensure cAdvisor's HTTP server hasn't been disabled with
      # the --cadvisor-port=0 Kubelet flag).
      #
      # This job is not necessary and should be removed in Kubernetes 1.6 and
      # earlier versions, or it will cause the metrics to be scraped twice.
      - job_name: 'kubernetes-cadvisor'
        scheme: https
        tls_config:
          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
        bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
        kubernetes_sd_configs:
        - role: node
        relabel_configs:
        - action: labelmap
          regex: __meta_kubernetes_node_label_(.+)
        - target_label: __address__
          # Rewrite the address to the in-cluster DNS name of the kubernetes API service.
          replacement: kubernetes.default.svc:443
        - source_labels: [__meta_kubernetes_node_name]
          regex: (.+)
          target_label: __metrics_path__
          replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor

      # Istio rules
      - job_name: 'istio-mesh'
        # Override the global default and scrape targets from this job every 5 seconds.
        scrape_interval: 5s

        kubernetes_sd_configs:
        - role: endpoints
          namespaces:
            names:
            - istio-system

        relabel_configs:
        - source_labels: [__meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
          action: keep
          regex: istio-telemetry;prometheus

      # Scrape config for envoy stats
      - job_name: 'envoy-stats'
        metrics_path: /stats/prometheus
        kubernetes_sd_configs:
        - role: pod

        relabel_configs:
        - source_labels: [__meta_kubernetes_pod_container_port_name]
          action: keep
          regex: '.*-envoy-prom'
        - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
          action: replace
          regex: ([^:]+)(?::\d+)?;(\d+)
          replacement: $1:15090
          target_label: __address__
        - action: labelmap
          regex: __meta_kubernetes_pod_label_(.+)
        - source_labels: [__meta_kubernetes_namespace]
          action: replace
          target_label: namespace
        - source_labels: [__meta_kubernetes_pod_name]
          action: replace
          target_label: pod_name

        metric_relabel_configs:
        # Exclude some of the envoy metrics that have massive cardinality
        # This list may need to be pruned further moving forward, as informed
        # by performance and scalability testing.
        - source_labels: [ cluster_name ]
          regex: '(outbound|inbound|prometheus_stats).*'
          action: drop
        - source_labels: [ tcp_prefix ]
          regex: '(outbound|inbound|prometheus_stats).*'
          action: drop
        - source_labels: [ listener_address ]
          regex: '(.+)'
          action: drop
        - source_labels: [ http_conn_manager_listener_prefix ]
          regex: '(.+)'
          action: drop
        - source_labels: [ http_conn_manager_prefix ]
          regex: '(.+)'
          action: drop
        - source_labels: [ __name__ ]
          regex: 'envoy_tls.*'
          action: drop
        - source_labels: [ __name__ ]
          regex: 'envoy_tcp_downstream.*'
          action: drop
        - source_labels: [ __name__ ]
          regex: 'envoy_http_(stats|admin).*'
          action: drop
        - source_labels: [ __name__ ]
          regex: 'envoy_cluster_(lb|retry|bind|internal|max|original).*'
          action: drop


      - job_name: 'istio-policy'
        # Override the global default and scrape targets from this job every 5 seconds.
        scrape_interval: 5s
        # metrics_path defaults to '/metrics'
        # scheme defaults to 'http'.

        kubernetes_sd_configs:
        - role: endpoints
          namespaces:
            names:
            - istio-system


        relabel_configs:
        - source_labels: [__meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
          action: keep
          regex: istio-policy;http-monitoring

      - job_name: 'istio-telemetry'
        # Override the global default and scrape targets from this job every 5 seconds.
        scrape_interval: 5s
        # metrics_path defaults to '/metrics'
        # scheme defaults to 'http'.

        kubernetes_sd_configs:
        - role: endpoints
          namespaces:
            names:
            - istio-system

        relabel_configs:
        - source_labels: [__meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
          action: keep
          regex: istio-telemetry;http-monitoring

      - job_name: 'pilot'
        # Override the global default and scrape targets from this job every 5 seconds.
        scrape_interval: 5s
        # metrics_path defaults to '/metrics'
        # scheme defaults to 'http'.

        kubernetes_sd_configs:
        - role: endpoints
          namespaces:
            names:
            - istio-system

        relabel_configs:
        - source_labels: [__meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
          action: keep
          regex: istio-pilot;http-monitoring

      - job_name: 'galley'
        # Override the global default and scrape targets from this job every 5 seconds.
        scrape_interval: 5s
        # metrics_path defaults to '/metrics'
        # scheme defaults to 'http'.

        kubernetes_sd_configs:
        - role: endpoints
          namespaces:
            names:
            - istio-system

        relabel_configs:
        - source_labels: [__meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
          action: keep
          regex: istio-galley;http-monitoring

      - job_name: 'kubernetes-pods-istio-secure'
        scheme: https
        tls_config:
          ca_file: /etc/istio-certs/root-cert.pem
          cert_file: /etc/istio-certs/cert-chain.pem
          key_file: /etc/istio-certs/key.pem
          insecure_skip_verify: true  # prometheus does not support secure naming.
        kubernetes_sd_configs:
        - role: pod
        relabel_configs:
        - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
          action: keep
          regex: true
        # sidecar status annotation is added by sidecar injector and
        # istio_workload_mtls_ability can be specifically placed on a pod to indicate its ability to receive mtls traffic.
        - source_labels: [__meta_kubernetes_pod_annotation_sidecar_istio_io_status, __meta_kubernetes_pod_annotation_istio_mtls]
          action: keep
          regex: (([^;]+);([^;]*))|(([^;]*);(true))
        - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
          action: replace
          target_label: __metrics_path__
          regex: (.+)
        - source_labels: [__address__]  # Only keep address that is host:port
          action: keep    # otherwise an extra target with ':443' is added for https scheme
          regex: ([^:]+):(\d+)
        - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
          action: replace
          regex: ([^:]+)(?::\d+)?;(\d+)
          replacement: $1:$2
          target_label: __address__
        - action: labelmap
          regex: __meta_kubernetes_pod_label_(.+)
        - source_labels: [__meta_kubernetes_namespace]
          action: replace
          target_label: namespace
        - source_labels: [__meta_kubernetes_pod_name]
          action: replace
          target_label: pod_name

      # Application rules
      - job_name: 'applications-endpoints-metrics'
        tls_config:
          insecure_skip_verify: true
          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
        bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
        kubernetes_sd_configs:
        - role: endpoints
        relabel_configs:
        - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape, __meta_kubernetes_service_annotation_prometheus_io_app_metrics]
          regex: true;true
          action: keep
        - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme]
          action: replace
          regex: (https?)
          target_label: __scheme__
        - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_app_metrics_path]
          action: replace
          target_label: __metrics_path__
          regex: (.+)
        - source_labels: [__meta_kubernetes_pod_ip, __meta_kubernetes_service_annotation_prometheus_io_app_metrics_port]
          action: replace
          target_label: __address__
          regex: (.+);(.+)
          replacement: $1:$2
        - source_labels: [__meta_kubernetes_namespace]
          action: replace
          target_label: kubernetes_namespace
        - source_labels: [__meta_kubernetes_service_name]
          action: replace
          target_label: kubernetes_name
        - action: labelmap
          regex: __meta_kubernetes_service_label_(.+)
        - action: labelmap
          regex: __meta_kubernetes_service_annotation_prometheus_io_app_info_(.+)

      - job_name: 'applications-pod-metrics'
        kubernetes_sd_configs:
        - role: pod
        relabel_configs:
        - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
          action: keep
          regex: true
        - source_labels: [__meta_kubernetes_pod_annotation_sidecar_istio_io_status]
          action: drop
          regex: (.+)
        - source_labels: [__meta_kubernetes_pod_annotation_istio_mtls]
          action: drop
          regex: (true)
        - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
          action: replace
          target_label: __metrics_path__
          regex: (.+)
        - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
          action: replace
          regex: ([^:]+)(?::\d+)?;(\d+)
          replacement: $1:$2
          target_label: __address__
        - action: labelmap
          regex: __meta_kubernetes_pod_label_(.+)
        - source_labels: [__meta_kubernetes_namespace]
          action: replace
          target_label: kubernetes_namespace
        - source_labels: [__meta_kubernetes_pod_name]
          action: replace
          target_label: kubernetes_pod_name
        - source_labels: [__meta_kubernetes_namespace]
          action: replace
          target_label: namespace
        - source_labels: [__meta_kubernetes_pod_name]
          action: replace
          target_label: pod_name

      # Custom rule 1
      - job_name: 'kubernetes-service-endpoints'
        kubernetes_sd_configs:
        - role: endpoints
        relabel_configs:
        - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape]
          action: keep
          regex: true
        - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape, __meta_kubernetes_namespace, __meta_kubernetes_endpoint_port_name]
          # Drop system component service metrics here (a separate job scrapes them).
          regex: true;kube-system;kubernetes-components-metrics
          action: drop
        - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape, __meta_kubernetes_namespace]
          # Drop node-exporter metrics here (a separate job scrapes them).
          regex: true;monitor
          action: drop
        - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme]
          action: replace
          regex: (https?)
          target_label: __scheme__
        - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path]
          action: replace
          target_label: __metrics_path__
          regex: (.+)
        - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port]
          action: replace
          target_label: __address__
          regex: ([^:]+)(?::\d+)?;(\d+)
          replacement: $1:$2
        - action: labelmap
          regex: __meta_kubernetes_service_label_(.+)
        - source_labels: [__meta_kubernetes_namespace]
          action: replace
          target_label: kubernetes_namespace
        - source_labels: [__meta_kubernetes_service_name]
          action: replace
          target_label: kubernetes_name

      # Custom rule 2
      - job_name: 'applications-service-http-probe'
        tls_config:
          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
        bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
        kubernetes_sd_configs:
        - role: service
        metrics_path: /probe
        params:
          module: [http_2xx]
        relabel_configs:
        - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape, __meta_kubernetes_service_annotation_prometheus_io_http_probe]
          regex: true;true
          action: keep
        - source_labels: [__meta_kubernetes_service_name, __meta_kubernetes_namespace, __meta_kubernetes_service_annotation_prometheus_io_http_probe_port, __meta_kubernetes_service_annotation_prometheus_io_http_probe_path]
          action: replace
          target_label: __param_target
          regex: (.+);(.+);(.+);(.+)
          replacement: $1.$2:$3$4
        #- source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_http_probe_path]
        #  action: replace
        #  target_label: __param_target
        #  regex: (.+);(.+)
        #  replacement: $1$2
        - target_label: __address__
          replacement: blackbox-exporter:9115
        - source_labels: [__param_target]
          target_label: instance
        - action: labelmap
          regex: __meta_kubernetes_service_annotation_prometheus_io_app_info_(.+)
        #- source_labels: [__meta_kubernetes_namespace]
        #  target_label: kubernetes_namespace
        #- source_labels: [__meta_kubernetes_service_name]
        #  target_label: kubernetes_name
      - job_name: 'applications-service-tcp-probe'
        tls_config:
          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
        bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
        kubernetes_sd_configs:
        - role: service
        metrics_path: /probe
        params:
          module: [tcp_connect]
        relabel_configs:
        - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape, __meta_kubernetes_service_annotation_prometheus_io_tcp_probe]
          regex: true;true
          action: keep
        - source_labels: [__meta_kubernetes_service_name, __meta_kubernetes_namespace, __meta_kubernetes_service_annotation_prometheus_io_tcp_probe_port]
          action: replace
          target_label: __param_target
          regex: (.+);(.+);(.+)
          replacement: $1.$2:$3
        #- source_labels: [__address__]
        #  target_label: __param_target
        - target_label: __address__
          replacement: blackbox-exporter:9115
        - source_labels: [__param_target]
          target_label: instance
        - action: labelmap
          regex: __meta_kubernetes_service_annotation_prometheus_io_app_info_(.+)

---
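
The 'applications-*' jobs and the blackbox probe jobs in the config above are driven entirely by annotations. As a minimal illustration (the name my-app, the demo namespace, port 8080 and the /healthz path are all hypothetical), a Service that opts into application metrics scraping and the HTTP probe could be annotated like this; the 'applications-pod-metrics' job reads the analogous prometheus.io/scrape, prometheus.io/path and prometheus.io/port annotations from pod metadata instead.

apiVersion: v1
kind: Service
metadata:
  name: my-app          # hypothetical workload
  namespace: demo       # hypothetical namespace
  labels:
    app: my-app
  annotations:
    prometheus.io/scrape: 'true'
    # consumed by the 'applications-endpoints-metrics' job
    prometheus.io/app-metrics: 'true'
    prometheus.io/app-metrics-path: '/metrics'
    prometheus.io/app-metrics-port: '8080'
    # prometheus.io/app-info-* annotations are labelmapped onto the scraped
    # series (this one becomes an 'owner' label)
    prometheus.io/app-info-owner: 'team-a'
    # consumed by the 'applications-service-http-probe' job
    prometheus.io/http-probe: 'true'
    prometheus.io/http-probe-port: '8080'
    prometheus.io/http-probe-path: '/healthz'
spec:
  ports:
  - port: 8080
    targetPort: 8080
  selector:
    app: my-app

---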
apiVersion: apps/v1
kind: Deployment
metadata:
  labels:
    name: prometheus
    version: latest
  name: prometheus
  namespace: monitor
spec:
  replicas: 1
  selector:
    matchLabels:
      app.kubernetes.io/name: prometheus
      app.kubernetes.io/release: prometheus
  template:
    metadata:
      labels:
        app.kubernetes.io/name: prometheus
        app.kubernetes.io/release: prometheus
    spec:
      serviceAccountName: prometheus
      containers:
      - image: prom/prometheus
        name: prometheus
        command:
        - "/bin/prometheus"
        args:
        - "--config.file=/etc/prometheus/prometheus.yml"
        - "--storage.tsdb.path=/prometheus"
        - "--storage.tsdb.retention=48h"
        ports:
        - containerPort: 9090
          protocol: TCP
        volumeMounts:
        - mountPath: "/prometheus"
          name: data
        - mountPath: "/etc/prometheus"
          name: config-volume
        - mountPath: "/etc/etcd-certs"
          name: etcd-certs
        - name: istio-certs
          mountPath: /etc/istio-certs
        resources:
          requests:
            cpu: 200m
            memory: 200Mi
          limits:
            cpu: 1000m
            memory: 4000Mi
      volumes:
      - name: data
        emptyDir: {}
      - name: config-volume
        configMap:
          name: prometheus
      - name: etcd-certs
        emptyDir: {}
      - name: istio-certs
        emptyDir: {}

---
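
The 'kubernetes-etcd' job reads /etc/etcd-certs/server.crt and server.key, but the Deployment above mounts that path from an emptyDir, so those files will not exist at scrape time. A sketch of one way to supply them, assuming the etcd client certificate and key are at hand (the Secret name and file names are assumptions that simply have to match the paths in prometheus.yml):

apiVersion: v1
kind: Secret
metadata:
  name: etcd-certs
  namespace: monitor
type: Opaque
data:
  server.crt: <base64-encoded etcd client certificate>
  server.key: <base64-encoded etcd client key>

# The etcd-certs volume in the prometheus Deployment would then become
#   - name: etcd-certs
#     secret:
#       secretName: etcd-certs
# instead of an emptyDir.

---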

apiVersion: v1
kind: Service
metadata:
  name: prometheus
  labels:
    app: prometheus
  annotations:
    prometheus.io/scrape: 'true'
    prometheus.io/tcp-probe: 'true'
    prometheus.io/tcp-probe-port: '9090'
  namespace: monitor
spec:
  ports:
  - port: 9090
    targetPort: 9090
  selector:
    app.kubernetes.io/name: prometheus
    app.kubernetes.io/release: prometheus

---
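
Both probe jobs above rewrite __address__ to blackbox-exporter:9115, yet no blackbox exporter is deployed in this file. A minimal sketch of one (the image tag is an assumption; the exporter's default configuration already ships the http_2xx and tcp_connect modules referenced in the probe jobs), placed in the monitor namespace next to Prometheus so the bare service name resolves:

apiVersion: apps/v1
kind: Deployment
metadata:
  name: blackbox-exporter
  namespace: monitor
  labels:
    app: blackbox-exporter
spec:
  replicas: 1
  selector:
    matchLabels:
      app: blackbox-exporter
  template:
    metadata:
      labels:
        app: blackbox-exporter
    spec:
      containers:
      - name: blackbox-exporter
        image: prom/blackbox-exporter:v0.14.0   # tag is an assumption
        ports:
        - containerPort: 9115

---
apiVersion: v1
kind: Service
metadata:
  name: blackbox-exporter
  namespace: monitor
  labels:
    app: blackbox-exporter
spec:
  ports:
  - port: 9115
    targetPort: 9115
  selector:
    app: blackbox-exporter

---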

Grafana

apiVersion: apps/v1
kind: Deployment
metadata:
  labels:
    app: grafana
    version: latest
    app.kubernetes.io/name: grafana
    app.kubernetes.io/release: grafana
  name: grafana
spec:
  replicas: 1
  selector:
    matchLabels:
      app.kubernetes.io/name: grafana
      app.kubernetes.io/release: grafana
  template:
    metadata:
      labels:
        app.kubernetes.io/name: grafana
        app.kubernetes.io/release: grafana
    spec:
      containers:
      - name: grafana
        image: grafana/grafana:latest
        imagePullPolicy: IfNotPresent
        ports:
        - containerPort: 3000
          protocol: TCP
        volumeMounts:
        - name: grafana-storage
          mountPath: /var/lib/grafana
        env:
        - name: GF_SERVER_HTTP_PORT
          value: "3000"
          # The following env variables are required to make Grafana accessible via
          # the kubernetes api-server proxy. On production clusters, we recommend
          # removing these env variables, setup auth for grafana, and expose the grafana
          # service using a LoadBalancer or a public IP.
        - name: GF_AUTH_BASIC_ENABLED
          value: "false"
        - name: GF_AUTH_ANONYMOUS_ENABLED
          value: "true"
        - name: GF_AUTH_ANONYMOUS_ORG_ROLE
          value: Admin
        - name: GF_SERVER_ROOT_URL
          # If you're only using the API Server proxy, set this value instead:
          # value: /api/v1/namespaces/kube-system/services/monitoring-grafana/proxy
          value: /
      volumes:
      - name: grafana-storage
        emptyDir: {}

---

apiVersion: v1
kind: Service
metadata:
  name: grafana
  labels:
    app: grafana
    version: latest
    app.kubernetes.io/name: grafana
    app.kubernetes.io/release: grafana
  annotations:
    prometheus.io/scrape: 'true'
    prometheus.io/tcp-probe: 'true'
    prometheus.io/tcp-probe-port: '80'
spec:
  ports:
  - port: 80
    targetPort: 3000
  selector:
    app.kubernetes.io/name: grafana
    app.kubernetes.io/release: grafana

---
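
Grafana as deployed above starts with no data source. One way to wire it to the prometheus Service is Grafana's file-based provisioning; the sketch below assumes Grafana is applied to the monitor namespace and that the Deployment above is additionally given a volume mounting this ConfigMap at /etc/grafana/provisioning/datasources (neither is part of the original manifests):

apiVersion: v1
kind: ConfigMap
metadata:
  name: grafana-datasources
  namespace: monitor
data:
  prometheus.yaml: |-
    apiVersion: 1
    datasources:
    - name: Prometheus
      type: prometheus
      access: proxy
      # the prometheus Service defined earlier in this file, port 9090
      url: http://prometheus:9090
      isDefault: true

---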

Kube-state-metrics


# Kube-state-metrics

apiVersion: v1
kind: ServiceAccount
metadata:
  name: kube-state-metrics

---

# kube-state-metrics-cluster-role

apiVersion: rbac.authorization.k8s.io/v1
# kubernetes versions before 1.8.0 should use rbac.authorization.k8s.io/v1beta1
kind: ClusterRole
metadata:
  name: kube-state-metrics
rules:
- apiGroups: [""]
  resources:
  - configmaps
  - secrets
  - nodes
  - pods
  - services
  - resourcequotas
  - replicationcontrollers
  - limitranges
  - persistentvolumeclaims
  - persistentvolumes
  - namespaces
  - endpoints
  verbs: ["list", "watch"]
- apiGroups: ["extensions"]
  resources:
  - daemonsets
  - deployments
  - replicasets
  - ingresses
  verbs: ["list", "watch"]
- apiGroups: ["apps"]
  resources:
  - daemonsets
  - deployments
  - replicasets
  - statefulsets
  verbs: ["list", "watch"]
- apiGroups: ["batch"]
  resources:
  - cronjobs
  - jobs
  verbs: ["list", "watch"]
- apiGroups: ["autoscaling"]
  resources:
  - horizontalpodautoscalers
  verbs: ["list", "watch"]
- apiGroups: ["policy"]
  resources:
  - poddisruptionbudgets
  verbs: ["list", "watch"]

---

apiVersion: rbac.authorization.k8s.io/v1
# kubernetes versions before 1.8.0 should use rbac.authorization.k8s.io/v1beta1
kind: ClusterRoleBinding
metadata:
  name: kube-state-metrics
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: kube-state-metrics
subjects:
- kind: ServiceAccount
  name: kube-state-metrics
  namespace: monitor

---

# kube-state-metrics-role

apiVersion: rbac.authorization.k8s.io/v1
# kubernetes versions before 1.8.0 should use rbac.authorization.k8s.io/v1beta1
kind: Role
metadata:
  name: kube-state-metrics-resizer
rules:
- apiGroups: [""]
  resources:
  - pods
  verbs: ["get"]
- apiGroups: ["apps"]
  resources:
  - deployments
  resourceNames: ["kube-state-metrics"]
  verbs: ["get", "update"]
- apiGroups: ["extensions"]
  resources:
  - deployments
  resourceNames: ["kube-state-metrics"]
  verbs: ["get", "update"]

---

apiVersion: rbac.authorization.k8s.io/v1
# kubernetes versions before 1.8.0 should use rbac.authorization.k8s.io/v1beta1
kind: RoleBinding
metadata:
  name: kube-state-metrics
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: kube-state-metrics-resizer
subjects:
- kind: ServiceAccount
  name: kube-state-metrics

---
apiVersion: apps/v1
# Kubernetes version 1.8.x should use apps/v1beta2
# Kubernetes versions before 1.8.0 should use apps/v1beta1 or extensions/v1beta1
kind: Deployment
metadata:
  name: kube-state-metrics
  labels:
    app: kube-state-metrics
    version: v1.5.0
spec:
  selector:
    matchLabels:
      app.kubernetes.io/name: kube-state-metrics
      app.kubernetes.io/release: kube-state-metrics
  replicas: 1
  template:
    metadata:
      labels:
        app.kubernetes.io/name: kube-state-metrics
        app.kubernetes.io/release: kube-state-metrics
    spec:
      serviceAccountName: kube-state-metrics
      containers:
      - name: kube-state-metrics
        image: quay.io/coreos/kube-state-metrics:v1.5.0
        ports:
        - name: http-metrics
          containerPort: 8080
        - name: telemetry
          containerPort: 8081
        readinessProbe:
          httpGet:
            path: /healthz
            port: 8080
          initialDelaySeconds: 5
          timeoutSeconds: 5
      - name: addon-resizer
        image: k8s.gcr.io/addon-resizer:1.8.3
        resources:
          limits:
            cpu: 150m
            memory: 50Mi
          requests:
            cpu: 150m
            memory: 50Mi
        env:
          - name: MY_POD_NAME
            valueFrom:
              fieldRef:
                fieldPath: metadata.name
          - name: MY_POD_NAMESPACE
            valueFrom:
              fieldRef:
                fieldPath: metadata.namespace
        command:
          - /pod_nanny
          - --container=kube-state-metrics
          - --cpu=100m
          - --extra-cpu=1m
          - --memory=100Mi
          - --extra-memory=2Mi
          - --threshold=5
          - --deployment=kube-state-metrics

---

# kube-state-metrics-service

apiVersion: v1
kind: Service
metadata:
  name: kube-state-metrics
  labels:
    app.kubernetes.io/name: kube-state-metrics
    app.kubernetes.io/release: kube-state-metrics
  annotations:
    prometheus.io/scrape: 'true'
    prometheus.io/scheme: 'http'
    prometheus.io/app-metrics: 'true'
    prometheus.io/app-metrics-path: '/metrics'
spec:
  ports:
  - name: http-metrics
    port: 8080
    targetPort: http-metrics
    protocol: TCP
  - name: telemetry
    port: 8081
    targetPort: telemetry
    protocol: TCP
  selector:
    app.kubernetes.io/name: kube-state-metrics
    app.kubernetes.io/release: kube-state-metrics

---

Node-exporter


# node-exporter

apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: prometheus-node-exporter
  labels:
    app: prometheus-node-exporter
    version: v0.17.0
spec:
  selector:
    matchLabels:
      app.kubernetes.io/name: prometheus-node-exporter
      app.kubernetes.io/release: prometheus-node-exporter
  template:
    metadata:
      name: prometheus-node-exporter
      labels:
        app.kubernetes.io/name:  prometheus-node-exporter
        app.kubernetes.io/release:  prometheus-node-exporter
    spec:
      containers:
      - image: prom/node-exporter:v0.17.0
        imagePullPolicy: IfNotPresent
        name: prometheus-node-exporter
        ports:
        - name: prom-node-exp
          #^ must be an IANA_SVC_NAME (at most 15 characters, ..)
          containerPort: 9100
          hostPort: 9100
      tolerations:
      - key: "node-role.kubernetes.io/master"
        effect: "NoSchedule"
      hostNetwork: true
      hostPID: true
      hostIPC: true
      restartPolicy: Always
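
      # Optional additions (a sketch, not part of the original manifest): a
      # common pattern is to bind-mount the host's /proc and /sys and point
      # node-exporter at them, so host-level filesystem and memory metrics are
      # reported rather than the container's. The /host/* mount paths are
      # conventional choices; --path.procfs and --path.sysfs are the flag names
      # used by recent node_exporter releases, including v0.17.x.
      # Added to the container above:
      #   args:
      #   - --path.procfs=/host/proc
      #   - --path.sysfs=/host/sys
      #   volumeMounts:
      #   - name: proc
      #     mountPath: /host/proc
      #     readOnly: true
      #   - name: sys
      #     mountPath: /host/sys
      #     readOnly: true
      # and to the pod spec:
      #   volumes:
      #   - name: proc
      #     hostPath:
      #       path: /proc
      #   - name: sys
      #     hostPath:
      #       path: /sys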
      
---
apiVersion: v1
kind: Service
metadata:
  annotations:
    prometheus.io/scrape: 'true'
    prometheus.io/app-metrics: 'true'
    prometheus.io/app-metrics-path: '/metrics'
  name: prometheus-node-exporter
  labels:
    app: prometheus-node-exporter
spec:
  clusterIP: None
  ports:
    - name: prometheus-node-exporter
      port: 9100
      protocol: TCP
  selector:
    app.kubernetes.io/name:  prometheus-node-exporter
    app.kubernetes.io/release:  prometheus-node-exporter
  type: ClusterIP

---
