Prometheus is an open-source systems monitoring and alerting toolkit. It has been accepted into the CNCF, becoming the second project hosted by the foundation after Kubernetes. In a Kubernetes cluster it is the usual companion for monitoring; it supports a wide range of exporters for collecting metrics as well as a Pushgateway for pushing them, and its performance is sufficient for clusters of tens of thousands of machines.
As a new generation of monitoring framework, Prometheus has the following characteristics (the key points from the official overview):
- a multi-dimensional data model, with time series identified by metric name and key/value labels;
- PromQL, a flexible query language built on that model;
- no reliance on distributed storage; single server nodes are autonomous;
- collection via a pull model over HTTP, with pushing supported through an intermediary gateway;
- targets discovered via service discovery or static configuration;
- multiple modes of graphing and dashboarding support.
It should be pointed out that, since scraped data can be lost, Prometheus is not suitable for cases where the collected data must be 100% accurate (per-request billing, for example). For recording time-series data, however, it offers strong query capabilities, and it is a good fit for microservice architectures.
The Prometheus ecosystem consists of multiple components, many of which are optional: the Prometheus server that scrapes and stores time series; client libraries for instrumenting application code; a push gateway for short-lived jobs; special-purpose exporters (node, HAProxy, StatsD, and many more); the Alertmanager for handling alerts; and various support tools.
Prometheus itself will not be introduced at greater length here; for more details, see
https://www.ibm.com/developerworks/cn/cloud/library/cl-lo-prometheus-getting-started-and-practice/index.html
[root@k8s-master-01 namespaces]# more prom-grafana-namespaces.yaml
apiVersion: v1
kind: Namespace
metadata:
name: prom-grafana
labels:
name: prom-grafana
[root@k8s-master-01 namespaces]# kubectl create -f prom-grafana-namespaces.yaml
namespace/prom-grafana created
[root@k8s-master-01 prom+grafana]# kubectl create serviceaccount drifter -n prom-grafana
serviceaccount/drifter created
[root@k8s-master-01 prom+grafana]# kubectl create clusterrolebinding drifter-clusterrolebinding -n prom-grafana --clusterrole=cluster-admin --serviceaccount=prom-grafana:drifter
clusterrolebinding.rbac.authorization.k8s.io/drifter-clusterrolebinding created
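A quick sanity check that both objects exist (note that the -n flag on the clusterrolebinding command above is harmless but has no effect, since ClusterRoleBindings are cluster-scoped):

kubectl get serviceaccount drifter -n prom-grafana
kubectl get clusterrolebinding drifter-clusterrolebinding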
Run this on any one node of the k8s cluster (or mount OSS or similar shared storage instead); here it is set up on a single node:
[root@k8s-node-06 ~]# mkdir /data
[root@k8s-node-06 ~]# chmod 777 /data/
node-exporter collects monitoring metrics from machines (physical machines, virtual machines, cloud hosts, etc.); the metrics it can gather include CPU, memory, disk, network, open-file counts, and more.
Install the node-exporter component; run this on the k8s master node:
[root@k8s-master-01 prom+grafana]# more node-export.yaml
apiVersion: apps/v1
kind: DaemonSet
metadata:
name: node-exporter
namespace: prom-grafana
labels:
name: node-exporter
spec:
selector:
matchLabels:
name: node-exporter
template:
metadata:
labels:
name: node-exporter
spec:
hostPID: true
hostIPC: true
hostNetwork: true
containers:
- name: node-exporter
image: prom/node-exporter:v0.16.0
ports:
- containerPort: 9100
resources:
requests:
cpu: 0.15
securityContext:
privileged: true
args:
- --path.procfs
- /host/proc
- --path.sysfs
- /host/sys
- --collector.filesystem.ignored-mount-points
- '"^/(sys|proc|dev|host|etc)($|/)"'
volumeMounts:
- name: dev
mountPath: /host/dev
- name: proc
mountPath: /host/proc
- name: sys
mountPath: /host/sys
- name: rootfs
mountPath: /rootfs
tolerations:
- key: "node-role.kubernetes.io/master"
operator: "Exists"
effect: "NoSchedule"
volumes:
- name: proc
hostPath:
path: /proc
- name: dev
hostPath:
path: /dev
- name: sys
hostPath:
path: /sys
- name: rootfs
hostPath:
path: /
Deploy the node-exporter component and check its status:
[root@k8s-master-01 prom+grafana]# kubectl create -f node-export.yaml
daemonset.apps/node-exporter created
[root@k8s-master-01 prom+grafana]# kubectl get pods -n prom-grafana
NAME READY STATUS RESTARTS AGE
node-exporter-69gdh 1/1 Running 0 20s
node-exporter-6ptnr 1/1 Running 0 20s
node-exporter-7pdgm 1/1 Running 0 20s
node-exporter-fjlq2 1/1 Running 0 20s
node-exporter-jfncm 1/1 Running 0 20s
node-exporter-p86f8 1/1 Running 0 20s
node-exporter-qcp6w 1/1 Running 0 20s
node-exporter-v857r 1/1 Running 0 20s
node-exporter-vclgh 1/1 Running 0 20s
All the pods are in the Running state, which confirms the deployment succeeded. You can now look at the data node-exporter collects; it listens on port 9100 by default and exposes all of the monitoring data gathered from the host (as shown in the figure).
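For example (a sketch; substitute the address of one of your nodes for <node-ip>), you can pull the metrics endpoint directly and look at the first few samples:

# node-exporter exposes plain-text metrics on port 9100
curl -s http://<node-ip>:9100/metrics | head -n 20

Every metric is preceded by # HELP and # TYPE comment lines, which are explained below.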
HELP: explains the meaning of the metric; the line above says it records, per mode, the CPU time the node has spent, in units of seconds.
TYPE: states the metric's data type; above, it is a counter.
node_load1: reflects the host's load over the last minute. System load changes as resources are used, so node_load1 describes a current state whose value may rise or fall; the comment shows that this metric's type is gauge.
node_cpu_seconds_total{cpu="0",mode="idle"}: the total CPU time spent by the idle mode on cpu 0. CPU time consumed only ever increases, and accordingly the type of node_cpu is counter.
counter: a metric that only ever increases.
gauge: a metric that can increase or decrease.
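To see how a counter is used in practice, a common PromQL query (run it in the Prometheus web UI once the server below is up) turns the ever-growing node_cpu_seconds_total into a per-node CPU utilisation percentage by rating the idle time over a window and subtracting from 100:

# per-second increase of idle CPU time over 5m, averaged per node, as a busy-percentage
100 - avg by (instance) (rate(node_cpu_seconds_total{mode="idle"}[5m])) * 100

rate() computes the per-second increase of a counter over the window, which is exactly why counters must only ever go up.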
[root@k8s-master-01 prom+grafana]# more prometheus-cfg.yaml
---
kind: ConfigMap
apiVersion: v1
metadata:
labels:
app: prometheus
name: prometheus-config
  namespace: prom-grafana
data:
prometheus.yml: |
global:
scrape_interval: 15s
scrape_timeout: 10s
evaluation_interval: 1m
scrape_configs:
- job_name: 'kubernetes-node'
kubernetes_sd_configs:
- role: node
relabel_configs:
- source_labels: [__address__]
regex: '(.*):10250'
replacement: '${1}:9100'
target_label: __address__
action: replace
- action: labelmap
regex: __meta_kubernetes_node_label_(.+)
- job_name: 'kubernetes-node-cadvisor'
kubernetes_sd_configs:
- role: node
scheme: https
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
relabel_configs:
- action: labelmap
regex: __meta_kubernetes_node_label_(.+)
- target_label: __address__
replacement: kubernetes.default.svc:443
- source_labels: [__meta_kubernetes_node_name]
regex: (.+)
target_label: __metrics_path__
replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor
- job_name: 'kubernetes-apiserver'
kubernetes_sd_configs:
- role: endpoints
scheme: https
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
relabel_configs:
- source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
action: keep
regex: default;kubernetes;https
- job_name: 'kubernetes-service-endpoints'
kubernetes_sd_configs:
- role: endpoints
relabel_configs:
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape]
action: keep
regex: true
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme]
action: replace
target_label: __scheme__
regex: (https?)
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path]
action: replace
target_label: __metrics_path__
regex: (.+)
- source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port]
action: replace
target_label: __address__
regex: ([^:]+)(?::\d+)?;(\d+)
replacement: $1:$2
- action: labelmap
regex: __meta_kubernetes_service_label_(.+)
- source_labels: [__meta_kubernetes_namespace]
action: replace
target_label: kubernetes_namespace
- source_labels: [__meta_kubernetes_service_name]
action: replace
target_label: kubernetes_name
[root@k8s-master-01 prom+grafana]# kubectl create -f prometheus-cfg.yaml
configmap/prometheus-config created
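Note how the kubernetes-service-endpoints job above works: it keeps only endpoints whose Service carries the annotation prometheus.io/scrape: "true", and the optional scheme/path/port annotations rewrite the scrape target. A hypothetical Service (name, namespace and port are illustrative) that this job would discover automatically:

apiVersion: v1
kind: Service
metadata:
  name: my-app                       # illustrative
  namespace: default
  annotations:
    prometheus.io/scrape: "true"     # matched by the keep rule
    prometheus.io/port: "8080"       # rewritten into __address__
    prometheus.io/path: "/metrics"   # optional; /metrics is the default
spec:
  selector:
    app: my-app
  ports:
  - port: 8080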
[root@k8s-master-01 prom+grafana]# more prometheus-deployment.yaml
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: prometheus-server
namespace: prom-grafana
labels:
app: prometheus
spec:
replicas: 1
selector:
matchLabels:
app: prometheus
component: server
#matchExpressions:
#- {key: app, operator: In, values: [prometheus]}
#- {key: component, operator: In, values: [server]}
template:
metadata:
labels:
app: prometheus
component: server
annotations:
prometheus.io/scrape: 'false'
spec:
nodeName: k8s-node-06
serviceAccountName: drifter
containers:
- name: prometheus
image: prom/prometheus:v2.2.1
imagePullPolicy: IfNotPresent
command:
- prometheus
- --config.file=/etc/prometheus/prometheus.yml
- --storage.tsdb.path=/prometheus
- --storage.tsdb.retention=720h
ports:
- containerPort: 9090
protocol: TCP
volumeMounts:
- mountPath: /etc/prometheus/prometheus.yml
name: prometheus-config
subPath: prometheus.yml
- mountPath: /prometheus/
name: prometheus-storage-volume
volumes:
- name: prometheus-config
configMap:
name: prometheus-config
items:
- key: prometheus.yml
path: prometheus.yml
mode: 0644
- name: prometheus-storage-volume
hostPath:
path: /data
type: Directory
Note: the prometheus-deployment.yaml file above contains a nodeName field, which pins the Prometheus pod to a particular node. Here nodeName is k8s-node-06, i.e. the pod is scheduled onto k8s-node-06, because that is where we created the /data directory. So remember: whichever node of the k8s cluster you created /data on is the node the pod must be scheduled to.
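An alternative to hard-coding nodeName is to label the storage node and use a nodeSelector (a sketch; the label key is illustrative):

kubectl label node k8s-node-06 prometheus-storage=ready

and then in the pod spec, instead of nodeName:

      nodeSelector:
        prometheus-storage: "ready"

The effect is the same, but the pinning is expressed as an ordinary scheduling constraint.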
[root@k8s-master-01 prom+grafana]# kubectl create -f prometheus-deployment.yaml
deployment.apps/prometheus-server created
[root@k8s-master-01 prom+grafana]# kubectl get pod -n prom-grafana
NAME READY STATUS RESTARTS AGE
node-exporter-69gdh 1/1 Running 0 7h36m
node-exporter-6ptnr 1/1 Running 0 7h36m
node-exporter-7pdgm 1/1 Running 0 7h36m
node-exporter-fjlq2 1/1 Running 0 7h36m
node-exporter-jfncm 1/1 Running 0 7h36m
node-exporter-p86f8 1/1 Running 0 7h36m
node-exporter-qcp6w 1/1 Running 0 7h36m
node-exporter-v857r 1/1 Running 0 7h36m
node-exporter-vclgh 1/1 Running 0 7h36m
prometheus-server-6bf69dddc5-v527d 1/1 Running 0 7m48s
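Before putting an ingress in front of the server, you can sanity-check it with a port-forward (a sketch; /-/healthy is Prometheus's built-in health endpoint):

kubectl port-forward -n prom-grafana deploy/prometheus-server 9090:9090
# from another shell on the same host:
curl -s http://127.0.0.1:9090/-/healthy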
[root@k8s-master-01 prom+grafana]# more prometheus-svc.yaml
---
apiVersion: v1
kind: Service
metadata:
name: prometheus-server
namespace: prom-grafana
labels:
app: prometheus
spec:
# type: NodePort
type: ClusterIP
ports:
- port: 9090
targetPort: 9090
# protocol: TCP
selector:
app: prometheus
    component: server
[root@k8s-master-01 prom+grafana]# kubectl create -f prometheus-svc.yaml
service/prometheus-server created
[root@k8s-master-01 prom+grafana]# kubectl get svc -n prom-grafana
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
prometheus-server   ClusterIP   10.103.172.209   <none>        9090/TCP   4m57s
[root@k8s-master-01 prom+grafana]# more prometheus-ing.yaml
---
apiVersion: extensions/v1beta1
kind: Ingress
metadata:
name: prometheus
namespace: prom-grafana
annotations:
nginx.ingress.kubernetes.io/proxy-body-size: "20M"
# nginx.ingress.kubernetes.io/ssl-redirect: false
# nginx.ingress.kubernetes.io/rewrite-target: /
spec:
rules:
- host: prometheus.drifter.net
http:
paths:
- path: /
backend:
serviceName: prometheus-server
servicePort: 9090
status:
loadBalancer:
ingress:
- ip: 10.10.100.74
- ip: 10.10.100.75
- ip: 10.10.100.76
- ip: 10.10.100.77
- ip: 10.10.100.78
- ip: 10.10.100.79
[root@k8s-master-01 prom+grafana]# kubectl create -f prometheus-ing.yaml
ingress.extensions/prometheus created
[root@k8s-master-01 prom+grafana]# kubectl get ing -n prom-grafana
NAME         CLASS    HOSTS                    ADDRESS   PORTS   AGE
prometheus   <none>   prometheus.drifter.net             80      4s
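Assuming the ingress-controller addresses listed in the manifest's status block, point the hostname at one of them on a client machine and verify that the UI answers (a sketch):

echo "10.10.100.74 prometheus.drifter.net" >> /etc/hosts
curl -I http://prometheus.drifter.net/graph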
100.119.255.145 is the IP address of the Prometheus pod:
[root@k8s-master-01 prom+grafana]# kubectl get pods -n prom-grafana -o wide | grep prometheus
prometheus-server-6bf69dddc5-v527d 1/1 Running 0 10m 100.119.255.145 k8s-node-06
If you change the Prometheus configuration later, recreate the ConfigMap and the Deployment so the changes take effect:
kubectl delete -f prometheus-cfg.yaml
kubectl delete -f prometheus-deployment.yaml
kubectl apply -f prometheus-cfg.yaml
kubectl apply -f prometheus-deployment.yaml
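Alternatively, Prometheus can reload its configuration in place, without recreating the pod, if the container is started with the additional flag --web.enable-lifecycle (a sketch; that flag is not part of the manifest above, and the pod IP is the one shown by kubectl get pods -o wide):

kubectl apply -f prometheus-cfg.yaml
# wait for the updated ConfigMap volume to propagate into the pod, then:
curl -X POST http://100.119.255.145:9090/-/reload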
[root@k8s-master-01 prom+grafana]# more grafana.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
name: grafana-server
namespace: prom-grafana
spec:
replicas: 1
selector:
matchLabels:
task: monitoring
k8s-app: grafana
template:
metadata:
labels:
task: monitoring
k8s-app: grafana
spec:
imagePullSecrets:
- name: registry-pps
containers:
- name: grafana-server
image: registry.drifter.net/grafana:5.0.4
ports:
- containerPort: 3000
protocol: TCP
volumeMounts:
- mountPath: /etc/ssl/certs
name: ca-certificates
readOnly: true
- mountPath: /var
name: grafana-storage
env:
- name: INFLUXDB_HOST
value: monitoring-influxdb
- name: GF_SERVER_HTTP_PORT
value: "3000"
- name: GF_AUTH_BASIC_ENABLED
value: "false"
- name: GF_AUTH_ANONYMOUS_ENABLED
value: "true"
- name: GF_AUTH_ANONYMOUS_ORG_ROLE
value: Admin
- name: GF_SERVER_ROOT_URL
value: /
volumes:
- name: ca-certificates
hostPath:
path: /etc/ssl/certs
- name: grafana-storage
emptyDir: {}
[root@k8s-master-01 prom+grafana]# kubectl create -f grafana.yaml
deployment.apps/grafana-server created
[root@k8s-master-01 prom+grafana]# kubectl get pods -n prom-grafana
NAME READY STATUS RESTARTS AGE
grafana-server-657495c99d-x5hnn 1/1 Running 0 23s
node-exporter-g747h 1/1 Running 0 6m16s
node-exporter-gbfvk 1/1 Running 0 6m16s
node-exporter-glvvw 1/1 Running 0 6m16s
node-exporter-hpvmj 1/1 Running 0 6m16s
node-exporter-ndp9l 1/1 Running 0 6m16s
node-exporter-nfbdg 1/1 Running 0 6m16s
node-exporter-rjrzw 1/1 Running 0 6m16s
node-exporter-z9c97 1/1 Running 0 6m16s
node-exporter-zcb6h 1/1 Running 0 6m16s
prometheus-server-6bf69dddc5-9m4nw 1/1 Running 0 4m16s
[root@k8s-master-01 prom+grafana]# more grafana-svc.yaml
---
apiVersion: v1
kind: Service
metadata:
labels:
kubernetes.io/cluster-service: 'true'
kubernetes.io/name: grafana-server
name: grafana-server
namespace: prom-grafana
spec:
ports:
- port: 80
targetPort: 3000
selector:
k8s-app: grafana
# type: NodePort
type: ClusterIP
[root@k8s-master-01 prom+grafana]# kubectl create -f grafana-svc.yaml
service/grafana-server created
[root@k8s-master-01 prom+grafana]# kubectl get svc -n prom-grafana
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
grafana-server      ClusterIP   10.102.129.245   <none>        80/TCP     2m3s
prometheus-server   ClusterIP   10.96.17.72      <none>        9090/TCP   12m
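A quick check that Grafana answers on the service port (a sketch, run from somewhere that can reach the ClusterIP; /api/health is Grafana's built-in health endpoint):

curl -s http://10.102.129.245/api/health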
[root@k8s-master-01 prom+grafana]# more grafana-ing.yaml
---
apiVersion: extensions/v1beta1
kind: Ingress
metadata:
name: grafana-server
namespace: prom-grafana
annotations:
nginx.ingress.kubernetes.io/proxy-body-size: "20M"
# nginx.ingress.kubernetes.io/ssl-redirect: false
# nginx.ingress.kubernetes.io/rewrite-target: /
spec:
rules:
- host: graf.drifter.net
http:
paths:
- path: /
backend:
serviceName: grafana-server
servicePort: 80
status:
loadBalancer:
ingress:
- ip: 10.10.100.74
- ip: 10.10.100.75
- ip: 10.10.100.76
- ip: 10.10.100.77
- ip: 10.10.100.78
- ip: 10.10.100.79
[root@k8s-master-01 prom+grafana]# kubectl create -f grafana-ing.yaml
ingress.extensions/grafana-server created
[root@k8s-master-01 prom+grafana]# kubectl get ing -n prom-grafana
NAME             CLASS    HOSTS                    ADDRESS   PORTS   AGE
grafana-server   <none>   graf.drifter.net                   80      28m
prometheus       <none>   prometheus.drifter.net             80      170m
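With everything running, the remaining step is to add Prometheus as a data source in Grafana. Since both live in the prom-grafana namespace, the in-cluster DNS name of the Prometheus service works directly. You can add it through the Grafana UI, or, as a sketch, via Grafana's provisioning mechanism (available since Grafana 5.0; the file path and how you mount it are up to you):

# contents of a file mounted at /etc/grafana/provisioning/datasources/prometheus.yaml
apiVersion: 1
datasources:
  - name: Prometheus
    type: prometheus
    access: proxy
    url: http://prometheus-server.prom-grafana.svc:9090
    isDefault: true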