Prometheus 是一款基于时序数据库的开源监控告警系统,非常适合Kubernetes集群的监控。Prometheus的基本原理是通过HTTP协议周期性抓取被监控组件的状态,任意组件只要提供对应的HTTP接口就可以接入监控。不需要任何SDK或者其他的集成过程。这样做非常适合做虚拟化环境监控系统,比如VM、Docker、Kubernetes等。输出被监控组件信息的HTTP接口被叫做exporter。目前互联网公司常用的组件大部分都有exporter可以直接使用,比如Varnish、Haproxy、Nginx、MySQL、Linux系统信息(包括磁盘、内存、CPU、网络等等)。Prometheus有以下特点:
1、支持多维数据模型:由度量名和键值对组成的时间序列数据
2、内置时间序列数据库TSDB
3、支持PromQL查询语言,可以完成非常复杂的查询和分析,对图表展示和告警非常有意义
4、支持HTTP的Pull方式采集时间序列数据
5、支持PushGateway采集瞬时任务的数据
6、支持服务发现和静态配置两种方式发现目标
7、支持接入Grafana
prometheus server:收集和存储、检索时间序列数据
1、Retrieval:获取监控数据
2、TSDB:时间序列数据库
3、HTTP Server:为告警和出图提供查询接口
pushgateway:为prometheus无法直接pull的监控对象(例如短生命周期任务)提供推送支持
service discovery:服务发现
1、静态服务发现
2、动态服务发现
Alertmanager:处理告警
PromQL:用于查询和展示数据
root@master1:~# mkdir /apps
root@master1:~# cd /apps
root@master1:~/apps# wget https://github.com/prometheus/prometheus/releases/download/v2.38.0/prometheus-2.38.0.linux-amd64.tar.gz
root@master1:~/apps# tar xf prometheus-2.38.0.linux-amd64.tar.gz
root@master1:~/apps# ln -sf /apps/prometheus-2.38.0.linux-amd64 /apps/prometheus
root@master1:~/apps# cd /apps/prometheus
root@master1:~/apps/prometheus# ll
total 207312
drwxr-xr-x 4 3434 3434 169 Aug 29 15:21 ./
drwxr-xr-x 4 root root 118 Aug 29 15:22 ../
-rw-r--r-- 1 3434 3434 11357 Aug 16 21:42 LICENSE
-rw-r--r-- 1 3434 3434 3773 Aug 16 21:42 NOTICE
drwxr-xr-x 2 3434 3434 38 Aug 16 21:42 console_libraries/
drwxr-xr-x 2 3434 3434 173 Aug 16 21:42 consoles/
-rwxr-xr-x 1 3434 3434 110234973 Aug 16 21:26 prometheus* # Prometheus主程序
-rw-r--r-- 1 3434 3434 934 Aug 16 21:42 prometheus.yml # Prometheus主配置文件
-rwxr-xr-x 1 3434 3434 102028302 Aug 16 21:28 promtool* # 测试工具,用于检查prometheus配置文件、检测metrics数据
root@master1:~# vim /etc/systemd/system/prometheus.service
[Unit]
Description=Prometheus Server
Documentation=https://prometheus.io/docs/introduction/overview/
After=network.target
[Service]
Restart=on-failure
WorkingDirectory=/apps/prometheus/
ExecStart=/apps/prometheus/prometheus --config.file=/apps/prometheus/prometheus.yml
[Install]
WantedBy=multi-user.target
root@master1:~# systemctl daemon-reload
root@master1:~# systemctl enable --now prometheus.service
root@master1:~# mkdir /apps
root@master1:~# cd /apps
root@master1:~/apps# wget https://github.com/prometheus/node_exporter/releases/download/v1.3.1/node_exporter-1.3.1.linux-amd64.tar.gz
root@master1:~/apps# tar xf node_exporter-1.3.1.linux-amd64.tar.gz
root@master1:~/apps# ln -sf /apps/node_exporter-1.3.1.linux-amd64 /apps/node_exporter
root@master1:~# vim /etc/systemd/system/node-exporter.service
[Unit]
Description=Prometheus Node Exporter
After=network.target
[Service]
ExecStart=/apps/node_exporter/node_exporter
[Install]
WantedBy=multi-user.target
root@master1:~/apps/node_exporter# systemctl enable --now node-exporter.service
Created symlink /etc/systemd/system/multi-user.target.wants/node-exporter.service → /etc/systemd/system/node-exporter.service
ss -ntlup|grep 9100
tcp LISTEN 0 4096 *:9100 *:* users:(("node_exporter",pid=123757,fd=3))
root@master1:~/apps# vi prometheus.yml
global:
scrape_interval: 15s
scrape_timeout: 10s
evaluation_interval: 1m
scrape_configs:
- job_name: 'kubernetes-node'
static_configs:
- targets: ["172.17.1.102:9100","172.17.1.103:9100"]
( Operator部署器是基于已经编写好的yaml文件,可以将prometheus server,alertmanager,grafana,node-exporter等组件一键批量部署在k8s内部. )
https://github.com/prometheus-operator/kube-prometheus
注意:根据k8s的版本选择Operator
root@master1:~/app# kubectl get nodes
NAME STATUS ROLES AGE VERSION
172.17.1.101 Ready,SchedulingDisabled master 13d v1.22.2
172.17.1.102 Ready node 13d v1.22.2
172.17.1.103 Ready node 13d v1.22.2
172.17.1.104 Ready node 11d v1.22.2
https://codeload.github.com/prometheus-operator/kube-prometheus/zip/refs/heads/release-0.10
root@master1:~# git clone -b release-0.10 https://github.com/prometheus-operator/kube-prometheus.git
root@master1:~# cd /root/kube-prometheus/manifests
root@master1:~/kube-prometheus/manifests# kubectl create -f setup/
可以使用以下命令通过端口转发(port-forward)临时访问prometheus
root@master1:~# kubectl -n monitoring port-forward svc/prometheus-k8s 9090
修改service使得prometheus通过NodePort提供服务
spec:
type: NodePort
ports:
- name: web
port: 9090
targetPort: web
- name: reloader-web
port: 8080
targetPort: reloader-web
root@master1:~/kube-prometheus/manifests# kubectl apply -f .
- name: web
port: 9090
targetPort: web
- name: reloader-web
port: 8080
targetPort: reloader-web
root@master1:~/kube-prometheus/manifests# kubectl apply -f .
将node的/proc,/sys,/分别映射到node-exporter中,这样就能监控node的状态
root@master1:~/app# cat daemonset-node-export.yaml
apiVersion: v1
kind: Namespace
metadata:
name: monitoring
---
apiVersion: apps/v1
kind: DaemonSet
metadata:
name: node-exporter
namespace: monitoring
labels:
k8s-app: node-exporter
spec:
selector:
matchLabels:
k8s-app: node-exporter
template:
metadata:
labels:
k8s-app: node-exporter
spec:
tolerations:
- effect: NoSchedule
key: node-role.kubernetes.io/master
containers:
- image: prom/node-exporter:v1.6.1
imagePullPolicy: IfNotPresent
name: prometheus-node-exporter
ports:
- containerPort: 9100
hostPort: 9100
protocol: TCP
name: metrics
volumeMounts:
- mountPath: /host/proc
name: proc
- mountPath: /host/sys
name: sys
- mountPath: /host
name: rootfs
args:
- --path.procfs=/host/proc
- --path.sysfs=/host/sys
- --path.rootfs=/host
volumes:
- name: proc
hostPath:
path: /proc
- name: sys
hostPath:
path: /sys
- name: rootfs
hostPath:
path: /
hostNetwork: true
hostPID: true
---
apiVersion: v1
kind: Service
metadata:
annotations:
prometheus.io/scrape: "true"
labels:
k8s-app: node-exporter
name: node-exporter
namespace: monitoring
spec:
type: NodePort
ports:
- name: http
port: 9100
nodePort: 39100
protocol: TCP
selector:
k8s-app: node-exporter
#验证
root@master1:~/app# kubectl get pod -n monitoring
NAME READY STATUS RESTARTS AGE
node-exporter-fzgdt 1/1 Running 0 111m
node-exporter-gmvqn 1/1 Running 0 111m
node-exporter-mdkl2 1/1 Running 0 111m
node-exporter-xkvbf 1/1 Running 0 111m
---
kind: ConfigMap
apiVersion: v1
metadata:
labels:
app: prometheus
name: prometheus-config
namespace: monitoring
data:
prometheus.yml: |
global:
scrape_interval: 15s
scrape_timeout: 10s
evaluation_interval: 1m
scrape_configs:
- job_name: 'kubernetes-node'
kubernetes_sd_configs:
- role: node
relabel_configs:
- source_labels: [__address__]
regex: '(.*):10250'
replacement: '${1}:9100'
target_label: __address__
action: replace
- action: labelmap
regex: __meta_kubernetes_node_label_(.+)
- job_name: 'kubernetes-service-endpoints'
kubernetes_sd_configs:
- role: endpoints
relabel_configs:
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape]
action: keep
regex: true
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme]
action: replace
target_label: __scheme__
regex: (https?)
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path]
action: replace
target_label: __metrics_path__
regex: (.+)
- source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port]
action: replace
target_label: __address__
regex: ([^:]+)(?::\d+)?;(\d+)
replacement: $1:$2
- action: labelmap
regex: __meta_kubernetes_service_label_(.+)
- source_labels: [__meta_kubernetes_namespace]
action: replace
target_label: kubernetes_namespace
- source_labels: [__meta_kubernetes_service_name]
action: replace
target_label: kubernetes_name
- job_name: 'kubernetes-apiserver'
kubernetes_sd_configs:
- role: endpoints
scheme: https
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
relabel_configs:
- source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
action: keep
regex: default;kubernetes;https
root@master1:~/app# kubectl create serviceaccount monitor -n monitoring
serviceaccount/monitor created
root@master1:~/app# kubectl create clusterrolebinding monitor-clusterrolebinding -n monitoring --clusterrole=cluster-admin --serviceaccount=monitoring:monitor
clusterrolebinding.rbac.authorization.k8s.io/monitor-clusterrolebinding created
root@master1:~/app# cat deployment-prometheus.yaml
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: prometheus-server
namespace: monitoring
labels:
app: prometheus
spec:
replicas: 1
selector:
matchLabels:
app: prometheus
component: server
#matchExpressions:
#- {key: app, operator: In, values: [prometheus]}
#- {key: component, operator: In, values: [server]}
template:
metadata:
labels:
app: prometheus
component: server
annotations:
prometheus.io/scrape: 'false'
spec:
nodeName: 172.17.1.101
serviceAccountName: monitor
containers:
- name: prometheus
image: prom/prometheus:v2.31.2
imagePullPolicy: IfNotPresent
command:
- prometheus
- --config.file=/etc/prometheus/prometheus.yml
- --storage.tsdb.path=/prometheus
- --storage.tsdb.retention=720h
ports:
- containerPort: 9090
protocol: TCP
volumeMounts:
- mountPath: /etc/prometheus/prometheus.yml
name: prometheus-config
subPath: prometheus.yml
- mountPath: /prometheus/
name: prometheus-storage-volume
volumes:
- name: prometheus-config
configMap:
name: prometheus-config
items:
- key: prometheus.yml
path: prometheus.yml
mode: 0644
- name: prometheus-storage-volume
hostPath:
path: /data/prometheus #这个目录需要手动创建
type: Directory
root@master1:~/app# cat service-prometheus.yaml
---
apiVersion: v1
kind: Service
metadata:
name: prometheus
namespace: monitoring
labels:
app: prometheus
spec:
type: NodePort
ports:
- port: 9090
targetPort: 9090
nodePort: 30090
protocol: TCP
selector:
app: prometheus
component: server
#验证
root@master1:~/app# kubectl get pod -n monitoring
NAME READY STATUS RESTARTS AGE
node-exporter-fzgdt 1/1 Running 0 125m
node-exporter-gmvqn 1/1 Running 0 125m
node-exporter-mdkl2 1/1 Running 0 125m
node-exporter-xkvbf 1/1 Running 0 125m
prometheus-server-5f4fffc7d6-slkww 1/1 Running 0 102s
root@master1:~/app# kubectl get svc -n monitoring
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
node-exporter NodePort 10.100.139.87 <none> 9100:39100/TCP 126m
prometheus NodePort 10.100.169.254 <none> 9090:30090/TCP 125m