For the full write-up, see the original blog post: A Kubernetes monitoring solution with kube-prometheus (prometheus, node-exporter, grafana)
https://finolo.gy/2019/12/Kubernetes监控方案kube-prometheus-prometheus-node-exporter-grafana
kubernetes: v1.15.6
kube-prometheus: v0.2.0
Clone kube-prometheus v0.2.0 from GitHub.
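One way to fetch the tagged release, assuming the project's coreos/kube-prometheus repository on GitHub (adjust the URL if the project has moved):
# Download and unpack the v0.2.0 release tarball; it extracts to kube-prometheus-0.2.0/
wget https://github.com/coreos/kube-prometheus/archive/v0.2.0.tar.gz
tar -xzf v0.2.0.tar.gz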
Change into the manifests directory:
cd kube-prometheus-0.2.0/manifests/
Create some directories and sort the YAML files by component:
mkdir -p operator node-exporter alertmanager grafana kube-state-metrics prometheus serviceMonitor adapter add-service
mv *-serviceMonitor* serviceMonitor/
mv 0prometheus-operator* operator/
mv grafana-* grafana/
mv kube-state-metrics-* kube-state-metrics/
mv alertmanager-* alertmanager/
mv node-exporter-* node-exporter/
mv prometheus-adapter* adapter/
mv prometheus-* prometheus/
After reorganizing, the directory structure looks like this:
tree .
.
├── 00namespace-namespace.yaml
├── adapter
│   ├── prometheus-adapter-apiService.yaml
│   ├── prometheus-adapter-clusterRoleAggregatedMetricsReader.yaml
│   ├── prometheus-adapter-clusterRoleBindingDelegator.yaml
│   ├── prometheus-adapter-clusterRoleBinding.yaml
│   ├── prometheus-adapter-clusterRoleServerResources.yaml
│   ├── prometheus-adapter-clusterRole.yaml
│   ├── prometheus-adapter-configMap.yaml
│   ├── prometheus-adapter-deployment.yaml
│   ├── prometheus-adapter-roleBindingAuthReader.yaml
│   ├── prometheus-adapter-serviceAccount.yaml
│   └── prometheus-adapter-service.yaml
├── add-service
├── alertmanager
│   ├── alertmanager-alertmanager.yaml
│   ├── alertmanager-secret.yaml
│   ├── alertmanager-serviceAccount.yaml
│   └── alertmanager-service.yaml
├── grafana
│   ├── grafana-dashboardDatasources.yaml
│   ├── grafana-dashboardDefinitions.yaml
│   ├── grafana-dashboardSources.yaml
│   ├── grafana-deployment.yaml
│   ├── grafana-serviceAccount.yaml
│   └── grafana-service.yaml
├── kube-state-metrics
│   ├── kube-state-metrics-clusterRoleBinding.yaml
│   ├── kube-state-metrics-clusterRole.yaml
│   ├── kube-state-metrics-deployment.yaml
│   ├── kube-state-metrics-roleBinding.yaml
│   ├── kube-state-metrics-role.yaml
│   ├── kube-state-metrics-serviceAccount.yaml
│   └── kube-state-metrics-service.yaml
├── node-exporter
│   ├── node-exporter-clusterRoleBinding.yaml
│   ├── node-exporter-clusterRole.yaml
│   ├── node-exporter-daemonset.yaml
│   ├── node-exporter-serviceAccount.yaml
│   └── node-exporter-service.yaml
├── operator
│   ├── 0prometheus-operator-0alertmanagerCustomResourceDefinition.yaml
│   ├── 0prometheus-operator-0podmonitorCustomResourceDefinition.yaml
│   ├── 0prometheus-operator-0prometheusCustomResourceDefinition.yaml
│   ├── 0prometheus-operator-0prometheusruleCustomResourceDefinition.yaml
│   ├── 0prometheus-operator-0servicemonitorCustomResourceDefinition.yaml
│   ├── 0prometheus-operator-clusterRoleBinding.yaml
│   ├── 0prometheus-operator-clusterRole.yaml
│   ├── 0prometheus-operator-deployment.yaml
│   ├── 0prometheus-operator-serviceAccount.yaml
│   └── 0prometheus-operator-service.yaml
├── prometheus
│   ├── prometheus-clusterRoleBinding.yaml
│   ├── prometheus-clusterRole.yaml
│   ├── prometheus-prometheus.yaml
│   ├── prometheus-roleBindingConfig.yaml
│   ├── prometheus-roleBindingSpecificNamespaces.yaml
│   ├── prometheus-roleConfig.yaml
│   ├── prometheus-roleSpecificNamespaces.yaml
│   ├── prometheus-rules.yaml
│   ├── prometheus-serviceAccount.yaml
│   └── prometheus-service.yaml
└── serviceMonitor
    ├── 0prometheus-operator-serviceMonitor.yaml
    ├── alertmanager-serviceMonitor.yaml
    ├── grafana-serviceMonitor.yaml
    ├── kube-state-metrics-serviceMonitor.yaml
    ├── node-exporter-serviceMonitor.yaml
    ├── prometheus-serviceMonitorApiserver.yaml
    ├── prometheus-serviceMonitorCoreDNS.yaml
    ├── prometheus-serviceMonitorKubeControllerManager.yaml
    ├── prometheus-serviceMonitorKubelet.yaml
    ├── prometheus-serviceMonitorKubeScheduler.yaml
    └── prometheus-serviceMonitor.yaml
In alertmanager-service.yaml, set the Service type to NodePort and add nodePort: 30093.
In grafana-service.yaml, do the same with nodePort: 32000. Two lines are added (type: NodePort and nodePort: 32000), as shown below:
apiVersion: v1
kind: Service
metadata:
  labels:
    app: grafana
  name: grafana
  namespace: monitoring
spec:
  type: NodePort
  ports:
  - name: http
    port: 3000
    targetPort: http
    nodePort: 32000
  selector:
    app: grafana
In prometheus-service.yaml, likewise set the type to NodePort and add nodePort: 30090.
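For reference, a sketch of prometheus-service.yaml after the change; the surrounding fields follow the kube-prometheus v0.2.0 manifest, so verify them against your copy:
apiVersion: v1
kind: Service
metadata:
  labels:
    prometheus: k8s
  name: prometheus-k8s
  namespace: monitoring
spec:
  type: NodePort      # added
  ports:
  - name: web
    port: 9090
    targetPort: web
    nodePort: 30090   # added
  selector:
    app: prometheus
    prometheus: k8s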
Apply the manifests. kubectl apply -f . only picks up the top-level files (here just 00namespace-namespace.yaml), so each component directory is applied separately:
kubectl apply -f .
namespace/monitoring created
kubectl apply -f operator/
customresourcedefinition.apiextensions.k8s.io/alertmanagers.monitoring.coreos.com created
customresourcedefinition.apiextensions.k8s.io/podmonitors.monitoring.coreos.com created
customresourcedefinition.apiextensions.k8s.io/prometheuses.monitoring.coreos.com created
customresourcedefinition.apiextensions.k8s.io/prometheusrules.monitoring.coreos.com created
customresourcedefinition.apiextensions.k8s.io/servicemonitors.monitoring.coreos.com created
clusterrole.rbac.authorization.k8s.io/prometheus-operator created
clusterrolebinding.rbac.authorization.k8s.io/prometheus-operator created
deployment.apps/prometheus-operator created
service/prometheus-operator created
serviceaccount/prometheus-operator created
kubectl apply -f adapter/
apiservice.apiregistration.k8s.io/v1beta1.metrics.k8s.io created
clusterrole.rbac.authorization.k8s.io/prometheus-adapter created
clusterrole.rbac.authorization.k8s.io/system:aggregated-metrics-reader created
clusterrolebinding.rbac.authorization.k8s.io/prometheus-adapter created
clusterrolebinding.rbac.authorization.k8s.io/resource-metrics:system:auth-delegator created
clusterrole.rbac.authorization.k8s.io/resource-metrics-server-resources created
configmap/adapter-config created
deployment.apps/prometheus-adapter created
rolebinding.rbac.authorization.k8s.io/resource-metrics-auth-reader created
service/prometheus-adapter created
serviceaccount/prometheus-adapter created
kubectl apply -f alertmanager/
alertmanager.monitoring.coreos.com/main created
secret/alertmanager-main created
service/alertmanager-main created
serviceaccount/alertmanager-main created
kubectl apply -f node-exporter/
clusterrole.rbac.authorization.k8s.io/node-exporter created
clusterrolebinding.rbac.authorization.k8s.io/node-exporter created
daemonset.apps/node-exporter created
service/node-exporter created
serviceaccount/node-exporter created
kubectl apply -f kube-state-metrics/
clusterrole.rbac.authorization.k8s.io/kube-state-metrics created
clusterrolebinding.rbac.authorization.k8s.io/kube-state-metrics created
deployment.apps/kube-state-metrics created
role.rbac.authorization.k8s.io/kube-state-metrics created
rolebinding.rbac.authorization.k8s.io/kube-state-metrics created
service/kube-state-metrics created
serviceaccount/kube-state-metrics created
kubectl apply -f grafana/
secret/grafana-datasources created
configmap/grafana-dashboard-apiserver created
configmap/grafana-dashboard-controller-manager created
configmap/grafana-dashboard-k8s-resources-cluster created
configmap/grafana-dashboard-k8s-resources-namespace created
configmap/grafana-dashboard-k8s-resources-pod created
configmap/grafana-dashboard-k8s-resources-workload created
configmap/grafana-dashboard-k8s-resources-workloads-namespace created
configmap/grafana-dashboard-kubelet created
configmap/grafana-dashboard-node-cluster-rsrc-use created
configmap/grafana-dashboard-node-rsrc-use created
configmap/grafana-dashboard-nodes created
configmap/grafana-dashboard-persistentvolumesusage created
configmap/grafana-dashboard-pods created
configmap/grafana-dashboard-prometheus-remote-write created
configmap/grafana-dashboard-prometheus created
configmap/grafana-dashboard-proxy created
configmap/grafana-dashboard-scheduler created
configmap/grafana-dashboard-statefulset created
configmap/grafana-dashboards created
deployment.apps/grafana created
service/grafana created
serviceaccount/grafana created
kubectl apply -f prometheus/
clusterrole.rbac.authorization.k8s.io/prometheus-k8s created
clusterrolebinding.rbac.authorization.k8s.io/prometheus-k8s created
prometheus.monitoring.coreos.com/k8s created
rolebinding.rbac.authorization.k8s.io/prometheus-k8s-config created
rolebinding.rbac.authorization.k8s.io/prometheus-k8s created
rolebinding.rbac.authorization.k8s.io/prometheus-k8s created
rolebinding.rbac.authorization.k8s.io/prometheus-k8s created
role.rbac.authorization.k8s.io/prometheus-k8s-config created
role.rbac.authorization.k8s.io/prometheus-k8s created
role.rbac.authorization.k8s.io/prometheus-k8s created
role.rbac.authorization.k8s.io/prometheus-k8s created
prometheusrule.monitoring.coreos.com/prometheus-k8s-rules created
service/prometheus-k8s created
serviceaccount/prometheus-k8s created
kubectl apply -f serviceMonitor/
servicemonitor.monitoring.coreos.com/prometheus-operator created
servicemonitor.monitoring.coreos.com/alertmanager created
servicemonitor.monitoring.coreos.com/grafana created
servicemonitor.monitoring.coreos.com/kube-state-metrics created
servicemonitor.monitoring.coreos.com/node-exporter created
servicemonitor.monitoring.coreos.com/prometheus created
servicemonitor.monitoring.coreos.com/kube-apiserver created
servicemonitor.monitoring.coreos.com/coredns created
servicemonitor.monitoring.coreos.com/kube-controller-manager created
servicemonitor.monitoring.coreos.com/kube-scheduler created
servicemonitor.monitoring.coreos.com/kubelet created
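Note: the operator's CRDs must be registered before Alertmanager, Prometheus, PrometheusRule and ServiceMonitor objects can be created, which is why operator/ is applied first. If a later apply fails with an error like no matches for kind "ServiceMonitor", one way to wait for the CRDs (a sketch):
kubectl wait --for=condition=Established crd/servicemonitors.monitoring.coreos.com --timeout=60s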
Check the status:
kubectl -n monitoring get all
NAME                                      READY   STATUS    RESTARTS   AGE
pod/alertmanager-main-0                   2/2     Running   0          24h
pod/alertmanager-main-1                   2/2     Running   4          24h
pod/alertmanager-main-2                   2/2     Running   0          24h
pod/grafana-57bfdd47f8-k9wcd              1/1     Running   0          9h
pod/kube-state-metrics-65d5b4b99d-gwfb8   4/4     Running   0          9h
pod/node-exporter-jmhff                   2/2     Running   0          9h
pod/node-exporter-ld2sg                   2/2     Running   0          9h
pod/node-exporter-qqmzw                   2/2     Running   0          9h
pod/prometheus-adapter-668748ddbd-h6x2w   1/1     Running   0          25h
pod/prometheus-k8s-0                      3/3     Running   3          8h
pod/prometheus-k8s-1                      3/3     Running   2          8h
pod/prometheus-operator-55b978b89-lptfq   1/1     Running   0          26h

NAME                            TYPE        CLUSTER-IP      EXTERNAL-IP   PORT(S)                      AGE
service/alertmanager-main       NodePort    10.104.249.82   <none>        9093:30093/TCP               24h
service/alertmanager-operated   ClusterIP   None            <none>        9093/TCP,9094/TCP,9094/UDP   24h
service/grafana                 NodePort    10.99.157.162   <none>        3000:32000/TCP               9h
service/kube-state-metrics      ClusterIP   None            <none>        8443/TCP,9443/TCP            9h
service/node-exporter           ClusterIP   None            <none>        9100/TCP                     10h
service/prometheus-adapter      ClusterIP   10.105.30.68    <none>        443/TCP                      25h
service/prometheus-k8s          NodePort    10.102.127.30   <none>        9090:30090/TCP               8h
service/prometheus-operated     ClusterIP   None            <none>        9090/TCP                     8h
service/prometheus-operator     ClusterIP   None            <none>        8080/TCP                     26h

NAME                           DESIRED   CURRENT   READY   UP-TO-DATE   AVAILABLE   NODE SELECTOR            AGE
daemonset.apps/node-exporter   3         3         3       3            3           kubernetes.io/os=linux   10h

NAME                                  READY   UP-TO-DATE   AVAILABLE   AGE
deployment.apps/grafana               1/1     1            1           9h
deployment.apps/kube-state-metrics    1/1     1            1           9h
deployment.apps/prometheus-adapter    1/1     1            1           25h
deployment.apps/prometheus-operator   1/1     1            1           26h

NAME                                            DESIRED   CURRENT   READY   AGE
replicaset.apps/grafana-57bfdd47f8              1         1         1       9h
replicaset.apps/kube-state-metrics-65d5b4b99d   1         1         1       9h
replicaset.apps/kube-state-metrics-77467ddf9b   0         0         0       9h
replicaset.apps/prometheus-adapter-668748ddbd   1         1         1       25h
replicaset.apps/prometheus-operator-55b978b89   1         1         1       26h

NAME                                 READY   AGE
statefulset.apps/alertmanager-main   3/3     24h
statefulset.apps/prometheus-k8s      2/2     8h
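With the NodePort services above, the web UIs are reachable on any cluster node's IP; the node address below is a placeholder:
# Quick look at the Prometheus targets page through the NodePort
curl -s http://<node-ip>:30090/targets | head
# Grafana:      http://<node-ip>:32000
# Alertmanager: http://<node-ip>:30093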
{% asset_image prometheus-targets.png %}
Notice that two targets have no endpoints: kube-controller-manager and kube-scheduler.
Look at the definition of the corresponding ServiceMonitor resource:
cat prometheus-serviceMonitorKubeControllerManager.yaml
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  labels:
    k8s-app: kube-controller-manager
  name: kube-controller-manager
  namespace: monitoring
spec:
  endpoints:
  - interval: 30s
    metricRelabelings:
    - action: drop
      regex: etcd_(debugging|disk|request|server).*
      sourceLabels:
      - __name__
    port: http-metrics
  jobLabel: k8s-app
  namespaceSelector:
    matchNames:
    - kube-system
  selector:
    matchLabels:
      k8s-app: kube-controller-manager
This is a typical ServiceMonitor declaration: through spec.selector.matchLabels it looks for a Service labeled k8s-app=kube-controller-manager in the kube-system namespace, but no such Service exists in the cluster.
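One way to confirm this, using the label from the ServiceMonitor above (a sketch):
kubectl -n kube-system get svc -l k8s-app=kube-controller-manager
# expected: No resources found.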
So, create a Service manifest prometheus-kubeControllerManagerService.yaml in the add-service directory:
apiVersion: v1
kind: Service
metadata:
  namespace: kube-system
  name: kube-controller-manager
  labels:
    k8s-app: kube-controller-manager
spec:
  selector:
    component: kube-controller-manager
  ports:
  - name: http-metrics
    port: 10252
    targetPort: 10252
    protocol: TCP
Alternatively, prometheus-kubeControllerManagerService.yaml can be written as a headless Service with an explicit Endpoints object:
apiVersion: v1
kind: Service
metadata:
  namespace: kube-system
  name: kube-controller-manager
  labels:
    k8s-app: kube-controller-manager
spec:
  selector:
    component: kube-controller-manager
  type: ClusterIP
  clusterIP: None
  ports:
  - name: http-metrics
    port: 10252
    targetPort: 10252
    protocol: TCP
---
apiVersion: v1
kind: Endpoints
metadata:
  labels:
    k8s-app: kube-controller-manager
  name: kube-controller-manager
  namespace: kube-system
subsets:
- addresses:
  - ip: 172.16.64.233
  # - ip: 10.0.0.15
  # - ip: 10.0.0.20
  ports:
  - name: http-metrics
    port: 10252
    protocol: TCP
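The address in the Endpoints object is the node where kube-controller-manager actually runs (172.16.64.233 in this cluster). One way to look it up, assuming a kubeadm cluster whose static pods carry the component=kube-controller-manager label:
kubectl -n kube-system get pods -l component=kube-controller-manager -o wide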
Similarly, create prometheus-kubeSchedulerService.yaml in the add-service directory:
apiVersion: v1
kind: Service
metadata:
  namespace: kube-system
  name: kube-scheduler
  labels:
    # matches the serviceMonitor selector
    k8s-app: kube-scheduler
spec:
  selector:
    # matches the scheduler pod's label
    component: kube-scheduler
  ports:
  - name: http-metrics
    port: 10251
    targetPort: 10251
    protocol: TCP
prometheus-kubeSchedulerService.yaml can likewise be written as a headless Service with an explicit Endpoints object:
apiVersion: v1
kind: Service
metadata:
  namespace: kube-system
  name: kube-scheduler
  labels:
    k8s-app: kube-scheduler
spec:
  type: ClusterIP
  clusterIP: None
  ports:
  - name: port
    port: 10251
    protocol: TCP
---
apiVersion: v1
kind: Endpoints
metadata:
  labels:
    k8s-app: kube-scheduler
  name: kube-scheduler
  namespace: kube-system
subsets:
- addresses:
  - ip: 172.16.64.233
  # - ip: 10.0.0.15
  # - ip: 10.0.0.20
  ports:
  - name: http-metrics
    port: 10251
    protocol: TCP
kubectl apply -f add-service/
Now the two targets show scrape endpoints on the Prometheus targets page.
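One way to confirm that the new Services now have endpoints (a sketch):
kubectl -n kube-system get endpoints kube-controller-manager kube-scheduler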
Browsing the Configuration page of the Prometheus web UI raises a question: where does all this configuration come from?