需要注意的是,kube-state-metrics 只是简单地提供 metrics 数据,并不会存储这些指标数据,所以我们可以使用 Prometheus 来抓取这些数据然后存储。它主要关注的是业务相关的一些元数据,比如 Deployment、Pod、副本状态等:调度了多少个 replicas?现在可用的有几个?多少个 Pod 是 running/stopped/terminated 状态?Pod 重启了多少次?我有多少 job 在运行中?
与之对比,metrics-server 当前的核心作用是:为 HPA 等组件提供决策指标支持。metrics-server 仅仅是获取、格式化现有数据,写入特定的存储,实质上是一个监控系统。
而 kube-state-metrics 是将 k8s 的运行状况在内存中做了个快照,并且获取新的指标,但它没有能力自行导出这些指标,需要由 Prometheus 等来抓取。
[root@k8s-master-1 kube-state-metrics]# cat kube-state-metrics.yaml
# ServiceAccount used by the kube-state-metrics Deployment (referenced there
# via serviceAccountName).
apiVersion: v1
# Token automount is disabled at the ServiceAccount level; the Deployment's
# pod spec opts back in explicitly with automountServiceAccountToken: true.
automountServiceAccountToken: false
kind: ServiceAccount
metadata:
  labels:
    app.kubernetes.io/component: exporter
    app.kubernetes.io/name: kube-state-metrics
    app.kubernetes.io/version: 2.4.2
  name: kube-state-metrics
  namespace: kube-system
---
# ClusterRole for kube-state-metrics: list/watch on the resource kinds below,
# plus create on tokenreviews and subjectaccessreviews.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  labels:
    app.kubernetes.io/component: exporter
    app.kubernetes.io/name: kube-state-metrics
    app.kubernetes.io/version: 2.4.2
  name: kube-state-metrics
rules:
# Core API group (empty string).
- apiGroups:
  - ""
  resources:
  - configmaps
  - secrets
  - nodes
  - pods
  - services
  - resourcequotas
  - replicationcontrollers
  - limitranges
  - persistentvolumeclaims
  - persistentvolumes
  - namespaces
  - endpoints
  verbs:
  - list
  - watch
# Workload controllers.
- apiGroups:
  - apps
  resources:
  - statefulsets
  - daemonsets
  - deployments
  - replicasets
  verbs:
  - list
  - watch
- apiGroups:
  - batch
  resources:
  - cronjobs
  - jobs
  verbs:
  - list
  - watch
- apiGroups:
  - autoscaling
  resources:
  - horizontalpodautoscalers
  verbs:
  - list
  - watch
# The next two rules grant only "create" (no list/watch).
- apiGroups:
  - authentication.k8s.io
  resources:
  - tokenreviews
  verbs:
  - create
- apiGroups:
  - authorization.k8s.io
  resources:
  - subjectaccessreviews
  verbs:
  - create
- apiGroups:
  - policy
  resources:
  - poddisruptionbudgets
  verbs:
  - list
  - watch
- apiGroups:
  - certificates.k8s.io
  resources:
  - certificatesigningrequests
  verbs:
  - list
  - watch
- apiGroups:
  - storage.k8s.io
  resources:
  - storageclasses
  - volumeattachments
  verbs:
  - list
  - watch
- apiGroups:
  - admissionregistration.k8s.io
  resources:
  - mutatingwebhookconfigurations
  - validatingwebhookconfigurations
  verbs:
  - list
  - watch
- apiGroups:
  - networking.k8s.io
  resources:
  - networkpolicies
  - ingresses
  verbs:
  - list
  - watch
- apiGroups:
  - coordination.k8s.io
  resources:
  - leases
  verbs:
  - list
  - watch
---
# Binds the kube-state-metrics ClusterRole to the kube-state-metrics
# ServiceAccount in the kube-system namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  labels:
    app.kubernetes.io/component: exporter
    app.kubernetes.io/name: kube-state-metrics
    app.kubernetes.io/version: 2.4.2
  name: kube-state-metrics
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: kube-state-metrics
subjects:
- kind: ServiceAccount
  name: kube-state-metrics
  namespace: kube-system
---
# Service in front of the kube-state-metrics pods, exposing the named ports
# http-metrics (8080) and telemetry (8081).
apiVersion: v1
kind: Service
metadata:
  annotations:
    # Marks this Service as scrapeable by Prometheus: without this annotation
    # the "prometheus-service-endpoints" job will not scrape these metrics.
    # NOTE(review): the commonly used key is "prometheus.io/scrape"; confirm
    # that "scraped" matches the relabel rule in the Prometheus config.
    prometheus.io/scraped: "true"
  labels:
    app.kubernetes.io/component: exporter
    app.kubernetes.io/name: kube-state-metrics
    app.kubernetes.io/version: 2.4.2
  name: kube-state-metrics
  namespace: kube-system
spec:
  # clusterIP: None  # left commented out to allow access through the Service
  ports:
  - name: http-metrics
    port: 8080
    targetPort: http-metrics
  - name: telemetry
    port: 8081
    targetPort: telemetry
  selector:
    app.kubernetes.io/name: kube-state-metrics
---
# Single-replica Deployment running kube-state-metrics v2.4.2 in kube-system.
apiVersion: apps/v1
kind: Deployment
metadata:
  labels:
    app.kubernetes.io/component: exporter
    app.kubernetes.io/name: kube-state-metrics
    app.kubernetes.io/version: 2.4.2
  name: kube-state-metrics
  namespace: kube-system
spec:
  replicas: 1
  selector:
    matchLabels:
      app.kubernetes.io/name: kube-state-metrics
  template:
    metadata:
      labels:
        app.kubernetes.io/component: exporter
        app.kubernetes.io/name: kube-state-metrics
        app.kubernetes.io/version: 2.4.2
    spec:
      # Pin the pod to the node named k8s-master-1.
      nodeName: k8s-master-1
      # Tolerate the master NoSchedule taint so the pod may run on the master node.
      tolerations:
      - key: "node-role.kubernetes.io/master"
        operator: "Exists"
        effect: "NoSchedule"
      # Explicitly mount the ServiceAccount token in the pod (the ServiceAccount
      # itself sets automountServiceAccountToken: false).
      automountServiceAccountToken: true
      containers:
      # Upstream image, kept for reference; the mirror image below is used instead.
      # - image: k8s.gcr.io/kube-state-metrics/kube-state-metrics:v2.4.2
      - image: anjia0532/google-containers.kube-state-metrics.kube-state-metrics:v2.4.2
        # Liveness is checked on the metrics port (8080) at /healthz.
        livenessProbe:
          httpGet:
            path: /healthz
            port: 8080
          initialDelaySeconds: 5
          timeoutSeconds: 5
        name: kube-state-metrics
        ports:
        - containerPort: 8080
          name: http-metrics
        - containerPort: 8081
          name: telemetry
        # Readiness is checked on the telemetry port (8081) at /.
        readinessProbe:
          httpGet:
            path: /
            port: 8081
          initialDelaySeconds: 5
          timeoutSeconds: 5
        # Hardened container: no privilege escalation, all capabilities
        # dropped, read-only root filesystem, runs as UID 65534.
        securityContext:
          allowPrivilegeEscalation: false
          capabilities:
            drop:
            - ALL
          readOnlyRootFilesystem: true
          runAsUser: 65534
      serviceAccountName: kube-state-metrics
# 查看pod
[root@k8s-master-1 prometheus-server]# kubectl get pods -A
NAMESPACE NAME READY STATUS RESTARTS AGE
kube-prometheus grafana-5c987bb879-q7swc 1/1 Running 0 63m
kube-prometheus node-exporter-hcgdj 1/1 Running 0 67m
kube-prometheus node-exporter-kkqds 1/1 Running 0 67m
kube-prometheus prometheus-server-5ffd56fdf4-qrsck 1/1 Running 0 2m47s
kube-system calico-kube-controllers-5855d94c7d-wp4gs 1/1 Running 3 35h
kube-system calico-node-78xm6 1/1 Running 3 35h
kube-system calico-node-8tmst 1/1 Running 3 35h
kube-system coredns-6f4c9cb7c5-r9pqt 1/1 Running 3 35h
kube-system kube-state-metrics-55b6bd9f47-g6z85 1/1 Running 0 10m
Kube-State-Metrics部署参考链接:https://github.com/kubernetes/kube-state-metrics/tree/master/examples/standard
Alert规则参考链接:https://github.com/kubernetes/kube-state-metrics/tree/master/examples/prometheus-alerting-rules
anjia0532解决镜像无法拉取链接:https://github.com/anjia0532/gcr.io_mirror
Prometheus server配置参考链接:https://blog.csdn.net/qq_41586875/article/details/124513482