一、准备
1、k8s 1.23
2、helm 3.8
4、minio最新版本 (请自行安装,本人使用docker部署单节点)
4、kube-prometheus-stack 版本为:35.0.0 (helm安装)
5、thanos(bitnami/thanos chart)版本为:10.3.6 (helm安装)
6、准备两套k8s, 分别使用 *.lady.cn(监控)
和 *.kids.cn(被监控)
二、目标
lady.cn 部署以下组件
kids.cn 部署以下组件
三、 minio 已在独立服务器部署minio,作为S3对象存储
172.16.0.39:9000 admin / Thanos@654321
四、部署kube-prometheus-stack(分别在两个集群中部署)
# Add the prometheus-community Helm repository (it hosts the kube-prometheus-stack chart)
helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
# Refresh the local repository index
helm repo update
# Release/chart name and the pinned chart version
pro=kube-prometheus-stack
chart_version=35.0.0
mkdir -p "/data/$pro"
cd "/data/$pro"
# Download the chart archive into the working directory
helm pull "prometheus-community/$pro" --version="$chart_version"
# Extract only values.yaml from the archive, dropping the leading chart directory
tar zxvf "$pro-$chart_version.tgz" --strip-components 1 "$pro/values.yaml"
# Generate the install script. The heredoc delimiter is intentionally unquoted so
# that the two variables are expanded now and written into the script as literals.
cat > "/data/$pro/start.sh" << EOF
# The chart archive and values.yaml below are relative paths, so switch to the
# chart directory first instead of relying on the caller's working directory.
cd "/data/$pro" || exit 1
helm upgrade --create-namespace --wait --install $pro $pro-$chart_version.tgz \
  -f values.yaml \
  -n monitoring
EOF
# Overrides applied to the kube-prometheus-stack values.yaml (chart 35.0.0).
# Everything below belongs to ONE YAML document: merge each key into the
# extracted values.yaml rather than pasting multiple "---" documents.
# Host names, IPs, bucket name and credentials are example values; adjust them.

# Kubernetes version the chart should target
kubeTargetVersionOverride: "1.23.4"

alertmanager:
  ingress:
    enabled: true
    hosts:
      - alertmanager.lady.cn  # adjust to your domain
  alertmanagerSpec:
    # Persistent storage for Alertmanager
    storage:
      volumeClaimTemplate:
        spec:
          accessModes: ["ReadWriteOnce"]
          resources:
            requests:
              storage: 20Gi

grafana:
  ingress:
    enabled: true
    hosts:
      - grafana.lady.cn  # adjust to your domain
  # Persistent storage for Grafana
  persistence:
    type: pvc
    enabled: true
    accessModes:
      - ReadWriteOnce
    size: 10Gi

prometheus:
  thanosService:
    enabled: true
  thanosServiceExternal:
    enabled: true  # switched on
    type: NodePort  # adjust: use LoadBalancer when a load balancer is available
  # Secret holding the Thanos bucket config, i.e. the object storage (MinIO) settings.
  # The text under "objstore.yml: |" is the literal file content and is kept as-is.
  extraSecret:
    name: bucket-config
    data:
      objstore.yml: |
        type: S3
        config:
          bucket: "lady-bucket" #minio的桶名,注意修改
          endpoint: "172.16.0.39:9000" #minio的地址
          access_key: "Thanos" #minio的帐号
          secret_key: "Thanos@654321" #minio的密码
          insecure: true #不验证tls证书
  ingress:
    enabled: true
    hosts:
      - prometheus.lady.cn  # adjust to your domain
  prometheusSpec:
    # Local compaction is turned off for use with the Thanos sidecar
    disableCompaction: true
    thanos:
      # The Thanos sidecar is configured from the Secret declared in extraSecret above
      objectStorageConfig:
        name: bucket-config
        key: objstore.yml
    # Persistent storage for Prometheus
    storageSpec:
      volumeClaimTemplate:
        spec:
          accessModes: ["ReadWriteOnce"]
          resources:
            requests:
              storage: 50Gi

kubeControllerManager:
  endpoints:
    - 192.168.11.100  # adjust to your control-plane node
  service:
    port: 10257  # this port must be set explicitly

kubeScheduler:
  endpoints:
    - 192.168.11.100  # adjust to your control-plane node
  service:
    port: 10259  # this port must be set explicitly

kubeEtcd:
  endpoints:
    - 192.168.11.100  # adjust to your etcd node

kubeProxy:
  endpoints:
    - 192.168.11.100  # adjust to your node
启动
# Run the install script generated earlier for kube-prometheus-stack
bash /data/kube-prometheus-stack/start.sh
本图是thanos-sidecar上传数据到minio的结果
五、thanos(bitnami/thanos chart)安装
1、下载charts
# Add the Bitnami Helm repository (it hosts the thanos chart)
helm repo add bitnami https://charts.bitnami.com/bitnami
# Refresh the local repository index
helm repo update
# Release/chart name and the pinned chart version
pro=thanos
chart_version=10.3.6
mkdir -p "/data/$pro"
cd "/data/$pro"
# Download the chart archive into the working directory
helm pull "bitnami/$pro" --version="$chart_version"
# Extract only values.yaml from the archive, dropping the leading chart directory
tar zxvf "$pro-$chart_version.tgz" --strip-components 1 "$pro/values.yaml"
# Generate the install script. The heredoc delimiter is intentionally unquoted so
# that the two variables are expanded now and written into the script as literals.
cat > "/data/$pro/start.sh" << EOF
# The chart archive and values.yaml below are relative paths, so switch to the
# chart directory first instead of relying on the caller's working directory.
cd "/data/$pro" || exit 1
helm upgrade --wait --create-namespace --install $pro $pro-$chart_version.tgz \
  -f values.yaml \
  -n monitoring
EOF
2、 配置values.yaml
# Overrides applied to the bitnami/thanos values.yaml (chart 10.3.6).
# Host names and label names are example values; adjust them.

# Must match prometheus.extraSecret.name in the kube-prometheus-stack values.yaml
existingObjstoreSecret: "bucket-config"

query:
  replicaLabel: [lady_replica]  # deduplication label; adjust
  dnsDiscovery:
    # Name of the Thanos discovery Service created by kube-prometheus-stack
    sidecarsService: "kube-prometheus-stack-thanos-discovery"
    # Namespace where kube-prometheus-stack is deployed
    sidecarsNamespace: "monitoring"
  ingress:
    enabled: true
    hostname: thanos.lady.cn  # adjust to your domain

# Query frontend; this is the endpoint Grafana queries (see the figure below)
queryFrontend:
  enabled: true

compactor:
  enabled: true
  persistence:
    enabled: false  # set to true in production to persist data

storegateway:
  enabled: true
  persistence:
    enabled: false  # set to true in production to persist data

ruler:
  enabled: true
  replicaLabel: lady_replica  # deduplication label; adjust
  alertmanagers:
    # Service address of the kube-prometheus-stack Alertmanager. Thanos Ruler
    # requires a URL with a non-empty scheme, hence the explicit http://.
    - http://kube-prometheus-stack-alertmanager:9093
  # ConfigMap supplying the ruler configuration.
  # NOTE(review): this name looks like a kube-prometheus-stack dashboard
  # ConfigMap; confirm it really contains the ruler rules.
  existingConfigmap: "kube-prometheus-stack-alertmanager-overview"
  persistence:
    enabled: false  # set to true in production to persist data
  ingress:
    enabled: true
    hostname: thanos-ruler.lady.cn  # adjust to your domain
# Run the install script generated earlier for the thanos chart
bash /data/thanos/start.sh
3、query图,包含了sidecar、store、rule
在lady集群中增加thanos-storegateway-kids 和thanos-query-kids来收集kids集群的数据
# Write a selector-less Service plus a hand-maintained Endpoints object so that
# "thanos-query-kids" in the lady cluster resolves to the kids cluster.
# The heredoc delimiter is quoted, so the manifest is written verbatim.
cat > /data/thanos/query-kids.yaml << 'EOF'
---
apiVersion: v1
kind: Endpoints
metadata:
  name: thanos-query-kids
  namespace: monitoring
subsets:
  - addresses:
      - ip: 192.168.11.101  # adjust: this must point at the kids.cn cluster
    ports:
      - name: grpc
        port: 30901
        protocol: TCP
      - name: http
        port: 30902
        protocol: TCP
---
apiVersion: v1
kind: Service
metadata:
  labels:
    app.kubernetes.io/instance: thanos-query-kids
  name: thanos-query-kids
  namespace: monitoring
spec:
  ports:
    - name: grpc
      port: 30901
      protocol: TCP
      targetPort: grpc
    - name: http
      port: 30902
      protocol: TCP
      targetPort: http
  type: ClusterIP
EOF
kubectl apply -f /data/thanos/query-kids.yaml
# Write the manifests for a second store gateway that reads the kids bucket.
# The heredoc delimiter is quoted, so the manifest is written verbatim.
cat > /data/thanos/storegateway-kids.yaml << 'EOF'
# Secret with the base64-encoded objstore.yml for the kids cluster's bucket
# (the payload decodes to an S3 config whose bucket is "kids-bucket").
apiVersion: v1
kind: Secret
metadata:
  labels:
    app: kube-prometheus-stack-prometheus
    app.kubernetes.io/component: prometheus
    app.kubernetes.io/instance: kube-prometheus-stack
    app.kubernetes.io/part-of: kube-prometheus-stack
  name: bucket-config-kids
  namespace: monitoring
data:
  objstore.yml: dHlwZTogUzMKY29uZmlnOgogIGJ1Y2tldDogImtpZHMtYnVja2V0IiAgICAgICAgICAgICAgICAgICAgICAjbWluaW/nmoTmobblkI0KICBlbmRwb2ludDogIjE3Mi4xNi4wLjM5OjkwMDAiICAgICAgICAgICAgICAgI21pbmlv55qE5Zyw5Z2ACiAgYWNjZXNzX2tleTogIlRoYW5vcyIgICAgICAgICAgICAgICAgICAgICAgICNtaW5pb+eahOW4kOWPtwogIHNlY3JldF9rZXk6ICJUaGFub3NANjU0MzIxIiAgICAgICAgICAgICAgICAjbWluaW/nmoTlr4bnoIEKICBpbnNlY3VyZTogdHJ1ZSAgICAgICAgICAgICAgICAgICAgICAgICAgICAgI+S4jemqjOivgXRsc+ivgeS5pgo=
type: Opaque
---
# Store gateway StatefulSet serving blocks from the bucket configured in the
# bucket-config-kids Secret (mounted at /conf).
apiVersion: apps/v1
kind: StatefulSet
metadata:
  labels:
    app.kubernetes.io/component: storegateway-kids
    app.kubernetes.io/instance: thanos
    app.kubernetes.io/name: thanos
  name: thanos-storegateway-kids
  namespace: monitoring
spec:
  replicas: 1
  selector:
    matchLabels:
      app.kubernetes.io/component: storegateway-kids
      app.kubernetes.io/instance: thanos
      app.kubernetes.io/name: thanos
  # NOTE(review): presumably the headless Service created by the thanos chart
  # for its own store gateway; confirm it exists in this namespace.
  serviceName: thanos-storegateway-headless
  template:
    metadata:
      labels:
        app.kubernetes.io/component: storegateway-kids
        app.kubernetes.io/instance: thanos
        app.kubernetes.io/name: thanos
    spec:
      affinity:
        podAntiAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
            - podAffinityTerm:
                labelSelector:
                  matchLabels:
                    app.kubernetes.io/component: storegateway-kids
                    app.kubernetes.io/instance: thanos
                    app.kubernetes.io/name: thanos
                namespaces:
                  - monitoring
                topologyKey: kubernetes.io/hostname
              weight: 1
      automountServiceAccountToken: true
      containers:
        - args:
            - store
            - --log.level=info
            - --log.format=logfmt
            - --grpc-address=0.0.0.0:10901
            - --http-address=0.0.0.0:10902
            - --data-dir=/data
            - --objstore.config-file=/conf/objstore.yml
          image: docker.io/bitnami/thanos:0.25.2-scratch-r5
          imagePullPolicy: IfNotPresent
          livenessProbe:
            failureThreshold: 6
            httpGet:
              path: /-/healthy
              port: http
              scheme: HTTP
            initialDelaySeconds: 30
            periodSeconds: 10
            successThreshold: 1
            timeoutSeconds: 30
          name: storegateway
          ports:
            - containerPort: 10902
              name: http
              protocol: TCP
            - containerPort: 10901
              name: grpc
              protocol: TCP
          readinessProbe:
            failureThreshold: 6
            httpGet:
              path: /-/ready
              port: http
              scheme: HTTP
            initialDelaySeconds: 30
            periodSeconds: 10
            successThreshold: 1
            timeoutSeconds: 30
          securityContext:
            allowPrivilegeEscalation: false
            readOnlyRootFilesystem: true
            runAsNonRoot: true
            runAsUser: 1001
          terminationMessagePath: /dev/termination-log
          terminationMessagePolicy: File
          volumeMounts:
            - mountPath: /conf
              name: objstore-config
            - mountPath: /data
              name: data
      dnsPolicy: ClusterFirst
      restartPolicy: Always
      schedulerName: default-scheduler
      securityContext:
        fsGroup: 1001
      serviceAccount: thanos-storegateway
      serviceAccountName: thanos-storegateway
      terminationGracePeriodSeconds: 30
      volumes:
        - name: objstore-config
          secret:
            defaultMode: 420
            secretName: bucket-config-kids
        # Block cache lives in an emptyDir, so it is not kept across pod restarts
        - emptyDir: {}
          name: data
  updateStrategy:
    type: RollingUpdate
---
# ClusterIP Service in front of the thanos-storegateway-kids pods; its name is
# what the extra --store argument of thanos-query resolves.
apiVersion: v1
kind: Service
metadata:
  labels:
    app.kubernetes.io/component: storegateway-kids
    app.kubernetes.io/instance: thanos
    app.kubernetes.io/name: thanos
  name: thanos-storegateway-kids
  namespace: monitoring
spec:
  internalTrafficPolicy: Cluster
  ipFamilies:
    - IPv4
  ipFamilyPolicy: SingleStack
  ports:
    - name: http
      port: 9090
      protocol: TCP
      targetPort: http
    - name: grpc
      port: 10901
      protocol: TCP
      targetPort: grpc
  selector:
    app.kubernetes.io/component: storegateway-kids
    app.kubernetes.io/instance: thanos
    app.kubernetes.io/name: thanos
  sessionAffinity: None
  type: ClusterIP
EOF
kubectl apply -f /data/thanos/storegateway-kids.yaml
修改lady集群中的thanos-query
# Open the thanos-query Deployment of the lady cluster in an editor and extend the
# container args so that they contain the --store entries listed below.
# NOTE(review): changes made with "kubectl edit" are presumably overwritten by the
# next "helm upgrade" of the thanos release; verify whether the chart offers a
# values setting for extra stores if this must persist.
kubectl edit -n monitoring deployments.apps thanos-query
- --store=dnssrv+_grpc._tcp.kube-prometheus-stack-thanos-discovery.monitoring.svc.cluster.local
- --store=dnssrv+_grpc._tcp.thanos-storegateway.monitoring.svc.cluster.local
- --store=dnssrv+_grpc._tcp.thanos-ruler.monitoring.svc.cluster.local
- --store=dnssrv+_grpc._tcp.thanos-query-kids.monitoring.svc.cluster.local #add this entry; it points at kids.cn
- --store=dnssrv+_grpc._tcp.thanos-storegateway-kids.monitoring.svc.cluster.local #add this entry; it points at kids.cn
验证
lady集群
kubectl get pod -n monitoring
NAME READY STATUS RESTARTS AGE
alertmanager-kube-prometheus-stack-alertmanager-0 2/2 Running 2 (44h ago) 2d1h
kube-prometheus-stack-grafana-799446c5b9-8h2kh 3/3 Running 3 (44h ago) 2d1h
kube-prometheus-stack-kube-state-metrics-6c5d86887c-hr7l7 1/1 Running 1 (44h ago) 2d1h
kube-prometheus-stack-operator-5bbb5f4f64-dk5dr 1/1 Running 1 (44h ago) 2d1h
kube-prometheus-stack-prometheus-node-exporter-r6pcz 1/1 Running 1 (44h ago) 2d1h
prometheus-kube-prometheus-stack-prometheus-0 3/3 Running 3 (44h ago) 2d1h
thanos-compactor-66ccd948d-g72zt 1/1 Running 2 (44h ago) 2d
thanos-query-5df6c68bc5-vptrq 1/1 Running 0 53m
thanos-query-frontend-59df69d5c-gndz4 1/1 Running 1 (44h ago) 2d
thanos-ruler-0 1/1 Running 1 (44h ago) 2d
thanos-storegateway-0 1/1 Running 2 (44h ago) 2d
thanos-storegateway-kids-0 1/1 Running 0 155m
kids集群
kubectl get pod -n monitoring
NAME READY STATUS RESTARTS AGE
alertmanager-kube-prometheus-stack-alertmanager-0 2/2 Running 0 44h
kube-prometheus-stack-grafana-799446c5b9-fdgng 3/3 Running 0 44h
kube-prometheus-stack-kube-state-metrics-6c5d86887c-m7tw5 1/1 Running 0 44h
kube-prometheus-stack-operator-5bbb5f4f64-rxxn6 1/1 Running 0 44h
kube-prometheus-stack-prometheus-node-exporter-fqtjl 1/1 Running 0 44h
prometheus-kube-prometheus-stack-prometheus-0 3/3 Running 0 44h
thanos-compactor-66ccd948d-7tfzd 1/1 Running 0 43h
thanos-query-f6ffddfb4-8qhdj 1/1 Running 0 23h
thanos-query-frontend-59df69d5c-pwbxs 1/1 Running 0 43h
thanos-storegateway-0 1/1 Running 0 43h
thanos-query-frontend配置 https://blog.csdn.net/qq_34556414/article/details/124997111