The goal of this walkthrough is to build a three-node ZooKeeper cluster on Kubernetes. Because a ZooKeeper ensemble needs persistent storage, we first prepare three Persistent Volumes (PVs), one for the persistent data directory of each of the three zk pods.
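The PV manifests below use hostPath, so the backing directories must exist on the node before the volumes are used. A minimal sketch, assuming a single-node cluster where all three directories sit on the same host:

mkdir -p /data/share/pv/zk01 /data/share/pv/zk02 /data/share/pv/zk03

With the directories in place, write zk-pv.yaml: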
apiVersion: v1
kind: PersistentVolume
metadata:
  name: k8s-pv-zk01
  namespace: tools
  labels:
    app: zk
  annotations:
    volume.beta.kubernetes.io/storage-class: "anything"
spec:
  capacity:
    storage: 5Gi
  accessModes:
    - ReadWriteOnce
  hostPath:
    path: /data/share/pv/zk01
  persistentVolumeReclaimPolicy: Recycle
---
apiVersion: v1
kind: PersistentVolume
metadata:
  name: k8s-pv-zk02
  namespace: tools
  labels:
    app: zk
  annotations:
    volume.beta.kubernetes.io/storage-class: "anything"
spec:
  capacity:
    storage: 5Gi
  accessModes:
    - ReadWriteOnce
  hostPath:
    path: /data/share/pv/zk02
  persistentVolumeReclaimPolicy: Recycle
---
apiVersion: v1
kind: PersistentVolume
metadata:
  name: k8s-pv-zk03
  namespace: tools
  labels:
    app: zk
  annotations:
    volume.beta.kubernetes.io/storage-class: "anything"
spec:
  capacity:
    storage: 5Gi
  accessModes:
    - ReadWriteOnce
  hostPath:
    path: /data/share/pv/zk03
  persistentVolumeReclaimPolicy: Recycle
Create the PVs with the following command:
Core command: kubectl create -f zk-pv.yaml (or, equivalently, kubectl apply -f zk-pv.yaml)
Note that PersistentVolumes are cluster-scoped; the namespace field in their metadata is accepted but ignored.
Example:
[root@master ~]# mkdir chaitc-zookeeper
[root@master ~]# cd chaitc-zookeeper/
[root@master chaitc-zookeeper]# vim zk-pv.yaml
[root@master chaitc-zookeeper]# kubectl create -f zk-pv.yaml
persistentvolume/k8s-pv-zk01 created
persistentvolume/k8s-pv-zk02 created
persistentvolume/k8s-pv-zk03 created
Verify:
Command: kubectl get pv -o wide
All three PVs should show a STATUS of Available until they are claimed.
Next, use a StatefulSet to deploy the three ZooKeeper nodes, backed by the PVs we just created.
zk.yaml:
apiVersion: v1
kind: Service
metadata:
  name: zk-hs
  namespace: tools
  labels:
    app: zk
spec:
  selector:
    app: zk
  clusterIP: None
  ports:
    - name: server
      port: 2888
    - name: leader-election
      port: 3888
---
apiVersion: v1
kind: Service
metadata:
  name: zk-cs
  namespace: tools
  labels:
    app: zk
spec:
  selector:
    app: zk
  type: NodePort
  ports:
    - name: client
      port: 2181
      nodePort: 31811
---
apiVersion: policy/v1beta1
kind: PodDisruptionBudget
metadata:
  name: zk-pdb
  namespace: tools
spec:
  selector:
    matchLabels:
      app: zk
  maxUnavailable: 1
---
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: zk
  namespace: tools
spec:
  selector:
    matchLabels:
      app: zk # has to match .spec.template.metadata.labels
  serviceName: "zk-hs"
  replicas: 3 # by default is 1
  updateStrategy:
    type: RollingUpdate
  podManagementPolicy: Parallel
  template:
    metadata:
      labels:
        app: zk # has to match .spec.selector.matchLabels
    spec:
      containers:
        - name: zk
          imagePullPolicy: Always
          image: chaotingge/zookeeper:kubernetes-zookeeper1.0-3.4.10
          resources:
            requests:
              memory: "500Mi"
              cpu: "0.5"
          ports:
            - containerPort: 2181
              name: client
            - containerPort: 2888
              name: server
            - containerPort: 3888
              name: leader-election
          command:
            - sh
            - -c
            - "start-zookeeper \
              --servers=3 \
              --data_dir=/var/lib/zookeeper/data \
              --data_log_dir=/var/lib/zookeeper/data/log \
              --conf_dir=/opt/zookeeper/conf \
              --client_port=2181 \
              --election_port=3888 \
              --server_port=2888 \
              --tick_time=2000 \
              --init_limit=10 \
              --sync_limit=5 \
              --heap=512M \
              --max_client_cnxns=60 \
              --snap_retain_count=3 \
              --purge_interval=12 \
              --max_session_timeout=40000 \
              --min_session_timeout=4000 \
              --log_level=INFO"
          readinessProbe:
            exec:
              command:
                - sh
                - -c
                - "zookeeper-ready 2181"
            initialDelaySeconds: 10
            timeoutSeconds: 5
          livenessProbe:
            exec:
              command:
                - sh
                - -c
                - "zookeeper-ready 2181"
            initialDelaySeconds: 10
            timeoutSeconds: 5
          volumeMounts:
            - name: datadir
              mountPath: /var/lib/zookeeper
  volumeClaimTemplates:
    - metadata:
        name: datadir
        annotations:
          volume.beta.kubernetes.io/storage-class: "anything"
      spec:
        accessModes: [ "ReadWriteOnce" ]
        resources:
          requests:
            storage: 1Gi
Deploy it with kubectl apply -f zk.yaml.
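The headless service zk-hs gives each pod a stable DNS name of the form zk-<ordinal>.zk-hs.tools.svc.cluster.local, which is how the ensemble members address one another. Once the pods are Running, you can check this (assuming the image ships the hostname utility, as the stock kubernetes-zookeeper images do):

for i in 0 1 2; do kubectl exec zk-$i -n tools -- hostname -f; done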
Note: if you see the following warning, the YAML needs updating:
Warning: policy/v1beta1 PodDisruptionBudget is deprecated in v1.21+, unavailable in v1.25+; use policy/v1 PodDisruptionBudget
[root@master zk01]# kubectl version
Client Version: version.Info{Major:"1", Minor:"22", GitVersion:"v1.22.4", GitCommit:"b695d79d4f967c403a96986f1750a35eb75e75f1", GitTreeState:"clean", BuildDate:"2021-11-17T15:48:33Z", GoVersion:"go1.16.10", Compiler:"gc", Platform:"linux/amd64"}
Server Version: version.Info{Major:"1", Minor:"22", GitVersion:"v1.22.0", GitCommit:"c2b5237ccd9c0f1d600d3072634ca66cefdf272f", GitTreeState:"clean", BuildDate:"2021-08-04T17:57:25Z", GoVersion:"go1.16.6", Compiler:"gc", Platform:"linux/amd64"}
On this version (v1.22), use policy/v1 instead.
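For reference, the same PDB written against the policy/v1 API (only the apiVersion line changes; the spec fields are unchanged):

apiVersion: policy/v1
kind: PodDisruptionBudget
metadata:
  name: zk-pdb
  namespace: tools
spec:
  selector:
    matchLabels:
      app: zk
  maxUnavailable: 1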
Example (note that the first apply below used nodePort 21811, which is outside the valid NodePort range of 30000-32767; it was corrected to 31811 and reapplied):
[root@master chaitc-zookeeper]# vim zk.yaml
[root@master chaitc-zookeeper]# kubectl apply -f zk.yaml
service/zk-hs created
Warning: policy/v1beta1 PodDisruptionBudget is deprecated in v1.21+, unavailable in v1.25+; use policy/v1 PodDisruptionBudget
poddisruptionbudget.policy/zk-pdb created
statefulset.apps/zk created
The Service "zk-cs" is invalid: spec.ports[0].nodePort: Invalid value: 21811: provided port is not in the valid range. The range of valid ports is 30000-32767
[root@master chaitc-zookeeper]# vim zk.yaml
[root@master chaitc-zookeeper]# kubectl apply -f zk.yaml
service/zk-hs unchanged
service/zk-cs created
Warning: policy/v1beta1 PodDisruptionBudget is deprecated in v1.21+, unavailable in v1.25+; use policy/v1 PodDisruptionBudget
poddisruptionbudget.policy/zk-pdb configured
statefulset.apps/zk configured
[root@master chaitc-zookeeper]# kubectl get pods -n tools
NAME   READY   STATUS    RESTARTS   AGE
zk-0   0/1     Pending   0          82s
zk-1   0/1     Pending   0          82s
zk-2   0/1     Pending   0          82s
[root@master chaitc-zookeeper]# kubectl get pods -n tools
NAME   READY   STATUS    RESTARTS   AGE
zk-0   0/1     Pending   0          86s
zk-1   0/1     Pending   0          86s
zk-2   0/1     Pending   0          86s
[root@master chaitc-zookeeper]# kubectl describe pod zk-0 -n tools
Name: zk-0
Namespace: tools
Priority: 0
Node: <none>
Labels: app=zk
controller-revision-hash=zk-78bbbb488c
statefulset.kubernetes.io/pod-name=zk-0
Annotations: <none>
Status: Pending
IP:
IPs:
Controlled By: StatefulSet/zk
Containers:
zk:
Image: leolee32/kubernetes-library:kubernetes-zookeeper1.0-3.4.10
Ports: 2181/TCP, 2888/TCP, 3888/TCP
Host Ports: 0/TCP, 0/TCP, 0/TCP
Command:
sh
-c
start-zookeeper --servers=3 --data_dir=/var/lib/zookeeper/data --data_log_dir=/var/lib/zookeeper/data/log --conf_dir=/opt/zookeeper/conf --client_port=2181 --election_port=3888 --server_port=2888 --tick_time=2000 --init_limit=10 --sync_limit=5 --heap=512M --max_client_cnxns=60 --snap_retain_count=3 --purge_interval=12 --max_session_timeout=40000 --min_session_timeout=4000 --log_level=INFO
Requests:
cpu: 500m
memory: 500Mi
Liveness: exec [sh -c zookeeper-ready 2181] delay=10s timeout=5s period=10s #success=1 #failure=3
Readiness: exec [sh -c zookeeper-ready 2181] delay=10s timeout=5s period=10s #success=1 #failure=3
Environment: <none>
Mounts:
/var/lib/zookeeper from datadir (rw)
/var/run/secrets/kubernetes.io/serviceaccount from kube-api-access-tzp6v (ro)
Conditions:
Type Status
PodScheduled False
Volumes:
datadir:
Type: PersistentVolumeClaim (a reference to a PersistentVolumeClaim in the same namespace)
ClaimName: datadir-zk-0
ReadOnly: false
kube-api-access-tzp6v:
Type: Projected (a volume that contains injected data from multiple sources)
TokenExpirationSeconds: 3607
ConfigMapName: kube-root-ca.crt
ConfigMapOptional: <nil>
DownwardAPI: true
QoS Class: Burstable
Node-Selectors: <none>
Tolerations: node.kubernetes.io/not-ready:NoExecute op=Exists for 300s
node.kubernetes.io/unreachable:NoExecute op=Exists for 300s
Events:
Type Reason Age From Message
---- ------ ---- ---- -------
Warning FailedScheduling 108s default-scheduler 0/1 nodes are available: 1 pod has unbound immediate PersistentVolumeClaims.
Warning FailedScheduling 107s default-scheduler 0/1 nodes are available: 1 node(s) had taint {node-role.kubernetes.io/master: }, that the pod didn't tolerate.
Warning FailedScheduling 30s default-scheduler 0/1 nodes are available: 1 node(s) had taint {node-role.kubernetes.io/master: }, that the pod didn't tolerate.
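The first event resolves on its own once each pod's PVC binds to one of the PVs created earlier; you can confirm the binding with:

kubectl get pvc -n tools

The taint events, however, need manual intervention.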
Since this walkthrough runs on a single node, the master carries a scheduling taint, as shown below. (On clusters from v1.24 onward the taint key is node-role.kubernetes.io/control-plane rather than node-role.kubernetes.io/master.)
[root@master chaitc-zookeeper]# kubectl get no -o yaml | grep taint -A 5
    taints:
    - effect: NoSchedule
      key: node-role.kubernetes.io/master
  status:
    addresses:
    - address: 172.24.40.43
[root@master chaitc-zookeeper]# kubectl get nodes
NAME                STATUS   ROLES                  AGE    VERSION
master.chaitc.xyz   Ready    control-plane,master   132d   v1.22.4
Run kubectl describe node <node-name>; the output contains the line:
Taints:             node-role.kubernetes.io/master:NoSchedule
Remove the taint so that pods can be scheduled onto the master:
[root@master chaitc-zookeeper]# kubectl taint node master.chaitc.xyz node-role.kubernetes.io/master-
node/master.chaitc.xyz untainted
[root@master chaitc-zookeeper]# kubectl describe node master.chaitc.xyz
the output now shows:
Taints:             <none>
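If you would rather keep the master tainted, an alternative (a sketch, not used in this walkthrough) is to add a toleration to the StatefulSet's pod template:

      tolerations:
        - key: node-role.kubernetes.io/master
          operator: Exists
          effect: NoSchedule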
With the taint removed, the pods start scheduling:
[root@master chaitc-zookeeper]# kubectl get po -n tools
NAME   READY   STATUS    RESTARTS   AGE
zk-0   1/1     Running   0          15m
zk-1   1/1     Running   0          15m
zk-2   0/1     Pending   0          15m
Inspect the Pending pod:
[root@master chaitc-zookeeper]# kubectl describe po zk-2 -n tools
Name: zk-2
Namespace: tools
Priority: 0
Node: <none>
Labels: app=zk
controller-revision-hash=zk-78bbbb488c
statefulset.kubernetes.io/pod-name=zk-2
Annotations: <none>
Status: Pending
IP:
IPs:
Controlled By: StatefulSet/zk
Containers:
zk:
Image: leolee32/kubernetes-library:kubernetes-zookeeper1.0-3.4.10
Ports: 2181/TCP, 2888/TCP, 3888/TCP
Host Ports: 0/TCP, 0/TCP, 0/TCP
Command:
sh
-c
start-zookeeper --servers=3 --data_dir=/var/lib/zookeeper/data --data_log_dir=/var/lib/zookeeper/data/log --conf_dir=/opt/zookeeper/conf --client_port=2181 --election_port=3888 --server_port=2888 --tick_time=2000 --init_limit=10 --sync_limit=5 --heap=512M --max_client_cnxns=60 --snap_retain_count=3 --purge_interval=12 --max_session_timeout=40000 --min_session_timeout=4000 --log_level=INFO
Requests:
cpu: 500m
memory: 500Mi
Liveness: exec [sh -c zookeeper-ready 2181] delay=10s timeout=5s period=10s #success=1 #failure=3
Readiness: exec [sh -c zookeeper-ready 2181] delay=10s timeout=5s period=10s #success=1 #failure=3
Environment: <none>
Mounts:
/var/lib/zookeeper from datadir (rw)
/var/run/secrets/kubernetes.io/serviceaccount from kube-api-access-l9bbx (ro)
Conditions:
Type Status
PodScheduled False
Volumes:
datadir:
Type: PersistentVolumeClaim (a reference to a PersistentVolumeClaim in the same namespace)
ClaimName: datadir-zk-2
ReadOnly: false
kube-api-access-l9bbx:
Type: Projected (a volume that contains injected data from multiple sources)
TokenExpirationSeconds: 3607
ConfigMapName: kube-root-ca.crt
ConfigMapOptional: <nil>
DownwardAPI: true
QoS Class: Burstable
Node-Selectors: <none>
Tolerations: node.kubernetes.io/not-ready:NoExecute op=Exists for 300s
node.kubernetes.io/unreachable:NoExecute op=Exists for 300s
Events:
Type Reason Age From Message
---- ------ ---- ---- -------
Warning FailedScheduling 16m default-scheduler 0/1 nodes are available: 1 pod has unbound immediate PersistentVolumeClaims.
Warning FailedScheduling 16m default-scheduler 0/1 nodes are available: 1 node(s) had taint {node-role.kubernetes.io/master: }, that the pod didn't tolerate.
Warning FailedScheduling 15m default-scheduler 0/1 nodes are available: 1 node(s) had taint {node-role.kubernetes.io/master: }, that the pod didn't tolerate.
Warning FailedScheduling 38s default-scheduler 0/1 nodes are available: 1 Insufficient cpu.
Cause: the node is out of capacity. The final event says it plainly:
1 Insufficient cpu, 1 node(s) had taints that the pod didn't tolerate.
The single node has no CPU left for the third replica, so try adding another node.
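If adding a node is not an option in a test environment, a workaround sketch is to lower the CPU request in zk.yaml and re-apply it (this only buys scheduling headroom; do not undersize a production ensemble):

          resources:
            requests:
              memory: "500Mi"
              cpu: "0.25" # halved from 0.5 so all three replicas fit on one small node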
Verification:
[root@master chaitc-zookeeper]# kubectl get svc -n tools
NAME    TYPE        CLUSTER-IP       EXTERNAL-IP   PORT(S)             AGE
zk-cs   NodePort    10.108.177.133   <none>        2181:31811/TCP      18m
zk-hs   ClusterIP   None             <none>        2888/TCP,3888/TCP   18m
[root@master chaitc-zookeeper]# ss -tan | grep 31811
LISTEN 0 128 *:31811 *:*
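Because zk-cs is a NodePort service, clients outside the cluster can reach ZooKeeper on port 31811 of the node. A quick smoke test using ZooKeeper's four-letter commands, assuming nc is installed on the host (3.4.x whitelists the legacy four-letter commands by default):

echo srvr | nc 172.24.40.43 31811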
[root@master chaitc-zookeeper]# kubectl exec -it zk-1 -n tools -- /bin/sh
# ps -ef
UID PID PPID C STIME TTY TIME CMD
root 1 0 0 14:50 ? 00:00:00 sh -c start-zookeeper --servers=3 --data_dir=/var/lib/zookeeper/data --data_log_dir=/var/lib/zookeeper/data/l
root 6 1 0 14:50 ? 00:00:00 /usr/lib/jvm/java-8-openjdk-amd64/bin/java -Dzookeeper.log.dir=/var/log/zookeeper -Dzookeeper.root.logger=INF
root 572 0 0 14:54 pts/0 00:00:00 /bin/sh
root 597 572 0 14:54 pts/0 00:00:00 ps -ef
# env
ZK_CS_PORT_2181_TCP_PORT=2181
ZK_CS_SERVICE_HOST=10.108.177.133
KUBERNETES_SERVICE_PORT=443
KUBERNETES_PORT=tcp://10.96.0.1:443
ZK_CS_PORT_2181_TCP_PROTO=tcp
HOSTNAME=zk-1
ZK_CS_SERVICE_PORT_CLIENT=2181
HOME=/root
ZK_CS_SERVICE_PORT=2181
ZK_CS_PORT=tcp://10.108.177.133:2181
ZK_DATA_LOG_DIR=/var/lib/zookeeper/log
ZK_CS_PORT_2181_TCP=tcp://10.108.177.133:2181
TERM=xterm
KUBERNETES_PORT_443_TCP_ADDR=10.96.0.1
PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
ZK_LOG_DIR=/var/log/zookeeper
KUBERNETES_PORT_443_TCP_PORT=443
KUBERNETES_PORT_443_TCP_PROTO=tcp
ZK_USER=zookeeper
KUBERNETES_SERVICE_PORT_HTTPS=443
KUBERNETES_PORT_443_TCP=tcp://10.96.0.1:443
KUBERNETES_SERVICE_HOST=10.96.0.1
JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64
PWD=/
ZK_CS_PORT_2181_TCP_ADDR=10.108.177.133
ZK_DATA_DIR=/var/lib/zookeeper/data
# cd /usr/bin
Check the ZooKeeper status:
# zkServer.sh status
ZooKeeper JMX enabled by default
Using config: /usr/bin/../etc/zookeeper/zoo.cfg
Mode: leader
# exit
[root@master chaitc-zookeeper]#
Check the status of all three ZooKeeper nodes:
# check the status of every zk node
for i in 0 1 2; do kubectl exec zk-$i -n tools -- /usr/bin/zkServer.sh status; done
In a healthy ensemble, exactly one node reports Mode: leader and the other two report Mode: follower.
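To confirm that writes replicate across the ensemble, create a znode on one member and read it back from another. A sketch, assuming the image ships zkCli.sh (the stock kubernetes-zookeeper images do):

kubectl exec zk-0 -n tools -- zkCli.sh create /hello world
kubectl exec zk-1 -n tools -- zkCli.sh get /hello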
PodDisruptionBudget
Kubernetes lets you create a PodDisruptionBudget (PDB) object for each application. A PDB limits how many pods of a replicated application may be down at the same time due to voluntary disruptions (node drains, rolling upgrades, and the like).
A PodDisruptionBudget is configured through one of two parameters (minAvailable and maxUnavailable are mutually exclusive; set only one):
minAvailable: the minimum number of application pods that must remain running, as an absolute count or as a percentage of the total.
maxUnavailable: the maximum number of application pods that may be unavailable, again as an absolute count or as a percentage of the total.
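For illustration, here is the same protection for this cluster expressed with minAvailable (the name zk-pdb-min is only for this sketch; with 3 replicas, minAvailable: 2 behaves the same as maxUnavailable: 1):

apiVersion: policy/v1
kind: PodDisruptionBudget
metadata:
  name: zk-pdb-min
  namespace: tools
spec:
  selector:
    matchLabels:
      app: zk
  minAvailable: 2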
Tear down ZooKeeper (workload first, then the claims, then the volumes):
kubectl delete StatefulSet zk -n tools
kubectl delete PodDisruptionBudget zk-pdb -n tools
kubectl delete svc zk-cs -n tools
kubectl delete svc zk-hs -n tools
kubectl delete pvc datadir-zk-0 -n tools
kubectl delete pvc datadir-zk-1 -n tools
kubectl delete pvc datadir-zk-2 -n tools
kubectl delete pv k8s-pv-zk01
kubectl delete pv k8s-pv-zk02
kubectl delete pv k8s-pv-zk03
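To confirm the cleanup, a quick check (pdb is the built-in short name for poddisruptionbudget):

kubectl get statefulset,pdb,svc,pvc -n tools
kubectl get pv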
Tear down Kafka (applies only if you also deployed a Kafka cluster against this ZooKeeper using the same naming conventions):
kubectl delete StatefulSet kafka -n tools
kubectl delete PodDisruptionBudget kafka-pdb -n tools
kubectl delete Service kafka-cs -n tools
kubectl delete Service kafka-hs -n tools
kubectl delete pvc datadir-kafka-0 -n tools
kubectl delete pv k8s-pv-kafka01
kubectl delete pv k8s-pv-kafka02
kubectl delete pv k8s-pv-kafka03