- The deployment approach used here is as follows:
- Use a custom Debian image as the base image
  - Goal 1: plenty of troubleshooting tools can be baked into it
  - Goal 2: a single, unified base image that is easier to maintain
  - Goal 3: CentOS is no longer maintained, so this avoids an awkward situation later
- Persist data on GlusterFS, and mount the binaries into the pods as a PV and PVC
- The Kafka binary distribution already bundles ZooKeeper, so only the Kafka binaries are used here
FROM debian:11
ENV TZ=Asia/Shanghai
ENV LANG=en_US.UTF-8
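# Pin apt to the bullseye codename rather than "stable", so the sources keep
# pointing at Debian 11 even after the next stable release, then install
# common troubleshooting tools in a single layer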
RUN echo "" > /etc/apt/sources.list && \
    for i in bullseye bullseye-updates bullseye-proposed-updates;\
do \
echo "deb http://mirrors.cloud.aliyuncs.com/debian ${i} main contrib non-free" >> /etc/apt/sources.list;\
echo "deb-src http://mirrors.cloud.aliyuncs.com/debian ${i} main contrib non-free" >> /etc/apt/sources.list;\
echo "deb http://mirrors.aliyun.com/debian ${i} main contrib non-free" >> /etc/apt/sources.list;\
echo "deb-src http://mirrors.aliyun.com/debian ${i} main contrib non-free" >> /etc/apt/sources.list;\
done && \
apt-get update && \
DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends vim \
curl wget bind9-utils telnet unzip net-tools tree nmap ncat && \
apt-get clean && apt-get autoclean
DEBIAN_FRONTEND=noninteractive
- Runs apt-get in non-interactive mode, so package installation never stops to prompt for input
--no-install-recommends
- This option tells apt-get not to install the packages recommended alongside the requested ones
- A Debian package's dependencies come in two kinds: "Depends" and "Recommends"
- With this option, only the packages listed under "Depends" are installed, and the "Recommends" section is skipped
- If you do not need all of the recommended packages, this helps keep the installed system leaner
docker build -t debian11_amd64_base:v1.0 .
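You can sanity-check the image by running a couple of the bundled tools, for example:
docker run --rm debian11_amd64_base:v1.0 bash -c 'nmap --version && curl --version'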
My environment has a symlink set up, so the k that appears in the commands below stands for the kubectl command.
k create ns bigdata
As mentioned at the beginning, I use GlusterFS for persistence here, and it has to be exposed to the Kubernetes cluster through Endpoints. For background, see my other articles:
- Deploying the GlusterFS distributed storage system on CentOS 7.6
- Using GlusterFS as persistent storage for a Kubernetes cluster
---
apiVersion: v1
kind: Endpoints
metadata:
annotations:
name: glusterfs-bigdata
namespace: bigdata
subsets:
- addresses:
- ip: 172.72.0.130
- ip: 172.72.0.131
ports:
- port: 49152
protocol: TCP
---
apiVersion: v1
kind: Service
metadata:
annotations:
name: glusterfs-bigdata
namespace: bigdata
spec:
ports:
- port: 49152
protocol: TCP
targetPort: 49152
sessionAffinity: None
type: ClusterIP
---
apiVersion: v1
kind: PersistentVolume
metadata:
annotations:
labels:
software: bigdata
name: bigdata-software-pv
spec:
accessModes:
- ReadOnlyMany
capacity:
storage: 10Gi
glusterfs:
endpoints: glusterfs-bigdata
path: online-share/kubernetes/software/
readOnly: false
persistentVolumeReclaimPolicy: Retain
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
annotations:
labels:
software: bigdata
name: bigdata-software-pvc
namespace: bigdata
spec:
accessModes:
- ReadOnlyMany
resources:
requests:
storage: 10Gi
selector:
matchLabels:
software: bigdata
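Assuming the manifests above are saved in one file (the name gfs-bigdata.yaml below is just a placeholder), apply them with:
k apply -f gfs-bigdata.yaml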
Check that the PVC is in the Bound state:
k get pvc -n bigdata
If everything was created correctly, STATUS shows Bound:
NAME STATUS VOLUME CAPACITY ACCESS MODES STORAGECLASS AGE
bigdata-software-pvc Bound bigdata-software-pv 10Gi ROX 87s
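The pods below expect the Kafka and JDK distributions to already be unpacked under online-share/kubernetes/software/ on the GlusterFS volume, since they reference /app/software/kafka_2.12-2.3.0 and /app/software/jdk1.8.0_231. A minimal staging sketch, assuming the volume is mounted at /mnt/gfs on a GlusterFS client and the two tarballs have already been downloaded to the current directory:
mount -t glusterfs 172.72.0.130:/online-share /mnt/gfs
mkdir -p /mnt/gfs/kubernetes/software
tar -xzf kafka_2.12-2.3.0.tgz -C /mnt/gfs/kubernetes/software
tar -xzf jdk-8u231-linux-x64.tar.gz -C /mnt/gfs/kubernetes/software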
---
apiVersion: v1
data:
startZk.sh: |-
#!/bin/bash
set -x
echo "${POD_NAME##*-}" > ${ZK_DATA}/myid
sed "s|{{ ZK_DATA }}|${ZK_DATA}|g" ${CM_DIR}/zookeeper.properties > ${ZK_CONF}/zookeeper.properties
echo "" >> ${ZK_CONF}/zookeeper.properties
n=0
while (( n++ < ${REPLICAS} ))
do
echo "server.$((n-1))=${APP_NAME}-$((n-1)).${APP_NAME}-svc.${NAMESPACE}.svc.cluster.local:2888:3888" >> ${ZK_CONF}/zookeeper.properties
done
cat ${ZK_CONF}/zookeeper.properties
    # export, so that zookeeper-server-start.sh (a child process) actually picks up the heap settings
    export KAFKA_HEAP_OPTS="-Xmx${JAVA_OPT_XMX} -Xms${JAVA_OPT_XMS} -Xss512k -XX:+UseG1GC -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:MaxGCPauseMillis=200 -XX:InitiatingHeapOccupancyPercent=45 -Djava.io.tmpdir=/tmp -Xloggc:${LOG_DIR}/gc.log -Dsun.net.inetaddr.ttl=10"
${ZK_HOME}/bin/zookeeper-server-start.sh ${ZK_CONF}/zookeeper.properties
zookeeper.properties: |-
dataDir={{ ZK_DATA }}
clientPort=2181
maxClientCnxns=0
initLimit=1
syncLimit=1
kind: ConfigMap
metadata:
annotations:
labels:
app: zk
name: zk-cm
namespace: bigdata
---
apiVersion: v1
kind: Service
metadata:
annotations:
labels:
app: zk
name: zk-svc
namespace: bigdata
spec:
  # headless service: the zk-N.zk-svc pod DNS records used in startZk.sh require clusterIP: None
  clusterIP: None
ports:
- name: tcp
port: 2181
- name: server
port: 2888
- name: elect
port: 3888
selector:
app: zk
Before creating the StatefulSet, the nodes have to be labeled, because the pod spec below uses both node affinity and pod anti-affinity: ZooKeeper persists its data through a hostPath volume, so the pods must be pinned to a fixed set of nodes, and pod anti-affinity ensures each node runs at most one ZooKeeper pod, so the hostPath data cannot collide.
k label node 172.72.0.129 zk=
k label node 172.72.0.130 zk=
k label node 172.72.0.131 zk=
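You can confirm the labels landed on the expected nodes:
k get nodes -l zk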
Create the StatefulSet:
---
apiVersion: apps/v1
kind: StatefulSet
metadata:
annotations:
labels:
app: zk
name: zk
namespace: bigdata
spec:
replicas: 3
selector:
matchLabels:
app: zk
serviceName: zk-svc
template:
metadata:
annotations:
labels:
app: zk
spec:
affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: zk
operator: Exists
podAntiAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
- labelSelector:
matchExpressions:
- key: app
operator: In
values:
- zk
topologyKey: kubernetes.io/hostname
containers:
- command:
- bash
- /app/zk/cm/startZk.sh
env:
- name: APP_NAME
value: zk
- name: NODE_NAME
valueFrom:
fieldRef:
fieldPath: spec.nodeName
- name: POD_NAME
valueFrom:
fieldRef:
fieldPath: metadata.name
- name: NAMESPACE
valueFrom:
fieldRef:
fieldPath: metadata.namespace
- name: POD_IP
valueFrom:
fieldRef:
fieldPath: status.podIP
- name: ZK_HOME
value: /app/software/kafka_2.12-2.3.0
- name: REPLICAS
value: "3"
- name: ZK_DATA
value: /app/zk/data
        # LOG_DIR is the environment variable used by kafka-run-class.sh when it starts ZooKeeper
- name: LOG_DIR
value: /app/zk/log
- name: ZK_CONF
value: /app/zk/conf
- name: CM_DIR
value: /app/zk/cm
- name: JAVA_HOME
value: /app/software/jdk1.8.0_231
- name: JAVA_OPT_XMS
value: 512m
- name: JAVA_OPT_XMX
value: 512m
image: debian11_amd64_base:v1.0
imagePullPolicy: IfNotPresent
livenessProbe:
tcpSocket:
port: 2181
failureThreshold: 3
initialDelaySeconds: 10
periodSeconds: 30
successThreshold: 1
timeoutSeconds: 5
readinessProbe:
tcpSocket:
port: 2181
failureThreshold: 3
initialDelaySeconds: 20
periodSeconds: 30
successThreshold: 1
timeoutSeconds: 5
name: zk
ports:
- containerPort: 2181
name: tcp
- containerPort: 2888
name: server
- containerPort: 3888
name: elect
volumeMounts:
- mountPath: /app/zk/data
name: data
- mountPath: /app/zk/log
name: log
- mountPath: /app/zk/cm
name: cm
- mountPath: /app/zk/conf
name: conf
- mountPath: /app/software
name: software
readOnly: true
restartPolicy: Always
securityContext: {}
terminationGracePeriodSeconds: 0
volumes:
- emptyDir: {}
name: log
- emptyDir: {}
name: conf
- configMap:
name: zk-cm
name: cm
- name: software
persistentVolumeClaim:
claimName: bigdata-software-pvc
- hostPath:
path: /data/k8s_data/zookeeper
type: DirectoryOrCreate
name: data
---
apiVersion: v1
data:
server.properties: |-
broker.id={{ broker.id }}
broker.rack={{ broker.rack }}
log.dirs={{ DATA_DIR }}
    listeners=INTERNAL://0.0.0.0:9092,EXTERNAL://0.0.0.0:9093
advertised.listeners=INTERNAL://{{ broker.name }}:9092,EXTERNAL://{{ broker.host }}:9093
listener.security.protocol.map=INTERNAL:PLAINTEXT,EXTERNAL:PLAINTEXT
inter.broker.listener.name=INTERNAL
zookeeper.connect={{ ZOOKEEPER_CONNECT }}
auto.create.topics.enable=false
default.replication.factor=2
    num.partitions=3
    num.network.threads=3
    num.io.threads=6
    socket.send.buffer.bytes=102400
    socket.receive.buffer.bytes=102400
    socket.request.max.bytes=104857600
    num.recovery.threads.per.data.dir=1
    offsets.topic.replication.factor=2
    transaction.state.log.replication.factor=2
    transaction.state.log.min.isr=2
    log.retention.hours=168
    log.segment.bytes=1073741824
    log.retention.check.interval.ms=300000
    zookeeper.connection.timeout.ms=6000
    group.initial.rebalance.delay.ms=0
    delete.topic.enable=true
startKafka.sh: |-
#!/bin/bash
set -x
    # reuse the broker.id recorded in meta.properties if the data dir was already
    # initialized, otherwise derive it from the pod ordinal
    if [ -f ${DATA_DIR}/meta.properties ];then
      KAFKA_BROKER_ID=$(awk -F '=' '/broker.id/ {print $NF}' ${DATA_DIR}/meta.properties)
    else
      KAFKA_BROKER_ID=${POD_NAME##*-}
    fi
ZOOKEEPER_CONNECT='zk-0.zk-svc.bigdata.svc.cluster.local:2181,zk-1.zk-svc.bigdata.svc.cluster.local:2181,zk-2.zk-svc.bigdata.svc.cluster.local:2181'
sed "s|{{ broker.id }}|${KAFKA_BROKER_ID}|g" ${CM_DIR}/server.properties > ${CONF_DIR}/server.properties
sed -i "s|{{ broker.rack }}|${NODE_NAME}|g" ${CONF_DIR}/server.properties
sed -i "s|{{ broker.host }}|${NODE_NAME}|g" ${CONF_DIR}/server.properties
sed -i "s|{{ broker.name }}|${POD_NAME}.${APP_NAME}-svc.${NAMESPACE}.svc.cluster.local|g" ${CONF_DIR}/server.properties
sed -i "s|{{ ZOOKEEPER_CONNECT }}|${ZOOKEEPER_CONNECT}|g" ${CONF_DIR}/server.properties
sed -i "s|{{ DATA_DIR }}|${DATA_DIR}|g" ${CONF_DIR}/server.properties
cat ${CONF_DIR}/server.properties
export KAFKA_HEAP_OPTS="-Xmx${JAVA_OPT_XMX} -Xms${JAVA_OPT_XMS} -Xss512k -XX:+UseG1GC -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:MaxGCPauseMillis=200 -XX:InitiatingHeapOccupancyPercent=45 -Djava.io.tmpdir=/tmp -Xloggc:${LOG_DIR}/gc.log -Dsun.net.inetaddr.ttl=10"
${KAFKA_HOME}/bin/kafka-server-start.sh ${CONF_DIR}/server.properties
sleep 3
kind: ConfigMap
metadata:
annotations:
labels:
app: kafka
name: kafka-cm
namespace: bigdata
---
apiVersion: v1
kind: Service
metadata:
annotations:
labels:
app: kafka
name: kafka-svc
namespace: bigdata
spec:
clusterIP: None
ports:
- name: tcp
port: 9092
targetPort: 9092
selector:
app: kafka
---
apiVersion: apps/v1
kind: StatefulSet
metadata:
annotations:
labels:
app: kafka
name: kafka
namespace: bigdata
spec:
replicas: 3
selector:
matchLabels:
app: kafka
serviceName: kafka-svc
template:
metadata:
labels:
app: kafka
spec:
affinity:
podAntiAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
- labelSelector:
matchExpressions:
- key: app
operator: In
values:
- kafka
topologyKey: kubernetes.io/hostname
containers:
- command:
- /bin/bash
- -c
- . ${CM_DIR}/startKafka.sh
env:
- name: APP_NAME
value: kafka
- name: NODE_NAME
valueFrom:
fieldRef:
fieldPath: spec.nodeName
- name: POD_NAME
valueFrom:
fieldRef:
fieldPath: metadata.name
- name: NAMESPACE
valueFrom:
fieldRef:
fieldPath: metadata.namespace
- name: POD_IP
valueFrom:
fieldRef:
fieldPath: status.podIP
- name: KAFKA_HOME
value: /app/software/kafka_2.12-2.3.0
- name: DATA_DIR
value: /app/kafka/data
- name: LOG_DIR
value: /app/kafka/log
- name: CONF_DIR
value: /app/kafka/conf
- name: CM_DIR
value: /app/kafka/configmap
- name: JAVA_HOME
value: /app/software/jdk1.8.0_231
- name: JAVA_OPT_XMS
value: 512m
- name: JAVA_OPT_XMX
value: 512m
name: kafka
image: debian11_amd64_base:v1.0
imagePullPolicy: IfNotPresent
livenessProbe:
failureThreshold: 3
initialDelaySeconds: 60
periodSeconds: 20
successThreshold: 1
tcpSocket:
port: kafka
timeoutSeconds: 1
readinessProbe:
failureThreshold: 3
initialDelaySeconds: 20
periodSeconds: 20
successThreshold: 1
tcpSocket:
port: kafka
timeoutSeconds: 1
ports:
- containerPort: 9092
hostPort: 9092
name: kafka
- containerPort: 9093
hostPort: 9093
name: kafkaout
volumeMounts:
- mountPath: /app/kafka/data
name: data
- mountPath: /app/kafka/log
name: log
- mountPath: /app/kafka/configmap
name: configmap
- mountPath: /app/kafka/conf
name: conf
- mountPath: /app/software
name: software
readOnly: true
restartPolicy: Always
securityContext: {}
terminationGracePeriodSeconds: 10
volumes:
- emptyDir: {}
name: log
- emptyDir: {}
name: conf
- configMap:
name: kafka-cm
name: configmap
- name: software
persistentVolumeClaim:
claimName: bigdata-software-pvc
- name: data
hostPath:
path: /data/k8s_data/kafka
type: DirectoryOrCreate
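Once all the pods are Running, a quick end-to-end check is to create and then describe a test topic from inside one of the brokers (the topic name smoke-test is arbitrary; since auto.create.topics.enable is false, the topic has to be created explicitly):
k get pods -n bigdata
k exec -it kafka-0 -n bigdata -- /app/software/kafka_2.12-2.3.0/bin/kafka-topics.sh --bootstrap-server localhost:9092 --create --topic smoke-test --partitions 3 --replication-factor 2
k exec -it kafka-0 -n bigdata -- /app/software/kafka_2.12-2.3.0/bin/kafka-topics.sh --bootstrap-server localhost:9092 --describe --topic smoke-test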