etcd用k8s的statefulset实现, 一个statefulset的yaml就能实现etcd的集群的动态伸缩, 当你减少statefulset的replicas的时候(或者节点出故障down掉), 自动的把节点从etcd集群中移除,当时增加statefulset的replicas的时候(或者故障节点恢复),自动的把节点添加到etcd集群中. 但是当你减少的集群节点数目小于初始化集群的时候设置的最小集群节点个数的时候, 减少集群节点(或者节点出故障down掉)不会将节点从集群中移除, 新增节点(或者故障节点恢复),自动的把新增节点(或者故障节点恢复)更新到etcd集群中. 声明下该statefulset是基于k8s官方提供的原版yaml修改测试后的. 文件位置,最新源码中(老版本源码中不一定会有): https://github.com/kubernetes/kubernetes/test/e2e/testing-manifests/statefulset/etcd 中的yaml. 但是该yaml无法正常的创建etcd集群, 存在很多问题. 可以自己去测试.
修改后的yaml如下. 如果自己的k8s中不提供存储支持, 请自行将yaml的的pvc改成emptyDir的volume即可. 另外镜像可以自己换成etcd官方镜像.
yaml如下:
apiVersion: v1
kind: Service
metadata:
name: etcd
labels:
app: etcd
spec:
ports:
- port: 2380
name: etcd-server
- port: 2379
name: etcd-client
clusterIP: None
selector:
app: etcd
publishNotReadyAddresses: true
---
apiVersion: apps/v1
kind: StatefulSet
metadata:
name: etcd
labels:
app: etcd
spec:
serviceName: etcd
replicas: 3
selector:
matchLabels:
app: etcd
template:
metadata:
name: etcd
labels:
app: etcd
spec:
containers:
- name: etcd
image: gcr.k8s.io/etcd:3.2.24
imagePullPolicy: Always
ports:
- containerPort: 2380
name: peer
- containerPort: 2379
name: client
resources:
requests:
cpu: 100m
memory: 512Mi
env:
- name: INITIAL_CLUSTER_SIZE
value: "3"
- name: SET_NAME
value: etcd
- name: NAMESPACE
valueFrom:
fieldRef:
fieldPath: meta.namepace
volumeMounts:
- name: datadir
mountPath: /var/run/etcd
lifecycle:
preStop:
exec:
command:
- "/bin/sh"
- "-ec"
- |
EPS=""
for i in $(seq 0 $((${INITIAL_CLUSTER_SIZE} - 1))); do
EPS="${EPS}${EPS:+,}http://${SET_NAME}-${i}.${SET_NAME}:2379"
done
HOSTNAME=$(hostname)
member_hash() {
etcdctl member list | grep http://${HOSTNAME}.${SET_NAME}:2380 | cut -d':' -f1 | cut -d'[' -f1
}
# Remove everything otherwise the cluster will no longer scale-up
SET_ID=${HOSTNAME:5:${#HOSTNAME}}
# adding a new member to existing cluster (assuming all initial pods are available)
if [ "${SET_ID}" -ge ${INITIAL_CLUSTER_SIZE} ]; then
echo "Removing ${HOSTNAME} from etcd cluster"
ETCDCTL_ENDPOINT=${EPS} etcdctl member remove $(member_hash)
if [ $? -eq 0 ]; then
rm -rf /var/run/etcd/*
fi
fi
command:
- "/bin/sh"
- "-ec"
- |
HOSTNAME=$(hostname)
# store member id into PVC for later member replacement
collect_member() {
while ! etcdctl member list &>/dev/null; do sleep 1; done
etcdctl member list | grep http://${HOSTNAME}.${SET_NAME}:2380 | cut -d':' -f1 | cut -d'[' -f1 > /var/run/etcd/member_id
exit 0
}
eps() {
EPS=""
for i in $(seq 0 $((${INITIAL_CLUSTER_SIZE} - 1))); do
EPS="${EPS}${EPS:+,}http://${SET_NAME}-${i}.${SET_NAME}:2379"
done
echo ${EPS}
}
member_hash() {
etcdctl member list | grep http://${HOSTNAME}.${SET_NAME}:2380 | cut -d':' -f1 | cut -d'[' -f1
}
# re-joining after failure?
if [ -e /var/run/etcd/default.etcd ]; then
echo "Re-joining etcd member"
member_id=$(cat /var/run/etcd/member_id)
# re-join member
POD_IP=$(hostname -i)
ETCDCTL_ENDPOINT=$(eps) etcdctl member update ${member_id} http://${HOSTNAME}.${SET_NAME}:2380
exec etcd --name ${HOSTNAME} \
--listen-peer-urls http://${POD_IP}:2380 \
--listen-client-urls http://${POD_IP}:2379,http://127.0.0.1:2379 \
--advertise-client-urls http://${POD_IP}:2379 \
--data-dir /var/run/etcd/default.etcd
fi
# etcd-SET_ID
SET_ID=${HOSTNAME:5:${#HOSTNAME}}
# adding a new member to existing cluster (assuming all initial pods are available)
if [ "${SET_ID}" -ge ${INITIAL_CLUSTER_SIZE} ]; then
export ETCDCTL_ENDPOINT=$(eps)
# member already added?
MEMBER_HASH=$(member_hash)
if [ -n "${MEMBER_HASH}" ]; then
# the member hash exists but for some reason etcd failed
# as the datadir has not be created, we can remove the member
# and retrieve new hash
etcdctl member remove ${MEMBER_HASH}
fi
echo "Adding new member"
etcdctl member add ${HOSTNAME} http://${HOSTNAME}.${SET_NAME}:2380 | grep "^ETCD_" > /var/run/etcd/new_member_envs
if [ $? -ne 0 ]; then
echo "Exiting"
rm -f /var/run/etcd/new_member_envs
exit 1
fi
cat /var/run/etcd/new_member_envs
source /var/run/etcd/new_member_envs
collect_member &
POD_IP=$(hostname -i)
exec etcd --name ${HOSTNAME} \
--listen-peer-urls http://${POD_IP}:2380 \
--listen-client-urls http://${POD_IP}:2379,http://127.0.0.1:2379 \
--advertise-client-urls http://${POD_IP}:2379 \
--data-dir /var/run/etcd/default.etcd \
--initial-advertise-peer-urls http://${HOSTNAME}.${SET_NAME}:2380 \
--initial-cluster ${ETCD_INITIAL_CLUSTER} \
--initial-cluster-state ${ETCD_INITIAL_CLUSTER_STATE}
fi
for i in $(seq 0 $((${INITIAL_CLUSTER_SIZE} - 1))); do
while true; do
echo "Waiting for ${SET_NAME}-${i}.${SET_NAME} to come up"
ping -W 1 -c 1 ${SET_NAME}-${i}.${SET_NAME}.${NAMESPACE}.svc.cluster.local > /dev/null && break
sleep 1s
done
done
PEERS=""
for i in $(seq 0 $((${INITIAL_CLUSTER_SIZE} - 1))); do
PEERS="${PEERS}${PEERS:+,}${SET_NAME}-${i}=http://${SET_NAME}-${i}.${SET_NAME}:2380"
done
collect_member &
# join member
POD_IP=$(hostname -i)
exec etcd --name ${HOSTNAME} \
--initial-advertise-peer-urls http://${POD_IP}:2380 \
--listen-peer-urls http://${POD_IP}:2380 \
--listen-client-urls http://${POD_IP}:2379,http://127.0.0.1:2379 \
--advertise-client-urls http://${POD_IP}:2379 \
--initial-cluster-token etcd-cluster-1 \
--initial-cluster ${PEERS} \
--initial-cluster-state new \
--data-dir /var/run/etcd/default.etcd
volumeClaimTemplates:
- metadata:
name: datadir
spec:
accessModes:
- "ReadWriteOnce"
resources:
requests:
# upstream recommended max is 700M
storage: 1Gi
emptyDir的yaml如下:
apiVersion: v1
kind: Service
metadata:
name: etcd
labels:
app: etcd
spec:
ports:
- port: 2380
name: etcd-server
- port: 2379
name: etcd-client
clusterIP: None
selector:
app: etcd
publishNotReadyAddresses: true
---
apiVersion: apps/v1
kind: StatefulSet
metadata:
name: etcd
labels:
app: etcd
spec:
serviceName: etcd
replicas: 3
selector:
matchLabels:
app: etcd
template:
metadata:
name: etcd
labels:
app: etcd
spec:
volumes:
- name: datadir
emptyDir: {}
containers:
- name: etcd
image: gcr.k8s.io/etcd:3.2.24
imagePullPolicy: Always
ports:
- containerPort: 2380
name: peer
- containerPort: 2379
name: client
resources:
requests:
cpu: 100m
memory: 512Mi
env:
- name: INITIAL_CLUSTER_SIZE
value: "3"
- name: SET_NAME
value: etcd
- name: NAMESPACE
valueFrom:
fieldRef:
fieldPath: meta.namepace
volumeMounts:
- name: datadir
mountPath: /var/run/etcd
lifecycle:
preStop:
exec:
command:
- "/bin/sh"
- "-ec"
- |
EPS=""
for i in $(seq 0 $((${INITIAL_CLUSTER_SIZE} - 1))); do
EPS="${EPS}${EPS:+,}http://${SET_NAME}-${i}.${SET_NAME}:2379"
done
HOSTNAME=$(hostname)
member_hash() {
etcdctl member list | grep http://${HOSTNAME}.${SET_NAME}:2380 | cut -d':' -f1 | cut -d'[' -f1
}
# Remove everything otherwise the cluster will no longer scale-up
SET_ID=${HOSTNAME:5:${#HOSTNAME}}
# adding a new member to existing cluster (assuming all initial pods are available)
if [ "${SET_ID}" -ge ${INITIAL_CLUSTER_SIZE} ]; then
echo "Removing ${HOSTNAME} from etcd cluster"
ETCDCTL_ENDPOINT=${EPS} etcdctl member remove $(member_hash)
if [ $? -eq 0 ]; then
rm -rf /var/run/etcd/*
fi
fi
command:
- "/bin/sh"
- "-ec"
- |
HOSTNAME=$(hostname)
# store member id into PVC for later member replacement
collect_member() {
while ! etcdctl member list &>/dev/null; do sleep 1; done
etcdctl member list | grep http://${HOSTNAME}.${SET_NAME}:2380 | cut -d':' -f1 | cut -d'[' -f1 > /var/run/etcd/member_id
exit 0
}
eps() {
EPS=""
for i in $(seq 0 $((${INITIAL_CLUSTER_SIZE} - 1))); do
EPS="${EPS}${EPS:+,}http://${SET_NAME}-${i}.${SET_NAME}:2379"
done
echo ${EPS}
}
member_hash() {
etcdctl member list | grep http://${HOSTNAME}.${SET_NAME}:2380 | cut -d':' -f1 | cut -d'[' -f1
}
# re-joining after failure?
if [ -e /var/run/etcd/default.etcd ]; then
echo "Re-joining etcd member"
member_id=$(cat /var/run/etcd/member_id)
# re-join member
POD_IP=$(hostname -i)
ETCDCTL_ENDPOINT=$(eps) etcdctl member update ${member_id} http://${HOSTNAME}.${SET_NAME}:2380
exec etcd --name ${HOSTNAME} \
--listen-peer-urls http://${POD_IP}:2380 \
--listen-client-urls http://${POD_IP}:2379,http://127.0.0.1:2379 \
--advertise-client-urls http://${POD_IP}:2379 \
--data-dir /var/run/etcd/default.etcd
fi
# etcd-SET_ID
SET_ID=${HOSTNAME:5:${#HOSTNAME}}
# adding a new member to existing cluster (assuming all initial pods are available)
if [ "${SET_ID}" -ge ${INITIAL_CLUSTER_SIZE} ]; then
export ETCDCTL_ENDPOINT=$(eps)
# member already added?
MEMBER_HASH=$(member_hash)
if [ -n "${MEMBER_HASH}" ]; then
# the member hash exists but for some reason etcd failed
# as the datadir has not be created, we can remove the member
# and retrieve new hash
etcdctl member remove ${MEMBER_HASH}
fi
echo "Adding new member"
etcdctl member add ${HOSTNAME} http://${HOSTNAME}.${SET_NAME}:2380 | grep "^ETCD_" > /var/run/etcd/new_member_envs
if [ $? -ne 0 ]; then
echo "Exiting"
rm -f /var/run/etcd/new_member_envs
exit 1
fi
cat /var/run/etcd/new_member_envs
source /var/run/etcd/new_member_envs
collect_member &
POD_IP=$(hostname -i)
exec etcd --name ${HOSTNAME} \
--listen-peer-urls http://${POD_IP}:2380 \
--listen-client-urls http://${POD_IP}:2379,http://127.0.0.1:2379 \
--advertise-client-urls http://${POD_IP}:2379 \
--data-dir /var/run/etcd/default.etcd \
--initial-advertise-peer-urls http://${HOSTNAME}.${SET_NAME}:2380 \
--initial-cluster ${ETCD_INITIAL_CLUSTER} \
--initial-cluster-state ${ETCD_INITIAL_CLUSTER_STATE}
fi
for i in $(seq 0 $((${INITIAL_CLUSTER_SIZE} - 1))); do
while true; do
echo "Waiting for ${SET_NAME}-${i}.${SET_NAME} to come up"
ping -W 1 -c 1 ${SET_NAME}-${i}.${SET_NAME}.${NAMESPACE}.svc.cluster.local > /dev/null && break
sleep 1s
done
done
PEERS=""
for i in $(seq 0 $((${INITIAL_CLUSTER_SIZE} - 1))); do
PEERS="${PEERS}${PEERS:+,}${SET_NAME}-${i}=http://${SET_NAME}-${i}.${SET_NAME}:2380"
done
collect_member &
# join member
POD_IP=$(hostname -i)
exec etcd --name ${HOSTNAME} \
--initial-advertise-peer-urls http://${POD_IP}:2380 \
--listen-peer-urls http://${POD_IP}:2380 \
--listen-client-urls http://${POD_IP}:2379,http://127.0.0.1:2379 \
--advertise-client-urls http://${POD_IP}:2379 \
--initial-cluster-token etcd-cluster-1 \
--initial-cluster ${PEERS} \
--initial-cluster-state new \
--data-dir /var/run/etcd/default.etcd
# volumeClaimTemplates:
# - metadata:
# name: datadir
# spec:
# accessModes:
# - "ReadWriteOnce"
# resources:
# requests:
# # upstream recommended max is 700M
# storage: 1Gi