etcd 是基于 Raft 协议的分布式 key-value 存储系统,由 CoreOS 开发,常用于服务发现、共享配置以及并发控制(如 leader 选举、分布式锁等)。Kubernetes 使用 etcd 存储所有运行数据。
本文档介绍部署一个三节点高可用 etcd 集群的步骤:
etcd 集群各节点的 IP 为 172.27.128.11、172.27.128.12、172.27.128.13,节点名称取自 /root/env.sh 中的 ETCD_NAMES 数组。
注意:如果没有特殊指明,本文档的所有操作均在 k8s-master01 节点上执行,然后远程分发文件和执行命令。
# Fetch the etcd v3.3.12 linux/amd64 release tarball into the current directory.
wget https://github.com/etcd-io/etcd/releases/download/v3.3.12/etcd-v3.3.12-linux-amd64.tar.gz
# Unpack the archive under the package directory.
tar --extract --gzip --verbose \
  --file=etcd-v3.3.12-linux-amd64.tar.gz \
  --directory=/opt/kubernetes/package/
# Confirm the extraction by listing the unpacked release directory.
ls /opt/kubernetes/package/etcd-v3.3.12-linux-amd64/
# Copy the etcd binaries to every master node and make them executable.
# MASTER_IPS is presumably an array provided by /root/env.sh.
source /root/env.sh
for master_ip in "${MASTER_IPS[@]}"; do
  printf '\033[31m>>> %s \033[0m\n' "${master_ip}"
  # Create the target directory first so scp cannot fail on a fresh node.
  ssh "root@${master_ip}" "mkdir -p /opt/kubernetes/bin"
  scp /opt/kubernetes/package/etcd-v3.3.12-linux-amd64/etcd* "root@${master_ip}:/opt/kubernetes/bin"
  ssh "root@${master_ip}" "chmod +x /opt/kubernetes/bin/*"
done
# Verify the distribution: list the etcd binaries on each master node.
source /root/env.sh
for master_ip in "${MASTER_IPS[@]}"; do
  printf '\033[31m>>> %s \033[0m\n' "${master_ip}"
  # The glob is inside the quoted remote command, so it expands on the remote host.
  ssh "root@${master_ip}" "ls -ld /opt/kubernetes/bin/etcd*"
done
创建证书签名请求
# Write the etcd certificate signing request (etcd-csr.json).
# The hosts list holds the loopback address plus the three node IPs.
cd /opt/kubernetes/ssl
# Quoted delimiter: the JSON is written literally, with no shell expansion.
cat << 'CSR_JSON' > etcd-csr.json
{
"CN": "etcd",
"hosts": [
"127.0.0.1",
"172.27.128.11",
"172.27.128.12",
"172.27.128.13"
],
"key": {
"algo": "rsa",
"size": 2048
},
"names": [
{
"C": "CN",
"ST": "BeiJing",
"L": "BeiJing",
"O": "k8s",
"OU": "System"
}
]
}
CSR_JSON
# Sign the etcd certificate with the CA files under /opt/kubernetes/ssl.
# cfssljson -bare writes the result using the "etcd" file-name prefix.
cd /opt/kubernetes/ssl
cfssl gencert \
  -ca=/opt/kubernetes/ssl/ca.pem \
  -ca-key=/opt/kubernetes/ssl/ca-key.pem \
  -config=/opt/kubernetes/ssl/ca-config.json \
  -profile=kubernetes \
  etcd-csr.json \
  | cfssljson -bare etcd
# Confirm that the certificate and private key were generated.
ls -ld etcd.pem etcd-key.pem
# Copy the generated etcd certificate and key to every master node.
cd /opt/kubernetes/ssl
source /root/env.sh
for master_ip in "${MASTER_IPS[@]}"; do
  printf '\033[31m>>> %s \033[0m\n' "${master_ip}"
  # The local glob intentionally matches both etcd.pem and etcd-key.pem.
  scp etcd*.pem "root@${master_ip}:/opt/kubernetes/ssl"
done
# Verify the distribution: list the etcd certificate files on each master node.
source /root/env.sh
for master_ip in "${MASTER_IPS[@]}"; do
  printf '\033[31m>>> %s \033[0m\n' "${master_ip}"
  ssh "root@${master_ip}" "ls -ld /opt/kubernetes/ssl/etcd*.pem"
done
# Create the etcd.conf template in the current directory.
# ##ETCD_NAME## and ##ETCD_IP## are placeholders replaced per node later;
# ${ETCD_NODES} is expanded now (presumably set by /root/env.sh).
#
# Security section: etcd only reads environment variables carrying the ETCD_
# prefix, so the client/peer cert-auth switches must be ETCD_CLIENT_CERT_AUTH
# and ETCD_PEER_CLIENT_CERT_AUTH. The deprecated ETCD_CA_FILE and
# ETCD_PEER_CA_FILE are replaced by their *_TRUSTED_CA_FILE equivalents.
source /root/env.sh
cat > etcd.conf << EOF
#[member]
ETCD_NAME="##ETCD_NAME##"
ETCD_DATA_DIR="/var/lib/etcd/default.etcd"
#ETCD_SNAPSHOT_COUNTER="10000"
#ETCD_HEARTBEAT_INTERVAL="100"
#ETCD_ELECTION_TIMEOUT="1000"
ETCD_LISTEN_PEER_URLS="https://##ETCD_IP##:2380"
ETCD_LISTEN_CLIENT_URLS="https://##ETCD_IP##:2379,https://127.0.0.1:2379"
#ETCD_MAX_SNAPSHOTS="5"
#ETCD_MAX_WALS="5"
#ETCD_CORS=""
#[cluster]
ETCD_INITIAL_ADVERTISE_PEER_URLS="https://##ETCD_IP##:2380"
# if you use different ETCD_NAME (e.g. test),
# set ETCD_INITIAL_CLUSTER value for this name, i.e. "test=http://..."
ETCD_INITIAL_CLUSTER="${ETCD_NODES}"
ETCD_INITIAL_CLUSTER_STATE="new"
ETCD_INITIAL_CLUSTER_TOKEN="k8s-etcd-cluster"
ETCD_ADVERTISE_CLIENT_URLS="https://##ETCD_IP##:2379"
#[security]
ETCD_CLIENT_CERT_AUTH="true"
ETCD_TRUSTED_CA_FILE="/opt/kubernetes/ssl/ca.pem"
ETCD_CERT_FILE="/opt/kubernetes/ssl/etcd.pem"
ETCD_KEY_FILE="/opt/kubernetes/ssl/etcd-key.pem"
ETCD_PEER_CLIENT_CERT_AUTH="true"
ETCD_PEER_TRUSTED_CA_FILE="/opt/kubernetes/ssl/ca.pem"
ETCD_PEER_CERT_FILE="/opt/kubernetes/ssl/etcd.pem"
ETCD_PEER_KEY_FILE="/opt/kubernetes/ssl/etcd-key.pem"
EOF
# Confirm that the template was created.
ls -ld etcd.conf
# Render one config file per etcd node from the etcd.conf template by
# substituting the ##ETCD_NAME## and ##ETCD_IP## placeholders.
# ETCD_NAMES and ETCD_IPS are presumably parallel arrays from /root/env.sh.
source /root/env.sh
# Iterate over the indices of ETCD_IPS rather than a hard-coded node count,
# so the loop follows the arrays if the cluster size changes.
for i in "${!ETCD_IPS[@]}"; do
  sed -e "s/##ETCD_NAME##/${ETCD_NAMES[i]}/g" \
      -e "s/##ETCD_IP##/${ETCD_IPS[i]}/g" \
      etcd.conf > "etcd-${ETCD_IPS[i]}.conf"
done
# Confirm that the per-node files were generated.
ls -ld etcd-*.conf
# Copy each node's rendered config to that node, installing it under the
# fixed name /opt/kubernetes/cfg/etcd.conf.
source /root/env.sh
for master_ip in "${MASTER_IPS[@]}"; do
  printf '\033[31m>>> %s \033[0m\n' "${master_ip}"
  scp "etcd-${master_ip}.conf" "root@${master_ip}:/opt/kubernetes/cfg/etcd.conf"
done
# Verify the distribution: check that etcd.conf exists on each master node.
source /root/env.sh
for master_ip in "${MASTER_IPS[@]}"; do
  printf '\033[31m>>> %s \033[0m\n' "${master_ip}"
  ssh "root@${master_ip}" "ls -ld /opt/kubernetes/cfg/etcd.conf"
done
注意:分发时,各节点对应的 etcd-<IP>.conf 文件在目标节点上被重命名为 /opt/kubernetes/cfg/etcd.conf。
# Install the etcd systemd unit on every master node.
#
# The unit text is streamed over ssh's stdin from a quoted heredoc, so it is
# written literally: $(nproc) stays in the unit file and is evaluated by
# /bin/bash on each node when the service starts, not on the machine running
# this loop. The self-referencing "Conflicts=etcd.service" line is dropped
# because a unit cannot conflict with itself.
source /root/env.sh
for master_ip in "${MASTER_IPS[@]}"; do
  printf '\033[31m>>> %s \033[0m\n' "${master_ip}"
  ssh "root@${master_ip}" "cat > /usr/lib/systemd/system/etcd.service" << 'UNIT'
[Unit]
Description=etcd
Documentation=https://github.com/coreos/etcd
Conflicts=etcd2.service
[Service]
Type=notify
Restart=always
RestartSec=5s
LimitNOFILE=40000
TimeoutStartSec=0
WorkingDirectory=/var/lib/etcd
EnvironmentFile=-/opt/kubernetes/cfg/etcd.conf
# set GOMAXPROCS to number of processors
ExecStart=/bin/bash -c "GOMAXPROCS=$(nproc) /opt/kubernetes/bin/etcd"
[Install]
WantedBy=multi-user.target
UNIT
done
# Verify that the etcd unit file exists on each master node.
source /root/env.sh
for master_ip in "${MASTER_IPS[@]}"; do
  printf '\033[31m>>> %s \033[0m\n' "${master_ip}"
  ssh "root@${master_ip}" "ls -ld /usr/lib/systemd/system/etcd.service"
done
# Create the etcd working directory and start the service on every master node.
source /root/env.sh
start_pids=()
for master_ip in "${MASTER_IPS[@]}"; do
  printf '\033[31m>>> %s \033[0m\n' "${master_ip}"
  # -p keeps the step re-runnable when the directory already exists.
  ssh "root@${master_ip}" "mkdir -p /var/lib/etcd"
  # Start the nodes in parallel so no node has to finish starting before the
  # next one is launched.
  ssh "root@${master_ip}" "systemctl daemon-reload && systemctl enable etcd && systemctl restart etcd" &
  start_pids+=("$!")
done
# Block until every background start has returned before moving on.
wait "${start_pids[@]}"
注意:首次执行 systemctl restart etcd 时,etcd 进程会等待其它节点的 etcd 加入集群,因此命令会卡住一段时间,这是正常现象。
# Show the "Active:" line of the etcd service status on each master node.
source /root/env.sh
for master_ip in "${MASTER_IPS[@]}"; do
  printf '\033[31m>>> %s \033[0m\n' "${master_ip}"
  ssh "root@${master_ip}" "systemctl status etcd | grep Active"
done
确保状态为 active (running),否则使用以下命令查看日志,确认原因:
# Show the journal entries of the etcd unit.
journalctl --unit=etcd
# Check cluster health through the etcdctl v2 API.
# ETCDCTL_API=2 is set explicitly because cluster-health and the
# --ca-file/--cert-file/--key-file flags exist only in the v2 command set;
# an exported ETCDCTL_API=3 would otherwise break this command.
# The absolute binary path matches the other etcdctl invocations in this guide.
ETCDCTL_API=2 /opt/kubernetes/bin/etcdctl \
  --endpoints=https://172.27.128.11:2379 \
  --ca-file=/opt/kubernetes/ssl/ca.pem \
  --cert-file=/opt/kubernetes/ssl/etcd.pem \
  --key-file=/opt/kubernetes/ssl/etcd-key.pem \
  cluster-health
#输出均为healthy时表示集群服务正常
member b2ed1e2fa8748a6 is healthy: got healthy result from https://172.27.128.11:2379
member a007153d77aaa744 is healthy: got healthy result from https://172.27.128.13:2379
member cd600e4146d00cbb is healthy: got healthy result from https://172.27.128.12:2379
cluster is healthy
# Query each master node's client endpoint with the etcdctl v3 API.
# etcdctl runs locally and connects to every endpoint in turn over TLS.
source /root/env.sh
for master_ip in "${MASTER_IPS[@]}"; do
  printf '\033[31m>>> %s \033[0m\n' "${master_ip}"
  ETCDCTL_API=3 /opt/kubernetes/bin/etcdctl \
    --endpoints="https://${master_ip}:2379" \
    --cacert=/opt/kubernetes/ssl/ca.pem \
    --cert=/opt/kubernetes/ssl/etcd.pem \
    --key=/opt/kubernetes/ssl/etcd-key.pem \
    endpoint health
done
#输出均为healthy时表示集群服务正常
>>> 172.27.128.11
https://172.27.128.11:2379 is healthy: successfully committed proposal: took = 1.456657ms
>>> 172.27.128.12
https://172.27.128.12:2379 is healthy: successfully committed proposal: took = 1.167191ms
>>> 172.27.128.13
https://172.27.128.13:2379 is healthy: successfully committed proposal: took = 1.53214ms
# Print a status table for all etcd endpoints.
# ETCD_ENDPOINTS is presumably a comma-separated endpoint list from /root/env.sh.
source /root/env.sh
# ${ETCD_ENDPOINTS:?} aborts with a clear message if the variable is unset or
# empty, instead of passing an empty --endpoints value to etcdctl.
ETCDCTL_API=3 /opt/kubernetes/bin/etcdctl \
  -w table \
  --cacert=/opt/kubernetes/ssl/ca.pem \
  --cert=/opt/kubernetes/ssl/etcd.pem \
  --key=/opt/kubernetes/ssl/etcd-key.pem \
  --endpoints="${ETCD_ENDPOINTS:?ETCD_ENDPOINTS must be set by /root/env.sh}" \
  endpoint status
+------------------------------------+---------------------------+--------------+------------+----------------+------------------+-----------------------+
| ENDPOINT | ID | VERSION | DB SIZE | IS LEADER | RAFT TERM | RAFT INDEX |
+------------------------------------+---------------------------+--------------+------------+----------------+------------------+-----------------------+
| https://172.27.128.11:2379 | b2ed1e2fa8748a6 | 3.3.12 | 20 kB | false | 100 | 29 |
| https://172.27.128.12:2379 | cd600e4146d00cbb | 3.3.12 | 20 kB | true | 100 | 29 |
| https://172.27.128.13:2379 | a007153d77aaa744 | 3.3.12 | 20 kB | false | 100 | 29 |
+------------------------------------+---------------------------+--------------+------------+----------------+------------------+-----------------------+