This post documents how to build a highly available (HA) Kubernetes Control Plane using keepalived and haproxy.
VRRP - Virtual Router Redundancy Protocol
VIP - Virtual IP (a floating IP address)
The goal is a highly available Kubernetes Control Plane: the Control Plane is deployed as a cluster, so that when a single Control Plane node fails, the Control Plane as a whole keeps working.
The Kubernetes worker nodes reach the Control Plane cluster through a load balancer, and that load balancer is implemented with keepalived and haproxy.
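The topology looks roughly like this (the addresses and ports are the ones configured throughout this post):

            Worker Nodes
                 |
                 v
    VIP 192.168.238.100:4300          <- held by one node at a time (keepalived/VRRP)
                 |
              haproxy                 <- runs on all three nodes, balances to the apiservers
         /       |        \
        v        v         v
  Master:6443  Node1:6443  Node2:6443 <- kube-apiserver on each control plane node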
Install and enable keepalived on all three hosts (Master, Node1 and Node2); these three nodes also run haproxy and the Kubernetes Control Plane cluster.
yum install keepalived
systemctl enable keepalived
Configure keepalived:
vim /etc/keepalived/keepalived.conf
The configuration on Master is as follows (the other two nodes are covered right after the listing):
vrrp_script check_apiserver {
    script "/etc/keepalived/check_apiserver.sh"
    interval 3
    weight -2
    fall 10
    rise 2
}

vrrp_instance VI_1 {
    state MASTER
    interface ens160
    virtual_router_id 51
    priority 100                        # priority, highest on the initial MASTER
    advert_int 1
    mcast_src_ip 192.168.238.130        # IP of this host
    unicast_src_ip 192.168.238.130      # IP of this host
    unicast_peer {
        192.168.238.131                 # the other two nodes
        192.168.238.132
    }
    nopreempt
    authentication {
        auth_type PASS
        auth_pass 1111
    }
    virtual_ipaddress {
        192.168.238.100/24              # the VIP
    }
    track_script {
        check_apiserver                 # defined by the vrrp_script block above
    }
}
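On Node1 and Node2 the configuration is identical except for the initial state, the priority, and the source/peer addresses. A sketch for Node1 (the exact priority values are an assumption; they only need to be lower than the MASTER's):

vrrp_instance VI_1 {
    state BACKUP                        # the two standby nodes start as BACKUP
    interface ens160
    virtual_router_id 51                # must be the same on all three nodes
    priority 99                         # lower than the MASTER (e.g. 98 on Node2)
    advert_int 1
    unicast_src_ip 192.168.238.131      # IP of this host
    unicast_peer {
        192.168.238.130                 # the other two nodes
        192.168.238.132
    }
    ...                                 # the rest is the same as on Master
}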
Create the API server health-check script referenced by track_script:
vim /etc/keepalived/check_apiserver.sh
chmod +x /etc/keepalived/check_apiserver.sh
The script is as follows:
#!/bin/sh
# keepalived treats a non-zero exit code as a failed check
errorExit() {
    echo "*** $*" 1>&2
    exit 1
}

# the apiserver must answer through the local haproxy frontend (port 4300)
curl --silent --max-time 2 --insecure https://localhost:4300/ -o /dev/null || errorExit "Error GET https://localhost:4300/"
# if this node currently holds the VIP, the apiserver must also answer on it
if ip addr | grep -q 192.168.238.100; then
    curl --silent --max-time 2 --insecure https://192.168.238.100:4300/ -o /dev/null || errorExit "Error GET https://192.168.238.100:4300/"
fi
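The script can be exercised by hand. Note that it will fail until haproxy is running later in this setup; this is why the vrrp_script block above carries weight -2 (a failing check merely lowers the node's priority) rather than letting the check put the instance into FAULT state:

sh /etc/keepalived/check_apiserver.sh && echo OK
# before haproxy is up this prints: *** Error GET https://localhost:4300/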
Start the keepalived service:
systemctl start keepalived
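Which node currently holds the MASTER state can be read from the keepalived logs, e.g.:

journalctl -u keepalived | grep -i state
# ... (VI_1) Entering MASTER STATE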
Install and enable haproxy:
yum install haproxy
systemctl enable haproxy
Configure haproxy:
vim /etc/haproxy/haproxy.cfg
The relevant parts of the configuration are as follows:
...
#---------------------------------------------------------------------
# apiserver frontend which proxies to the control plane nodes
#---------------------------------------------------------------------
frontend apiserver
    bind *:4300
    mode tcp
    option tcplog
    default_backend apiserver
...
#---------------------------------------------------------------------
# round robin balancing for apiserver
#---------------------------------------------------------------------
backend apiserver
    option httpchk GET /healthz
    http-check expect status 200
    mode tcp
    option ssl-hello-chk
    balance roundrobin
    #server ${HOST1_ID} ${HOST1_ADDRESS}:${APISERVER_SRC_PORT} check
    # [...]
    server Master 192.168.238.130:6443 check
    server Node1 192.168.238.131:6443 check
    server Node2 192.168.238.132:6443 check
...
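The file can be syntax-checked before (re)starting the service:

haproxy -c -f /etc/haproxy/haproxy.cfg
# prints "Configuration file is valid" on success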
系统设置:
[root@Master ~]# echo net.ipv4.ip_nonlocal_bind=1 >> /etc/sysctl.d/haproxy-keepalived.conf
[root@Master ~]# echo net.ipv4.ip_forward=1 >> /etc/sysctl.d/haproxy-keepalived.conf
[root@Master ~]# cat /etc/sysctl.d/haproxy-keepalived.conf
net.ipv4.ip_nonlocal_bind=1
net.ipv4.ip_forward=1
[root@Master ~]#
[root@Master ~]# sysctl -p /etc/sysctl.d/haproxy-keepalived.conf
net.ipv4.ip_nonlocal_bind = 1
net.ipv4.ip_forward = 1
[root@Master ~]#
Start the haproxy service:
systemctl start haproxy
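A quick sanity check that haproxy is listening on the frontend port (output abbreviated):

ss -tlnp | grep 4300
# LISTEN 0 ... *:4300 ... users:(("haproxy",...))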
Check the virtual IP binding:
[root@Master ~]# ip a
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN group default qlen 1000
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
inet 127.0.0.1/8 scope host lo
valid_lft forever preferred_lft forever
inet6 ::1/128 scope host
valid_lft forever preferred_lft forever
2: ens160: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc mq state UP group default qlen 1000
link/ether 00:0c:29:9e:d5:84 brd ff:ff:ff:ff:ff:ff
inet 192.168.238.130/24 brd 192.168.238.255 scope global dynamic noprefixroute ens160
valid_lft 1790sec preferred_lft 1790sec
inet 192.168.238.100/24 scope global secondary ens160
valid_lft forever preferred_lft forever
inet6 fe80::b52:173:210c:48d9/64 scope link noprefixroute
valid_lft forever preferred_lft forever
[root@Master ~]# ip a | grep 192.168.238.100
inet 192.168.238.100/24 scope global secondary ens160
[root@Master ~]#
Verify that the VIP and haproxy are working:
[root@Master ~]# curl --insecure https://localhost:4300
{
"kind": "Status",
"apiVersion": "v1",
"metadata": {},
"status": "Failure",
"message": "forbidden: User \"system:anonymous\" cannot get path \"/\"",
"reason": "Forbidden",
"details": {},
"code": 403
}
[root@Master ~]# curl --insecure https://192.168.238.100:4300
{
"kind": "Status",
"apiVersion": "v1",
"metadata": {},
"status": "Failure",
"message": "forbidden: User \"system:anonymous\" cannot get path \"/\"",
"reason": "Forbidden",
"details": {},
"code": 403
}[root@Master ~]#
Entering https://192.168.238.100:4300/ in a browser returns the same content as above; entering http://192.168.238.100:4300/ instead returns "Client sent an HTTP request to an HTTPS server.".
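At this point failover can be tested: stop keepalived on the node holding the VIP and watch the address move to one of the standbys (which node takes over depends on their priorities):

# on Master, the current VIP holder
systemctl stop keepalived
# shortly afterwards, on Node1 or Node2
ip a | grep 192.168.238.100
# the VIP should now be bound there, and curl --insecure https://192.168.238.100:4300
# should still answer; restart keepalived on Master when done
systemctl start keepalived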
With the load balancer in place, initialize the cluster with the following command.
# run on only one of the three nodes, here on Master; the control plane endpoint is the
# VIP plus the haproxy frontend port, so all API traffic passes through the load balancer
kubeadm init --pod-network-cidr=10.244.0.0/16 --control-plane-endpoint 192.168.238.100:4300
Following the instructions printed by kubeadm init, run these commands on each of the three nodes:
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
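kubectl should now reach the apiserver through the VIP and the haproxy port:

kubectl cluster-info
# Kubernetes control plane is running at https://192.168.238.100:4300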
Copy the certificate files to the remaining nodes, e.g. for Node1 (the target directory has to be created on the remote node first; repeat for Node2):
ssh root@Node1 mkdir -p /etc/kubernetes/pki/etcd
scp /etc/kubernetes/pki/{ca.*,sa.*,front-proxy-ca.*} root@Node1:/etc/kubernetes/pki/
scp /etc/kubernetes/pki/etcd/ca.* root@Node1:/etc/kubernetes/pki/etcd
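Assuming passwordless root SSH to both nodes, the copy can be wrapped in a small script (a sketch):

for node in Node1 Node2; do
    ssh root@$node mkdir -p /etc/kubernetes/pki/etcd
    scp /etc/kubernetes/pki/{ca.*,sa.*,front-proxy-ca.*} root@$node:/etc/kubernetes/pki/
    scp /etc/kubernetes/pki/etcd/ca.* root@$node:/etc/kubernetes/pki/etcd
done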
Before joining the other two Kubernetes Control Plane nodes, deploy the flannel network add-on (flannel's default pod network, 10.244.0.0/16, matches the --pod-network-cidr passed to kubeadm init):
kubectl apply -f kube-flannel.yml
Run the following command, as printed by kubeadm init, on the remaining two nodes (the token and hash will differ in your environment):
kubeadm join 192.168.238.100:4300 --token si5oek.mbrw418p8mr357qt --discovery-token-ca-cert-hash sha256:0e23eb637e09afc4c6dbb1f891409b314d5731e46fe33d84793ba2d58da006d6 --control-plane
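If the token from the init output has expired (the default TTL is 24 hours), a fresh join command can be generated on Master with:

kubeadm token create --print-join-command
# append --control-plane for a control plane join; the certificates copied above are still required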
After the remaining components are installed, the cluster is up; the result is as follows:
[root@Master ~]# kubectl get nodes -o wide
NAME STATUS ROLES AGE VERSION INTERNAL-IP EXTERNAL-IP OS-IMAGE KERNEL-VERSION CONTAINER-RUNTIME
master Ready control-plane 3h57m v1.27.3 192.168.238.130 <none> Red Hat Enterprise Linux 8.3 (Ootpa) 4.18.0-240.el8.x86_64 containerd://1.6.21
node1 Ready control-plane 3h53m v1.27.3 192.168.238.131 <none> Red Hat Enterprise Linux 8.3 (Ootpa) 4.18.0-240.el8.x86_64 containerd://1.6.21
node2 Ready control-plane 3h51m v1.27.3 192.168.238.132 <none> Red Hat Enterprise Linux 8.3 (Ootpa) 4.18.0-240.el8.x86_64 containerd://1.6.21
[root@Master ~]#
The core components are as follows:
[root@Master ~]# kubectl get pods -o wide -A
NAMESPACE NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
kube-flannel kube-flannel-ds-6n8rh 1/1 Running 0 3m11s 192.168.238.131 node1 <none> <none>
kube-flannel kube-flannel-ds-c65rz 1/1 Running 0 80s 192.168.238.132 node2 <none> <none>
kube-flannel kube-flannel-ds-ss8pw 1/1 Running 0 10m 192.168.238.130 master <none> <none>
kube-system coredns-5d78c9869d-cw5cz 1/1 Running 0 17m 10.244.1.96 node1 <none> <none>
kube-system coredns-5d78c9869d-z99r8 1/1 Running 0 17m 10.244.1.95 node1 <none> <none>
kube-system etcd-master 1/1 Running 13 17m 192.168.238.130 master <none> <none>
kube-system etcd-node1 1/1 Running 0 3m10s 192.168.238.131 node1 <none> <none>
kube-system etcd-node2 1/1 Running 0 79s 192.168.238.132 node2 <none> <none>
kube-system kube-apiserver-master 1/1 Running 11 17m 192.168.238.130 master <none> <none>
kube-system kube-apiserver-node1 1/1 Running 1 3m11s 192.168.238.131 node1 <none> <none>
kube-system kube-apiserver-node2 1/1 Running 2 79s 192.168.238.132 node2 <none> <none>
kube-system kube-controller-manager-master 1/1 Running 20 (2m58s ago) 17m 192.168.238.130 master <none> <none>
kube-system kube-controller-manager-node1 1/1 Running 1 3m11s 192.168.238.131 node1 <none> <none>
kube-system kube-controller-manager-node2 1/1 Running 2 79s 192.168.238.132 node2 <none> <none>
kube-system kube-proxy-87gfk 1/1 Running 0 17m 192.168.238.130 master <none> <none>
kube-system kube-proxy-crjc4 1/1 Running 0 80s 192.168.238.132 node2 <none> <none>
kube-system kube-proxy-mnl2d 1/1 Running 0 3m11s 192.168.238.131 node1 <none> <none>
kube-system kube-scheduler-master 1/1 Running 20 (2m55s ago) 17m 192.168.238.130 master <none> <none>
kube-system kube-scheduler-node1 1/1 Running 1 3m11s 192.168.238.131 node1 <none> <none>
kube-system kube-scheduler-node2 1/1 Running 2 79s 192.168.238.132 node2 <none> <none>
[root@Master ~]#
References:
GitHub: High Availability Considerations
Kubernetes: Options for Highly Available Topology
Liu Da's blog: haproxy+keepalived configuration example
Alibaba Cloud: Building a highly available load balancer with haproxy+keepalived
HAproxy and keepAlived for Multiple Kubernetes Master Nodes
Create a Highly Available Kubernetes Cluster Using Keepalived and HAproxy
Install and Configure a Multi-Master HA Kubernetes Cluster with kubeadm, HAProxy and Keepalived on CentOS 7
CSDN: Building a highly available k8s cluster with haproxy+keepalived
CSDN: Kubernetes notes - installing haproxy and configuring keepalived for high availability
Create a Highly Available Kubernetes Cluster Using Keepalived and HAproxy (Chinese edition)
CSDN: HAProxy + Keepalived configuration and architecture explained
cnblogs: Principles and characteristics of haproxy+keepalived
An analysis of how Keepalived and HAProxy cooperate
High availability Kubernetes cluster on bare metal - part 2