Installing a Highly Available Kubernetes 1.18 Cluster with kubeadm (External etcd)
Separating the control plane from etcd lowers the overall risk: losing a single master or etcd node has little impact on the cluster, and an external etcd is easier to maintain and restore.
Cluster plan
Host IP          Role
192.128.232.11   node01, etcd01
192.128.232.12   master01, etcd02, dns
192.128.232.13   master02, etcd03, harbor
192.128.232.15   VIP
Part 1: Base initialization for Kubernetes
[root@master ~]# yum install -y yum-utils device-mapper-persistent-data lvm2
[root@master ~]# yum install wget net-tools telnet tree nmap sysstat lrzsz dos2unix bind-utils ntpdate -y
[root@master ~]# yum -y install bash-completion
[root@master ~]# yum-config-manager --add-repo http://mirrors.aliyun.com/docker-ce/linux/centos/docker-ce.repo
[root@master ~]# yum install https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm
[root@master yum.repos.d]# cat > kubernetes.repo << EOF
[kubernetes]
name=Kubernetes
baseurl=https://mirrors.aliyun.com/kubernetes/yum/repos/kubernetes-el7-x86_64/
enabled=1
gpgcheck=1
repo_gpgcheck=1
gpgkey=https://mirrors.aliyun.com/kubernetes/yum/doc/yum-key.gpg https://mirrors.aliyun.com/kubernetes/yum/doc/rpm-package-key.gpg
EOF
Part 2: Install the master nodes
# Update the repo sources; all nodes need this initialization
[root@master ~]# rm /etc/localtime -rf
[root@master ~]# ln -sf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime
[root@master ~]# ntpdate 0.asia.pool.ntp.org
[root@master ~]# yum repolist && yum makecache fast
# Disable the SELinux.
[root@master01 ~]# sed -i 's/^SELINUX=.*/SELINUX=disabled/' /etc/selinux/config
# Disable swap
[root@master02 ~]# swapoff -a
[root@master01 ~]# sed -i.bak '/swap/s/^/#/' /etc/fstab
# Turn off and disable the firewalld.
[root@master ~]# systemctl stop firewalld
[root@master ~]# systemctl disable firewalld
[root@master01 ~]# ssh-keygen -f ~/.ssh/id_rsa -N ''
[root@master01 ~]# cat k8s_ip.txt
192.128.232.12
192.128.232.13
[root@master01 ~]# for all_ip in `cat k8s_ip.txt`
do
echo ">>> ${all_ip}"
ssh-copy-id -i ~/.ssh/id_rsa.pub root@${all_ip}
done
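As an optional sanity check, the same list can be looped over to confirm passwordless SSH now works (a minimal sketch using only the hosts defined above):
[root@master01 ~]# for all_ip in `cat k8s_ip.txt`
do
echo ">>> ${all_ip}"
ssh root@${all_ip} "hostname; date"   # should return without a password prompt
done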
[root@master01 ~]# yum install bind -y
[root@master01 ~]# cat /etc/named.conf
options {
listen-on port 53 { 192.128.232.12; }; # IP of the DNS host
listen-on-v6 port 53 { ::1; };
directory "/var/named";
dump-file "/var/named/data/cache_dump.db";
statistics-file "/var/named/data/named_stats.txt";
memstatistics-file "/var/named/data/named_mem_stats.txt";
recursing-file "/var/named/data/named.recursing";
secroots-file "/var/named/data/named.secroots";
allow-query { any; };
forwarders { 192.128.232.2; }; # added manually; forward unresolved queries, usually to the gateway
dnssec-enable no; # default is yes
dnssec-validation no; # default is yes
recursion yes;
};
# (the remaining default settings in the file are left unchanged)
# Parameter notes for named.conf:
# listen-on: the listening address and port; bind it to the internal NIC so other machines can use this DNS server
# allow-query: which clients are allowed to query this DNS server
# forwarders: the upstream DNS servers to forward unresolved queries to
# Validate the changes; no output means no errors
[root@master01 ~]# named-checkconf
# Append the following zone definitions to the end of the file
[root@master01 ~]# cat /etc/named.rfc1912.zones
# Host domain
zone "host.com" IN {
type master;
file "host.com.zone";
allow-update { 192.128.232.12; };
};
# Business domain
zone "od.com" IN {
type master;
file "od.com.zone";
allow-update { 192.128.232.12; };
};
# Create the host-domain zone file
[root@master01 ~]# cat /var/named/host.com.zone
$ORIGIN host.com.
$TTL 600 ; 10 minutes
@ IN SOA dns.host.com. dnsadmin.host.com. (
2020011201 ; serial
10800 ; refresh (3 hours)
900 ; retry (15 minutes)
604800 ; expire (1 week)
86400 ; minimum (1 day)
)
        NS   dns.host.com.
$TTL 60 ; 1 minute
dns A 192.128.232.12
node01 A 192.128.232.11
master01 A 192.128.232.12
master02 A 192.128.232.13
etcd01 A 192.128.232.11
etcd02 A 192.128.232.12
etcd03 A 192.128.232.13
# Create the business-domain zone file
[root@master01 ~]# cat /var/named/od.com.zone
$ORIGIN od.com.
$TTL 600 ; 10 minutes
@ IN SOA dns.od.com. dnsadmin.od.com. (
2020011201 ; serial
10800 ; refresh (3 hours)
900 ; retry (15 minutes)
604800 ; expire (1 week)
86400 ; minimum (1 day)
)
        NS   dns.od.com.
$TTL 60 ; 1 minute
dns A 192.128.232.12
# Check again for errors
[root@master01 ~]# named-checkconf
[root@master01 ~]# systemctl start named
[root@master01 ~]# netstat -luntp|grep 53
# Parameter notes
$TTL 600: the default time-to-live for records in this zone, i.e. how long resolvers may cache them (600 seconds = 10 minutes); it is not the IP hop limit
SOA: the start-of-authority record for the zone; the five values that follow set serial, refresh, retry, expire and minimum
dnsadmin.od.com.: a placeholder admin mailbox (the @ in the address is written as a dot)
serial: the zone's version number; bump it on every change
$ORIGIN: appended to unqualified names below, so the record "dns" is seen externally as dns.od.com
netstat -luntp: shows listening TCP/UDP ports and their owning processes
# On master01, check that the host domain resolves
[root@master01 ~]# dig -t A master01.host.com @192.128.232.12 +short
192.128.232.12
# Let both Linux and Windows clients use this service: set their DNS resolver IP to the self-hosted DNS server
[root@master01 ~]# vi /etc/sysconfig/network-scripts/ifcfg-eth0
DNS1=192.128.232.12
[root@master01 ~]# systemctl restart network
[root@master01 ~]# ping www.baidu.com
[root@master01 ~]# ping master01.host.com
# Install the Harbor private registry on 192.128.232.13
[root@master02 ~]# cd /opt
[root@master02 ~]# mkdir src
[root@master02 ~]# cd src/
# Download it from the URL below, or use the same package I used
https://github.com/goharbor/harbor/releases
[root@master02 src]# tar xf harbor-offline-installer-v2.0.1.tgz -C /opt/
[root@master02 ~]# cd /opt/harbor/
[root@master02 harbor]# cp harbor.yml.tmpl harbor.yml
# Edit the Harbor config file and change the following four settings
[root@master02 harbor]# vi harbor.yml
hostname: harbor.od.com
http:
port: 180
data_volume: /data/harbor
location: /data/harbor/logs    # under the log: section
# Install Harbor
[root@master02 harbor]# mkdir -p /data/harbor/logs
[root@master02 harbor]# yum install docker-compose -y
[root@master02 harbor]# ./install.sh
[root@master02 harbor]# docker-compose ps
[root@master02 harbor]# docker ps -a
[root@master02 harbor]# rpm -Uvh http://nginx.org/packages/centos/7/noarch/RPMS/nginx-release-centos-7-0.el7.ngx.noarch.rpm
### Possible error:
yum reports: /var/run/yum.pid is locked; another program with PID 1610 is running.
Another application currently holds the yum lock; waiting for it to exit...
Common fix: run rm -f /var/run/yum.pid to remove the lock file, then rerun yum.
###
[root@master02 harbor]# vi /etc/nginx/conf.d/harbor.od.com.conf
server {
listen 80;
server_name harbor.od.com;
client_max_body_size 1000m;
location / {
proxy_pass http://127.0.0.1:180;
}
}
[root@master02 harbor]# nginx -t
[root@master02 harbor]# systemctl start nginx
[root@master02 harbor]# systemctl enable nginx
# Add the record on the self-hosted DNS server (192.128.232.12):
[root@master01 ~]# vi /var/named/od.com.zone
# Remember to bump the serial number
# Append the record at the bottom
harbor A 192.128.232.13
[root@master01 ~]# systemctl restart named
[root@master01 ~]# dig -t A harbor.od.com +short
192.128.232.13
# Open harbor.od.com in a browser and create a project named kubeadm
Username: admin
Password: Harbor12345
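As a quick end-to-end check of the registry (optional; assumes the kubeadm project created above, that harbor.od.com resolves, and that the docker daemon on this host lists harbor.od.com under insecure-registries, which is configured later in this guide):
[root@master02 harbor]# docker login harbor.od.com -u admin -p Harbor12345
[root@master02 harbor]# docker pull busybox:latest
[root@master02 harbor]# docker tag busybox:latest harbor.od.com/kubeadm/busybox:latest
[root@master02 harbor]# docker push harbor.od.com/kubeadm/busybox:latest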
# Install kubeadm, kubelet and kubectl at the same version on every node; docker-ce is versioned separately
[root@master ~]# yum list kubelet --showduplicates | sort -r
[root@master ~]# yum -y install kubectl-1.18.18 kubelet-1.18.18 kubeadm-1.18.18 docker-ce-20.10.6
[root@master ~]# modprobe br_netfilter
[root@master ~]# cat > /etc/sysctl.d/kubernetes.conf << EOF
net.bridge.bridge-nf-call-iptables=1
net.bridge.bridge-nf-call-ip6tables=1
net.ipv4.ip_forward=1
vm.swappiness=0
vm.overcommit_memory=1
vm.panic_on_oom=0
fs.inotify.max_user_watches=89100
EOF
[root@master ~]# sysctl -p /etc/sysctl.d/kubernetes.conf
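A quick sanity check that the kernel parameters took effect (expected values shown):
[root@master ~]# sysctl net.bridge.bridge-nf-call-iptables net.ipv4.ip_forward vm.swappiness
net.bridge.bridge-nf-call-iptables = 1
net.ipv4.ip_forward = 1
vm.swappiness = 0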
[root@master ~]# systemctl enable docker && systemctl start docker && systemctl enable kubelet
# Set registry-mirrors so official images are pulled faster through a mirror
# Set the cgroup driver to systemd so it matches the kubelet
[root@master ~]# mkdir /data/docker -p
[root@master ~]# cat > /etc/docker/daemon.json << EOF
{
"registry-mirrors": ["https://4wvlvmti.mirror.aliyuncs.com"],
"storage-driver": "overlay2",
"insecure-registries": ["registry.access.redhat.com","quay.io","harbor.od.com"],
"graph": "/data/docker",
"exec-opts": ["native.cgroupdriver=systemd"],
"live-restore": true
}
EOF
[root@master ~]# systemctl daemon-reload
[root@master ~]# systemctl restart docker
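Optionally confirm that docker picked up the new settings; the relevant lines of docker info should look roughly like this:
[root@master ~]# docker info | grep -E 'Storage Driver|Cgroup Driver|Docker Root Dir'
 Storage Driver: overlay2
 Cgroup Driver: systemd
 Docker Root Dir: /data/docker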
[root@master ~]# yum -y install ipvsadm ipset sysstat conntrack libseccomp
[root@master ~]# hostnamectl set-hostname master01
[root@master01 ~]# cat >> /etc/hosts << EOF
192.128.232.11 node01
192.128.232.12 master01
192.128.232.13 master02
EOF
[root@master01 ~]# cat > /etc/sysconfig/modules/ipvs.modules << EOF
#!/bin/bash
modprobe -- ip_vs
modprobe -- ip_vs_nq
modprobe -- ip_vs_rr
modprobe -- ip_vs_wrr
modprobe -- ip_vs_sh
modprobe -- nf_conntrack_ipv4
EOF
[root@master01 ~]# chmod 755 /etc/sysconfig/modules/ipvs.modules && bash /etc/sysconfig/modules/ipvs.modules && lsmod | grep -e ip_vs -e nf_conntrack_ipv4
ip_vs_sh 12688 0
ip_vs_wrr 12697 0
ip_vs_rr 12600 0
ip_vs 145458 6 ip_vs_rr,ip_vs_sh,ip_vs_wrr
nf_conntrack_ipv4 15053 2
nf_defrag_ipv4 12729 1 nf_conntrack_ipv4
nf_conntrack 139264 7 ip_vs,nf_nat,nf_nat_ipv4,xt_conntrack,nf_nat_masquerade_ipv4,nf_conntrack_netlink,nf_conntrack_ipv4
libcrc32c 12644 4 xfs,ip_vs,nf_nat,nf_conntrack
# Allow kubelet to tolerate swap (swap was already disabled above; this is a safeguard)
[root@master ~]# rpm -ql kubelet
/etc/kubernetes/manifests
/etc/sysconfig/kubelet
/usr/bin/kubelet
/usr/lib/systemd/system/kubelet.service
[root@master ~]# cat > /etc/sysconfig/kubelet << EOF
KUBELET_EXTRA_ARGS="--fail-swap-on=false"
EOF
# Enable kubelet at boot; it will keep restarting until kubeadm init/join runs, which is expected
[root@master ~]# systemctl enable kubelet docker
[root@master ~]# systemctl start docker
[root@master01 ~]# systemctl enable kubelet && systemctl start kubelet
# kubectl command completion
[root@master01 ~]# echo "source <(kubectl completion bash)" >> ~/.bash_profile
[root@master01 ~]# source ~/.bash_profile
# Install keepalived on both master nodes
[root@master ~]# rpm -Uvh http://nginx.org/packages/centos/7/noarch/RPMS/nginx-release-centos-7-0.el7.ngx.noarch.rpm
[root@master ~]# yum -y install keepalived
[root@master01 ~]# cat > /etc/keepalived/keepalived.conf << EOF
global_defs {
router_id 192.128.232.12
}
# Before the k8s control plane is deployed, port 6443 does not exist yet; comment out this vrrp_script chk_nginx block beforehand, otherwise keepalived will not stay running.
vrrp_script chk_nginx {
script "/etc/keepalived/check_port.sh 6443"
interval 2
weight -20
}
vrrp_instance VI_1 {
state BACKUP #set BACKUP on every node; the one with the highest priority acts as master
interface eth0 #NIC name on this host
virtual_router_id 251 #must be identical on all nodes
priority 100
advert_int 1
nopreempt #when the higher-priority node recovers it does not take the VIP back, which avoids flapping the service
unicast_peer { #IPs of the other keepalived hosts
192.128.232.13
}
authentication {
auth_type PASS
auth_pass 11111111
}
#comment out this track_script block as well (until the control plane is up)
track_script {
chk_nginx
}
virtual_ipaddress {
192.128.232.15/24 #VIP address
}
}
EOF
[root@master01 ~]# cat > /etc/keepalived/check_port.sh << 'EOF'
#!/bin/bash
#Usage: referenced from keepalived.conf as
#vrrp_script check_port {               #define a vrrp_script that runs the check
#    script "/etc/keepalived/check_port.sh 6443"   #port to monitor
#    interval 2                          #how often to run the check, in seconds
#}
CHK_PORT=$1
if [ -n "$CHK_PORT" ];then
PORT_PROCESS=`ss -lnt|awk -F':' '{print $2}'|awk '{print $1}'|grep "^$CHK_PORT$"|wc -l`
echo $PORT_PROCESS
if [ $PORT_PROCESS -eq 0 ];then
echo "Port $CHK_PORT Is Not Used,End."
systemctl stop keepalived
fi
else
echo "Check Port Cant Be Empty!"
fi
EOF
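The script must be executable, and it can be dry-run by hand. Note that with no apiserver listening on 6443 yet it prints 0 and stops keepalived, which is exactly why the vrrp_script/track_script blocks stay commented out for now:
[root@master01 ~]# chmod +x /etc/keepalived/check_port.sh
[root@master01 ~]# bash /etc/keepalived/check_port.sh 6443    # prints 0 and stops keepalived while 6443 is closed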
[root@master01 kubernetes]# systemctl restart keepalived
[root@master01 kubernetes]# systemctl enable keepalived
#############################################################################
# keepalived configuration on master02
[root@master02 .ssh]# cat > /etc/keepalived/keepalived.conf << EOF
global_defs {
router_id 192.128.232.13
}
# Before the k8s control plane is deployed, port 6443 does not exist yet; comment out this vrrp_script chk_nginx block beforehand, otherwise keepalived will not stay running.
vrrp_script chk_nginx {
script "/etc/keepalived/check_port.sh 6443"
interval 2
weight -20
}
vrrp_instance VI_1 {
state BACKUP #set BACKUP on every node; the one with the highest priority acts as master
interface eth0 #NIC name on this host
virtual_router_id 251 #must be identical on all nodes
priority 90
advert_int 1
nopreempt #when the higher-priority node recovers it does not take the VIP back, which avoids flapping the service
unicast_peer { #IPs of the other keepalived hosts
192.128.232.12
}
authentication {
auth_type PASS
auth_pass 11111111
}
#comment out this track_script block as well (until the control plane is up)
track_script {
chk_nginx
}
virtual_ipaddress {
192.128.232.15/24 #VIP address
}
}
EOF
[root@master02 ~]# cat > /etc/keepalived/check_port.sh << 'EOF'
#!/bin/bash
#Usage: referenced from keepalived.conf as
#vrrp_script check_port {               #define a vrrp_script that runs the check
#    script "/etc/keepalived/check_port.sh 6443"   #port to monitor
#    interval 2                          #how often to run the check, in seconds
#}
CHK_PORT=$1
if [ -n "$CHK_PORT" ];then
PORT_PROCESS=`ss -lnt|awk -F':' '{print $2}'|awk '{print $1}'|grep "^$CHK_PORT$"|wc -l`
echo $PORT_PROCESS
if [ $PORT_PROCESS -eq 0 ];then
echo "Port $CHK_PORT Is Not Used,End."
systemctl stop keepalived
fi
else
echo "Check Port Cant Be Empty!"
fi
EOF
[root@master02 kubernetes]# systemctl restart keepalived
[root@master02 kubernetes]# systemctl enable keepalived
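With keepalived running on both masters, the VIP should be held by the higher-priority node (master01 here). A quick check on each node; the second command is expected to print nothing:
[root@master01 ~]# ip addr show eth0 | grep 192.128.232.15
inet 192.128.232.15/24 scope global secondary eth0
[root@master02 ~]# ip addr show eth0 | grep 192.128.232.15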
Part 3: Build the highly available etcd cluster
1. Install cfssl on node01
[root@node01 ~]# wget https://pkg.cfssl.org/R1.2/cfssl_linux-amd64 -O /usr/local/bin/cfssl
[root@node01 ~]# wget https://pkg.cfssl.org/R1.2/cfssljson_linux-amd64 -O /usr/local/bin/cfssljson
[root@node01 ~]# wget https://pkg.cfssl.org/R1.2/cfssl-certinfo_linux-amd64 -O /usr/local/bin/cfssl-certinfo
[root@node01 ~]# chmod +x /usr/local/bin/cfssl*
2. Create the CA certificate
# Create the config file for the CA
[root@node01 ~]# mkdir /opt/certs
[root@node01 ~]# cd /opt/certs
[root@node01 ~]# cat > ca-config.json << EOF
{
"signing": {
"default": {
"expiry": "438000h"
},
"profiles": {
"server": {
"expiry": "438000h",
"usages": [
"signing",
"key encipherment",
"server auth",
"client auth"
]
},
"client": {
"expiry": "438000h",
"usages": [
"signing",
"key encipherment",
"client auth"
]
},
"peer": {
"expiry": "438000h",
"usages": [
"signing",
"key encipherment",
"server auth",
"client auth"
]
}
}
}
}
EOF
server auth: the client can use this CA to verify certificates presented by the server
client auth: the server can use this CA to verify certificates presented by the client
Create the certificate signing request ca-csr.json
[root@node01 ~]# cat > ca-csr.json << EOF
{
"CN": "etcd",
"key": {
"algo": "rsa",
"size": 2048
}
}
EOF
## Generate the CA certificate and private key
[root@node01 ~]# cfssl gencert -initca ca-csr.json | cfssljson -bare ca
# ls ca*
# ca-config.json ca.csr ca-csr.json ca-key.pem ca.pem
3. Generate the client certificate
[root@node01 ~]# cat > client.json << EOF
{
"CN": "client",
"key": {
"algo": "ecdsa",
"size": 256
}
}
EOF
# Generate the etcd client certificate
[root@node01 ~]# cfssl gencert -ca=ca.pem -ca-key=ca-key.pem -config=ca-config.json -profile=client client.json | cfssljson -bare etcd-client -
4. Generate the server and peer certificates
Create the config; the hosts field lists every IP (and name) where etcd might ever be deployed.
[root@node01 ~]# cat > etcd.json << EOF
{
"CN": "etcd",
"hosts": [
"192.128.232.11",
"192.128.232.12",
"192.128.232.13",
"192.128.232.14",
"192.128.232.16",
"192.128.232.17"
"etcd01.host.com",
"etcd02.host.com",
"etcd03.host.com"
],
"key": {
"algo": "ecdsa",
"size": 256
},
"names": [
{
"C": "CN",
"L": "BJ",
"ST": "BJ"
}
]
}
EOF
# Generate the etcd server and peer certificates
[root@node01 ~]# cfssl gencert -ca=ca.pem -ca-key=ca-key.pem -config=ca-config.json -profile=server etcd.json | cfssljson -bare etcd-server -
[root@node01 ~]# cfssl gencert -ca=ca.pem -ca-key=ca-key.pem -config=ca-config.json -profile=peer etcd.json | cfssljson -bare etcd-peer -
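Before distributing the certificates it can be worth inspecting one to confirm the SANs and expiry; either command below works (openssl ships with the base system, cfssl-certinfo was installed earlier):
[root@node01 certs]# cfssl-certinfo -cert etcd-server.pem
[root@node01 certs]# openssl x509 -in etcd-server.pem -noout -text | grep -A1 'Subject Alternative Name'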
5. Deploy the first etcd node (etcd01) on node01
[root@node01 ~]# cd /opt
[root@node01 ~]# wget https://storage.googleapis.com/etcd/v3.1.25/etcd-v3.1.25-linux-amd64.tar.gz
[root@node01 ~]# tar zxf etcd-v3.1.25-linux-amd64.tar.gz
[root@node01 ~]# ln -sv etcd-v3.1.25-linux-amd64 etcd
[root@node01 ~]# mkdir -p /opt/etcd/certs /opt/etcd/data /data/logs/etcd-server
[root@node01 etcd]# useradd -s /sbin/nologin -M etcd
[root@node01 certs]# cp /opt/certs/* /opt/etcd/certs/
[root@node01 etcd]# chmod 700 /opt/etcd/data
[root@node01 certs]# cd /opt/etcd
[root@node01 etcd]# vi etcd-server-startup.sh
#!/bin/sh
./etcd --name etcd01 \
--data-dir /opt/etcd/data \
--listen-peer-urls https://192.128.232.11:2380 \
--listen-client-urls https://192.128.232.11:2379,http://127.0.0.1:2379 \
--initial-advertise-peer-urls https://192.128.232.11:2380 \
--advertise-client-urls https://192.128.232.11:2379,http://127.0.0.1:2379 \
--initial-cluster etcd01=https://192.128.232.11:2380,etcd02=https://192.128.232.12:2380,etcd03=https://192.128.232.13:2380 \
--initial-cluster-state=new \
--cert-file=/opt/etcd/certs/etcd-server.pem \
--key-file=/opt/etcd/certs/etcd-server-key.pem \
--peer-cert-file=/opt/etcd/certs/etcd-peer.pem \
--peer-key-file=/opt/etcd/certs/etcd-peer-key.pem \
--trusted-ca-file=/opt/etcd/certs/ca.pem \
--peer-trusted-ca-file=/opt/etcd/certs/ca.pem \
--quota-backend-bytes 8000000000 \
--log-output stdout
[root@node01 etcd]# chmod +x etcd-server-startup.sh
[root@node01 etcd]# chown -R etcd.etcd /opt/etcd* /data/logs/etcd-server
# Install supervisor
[root@node01 etcd]# yum install supervisor -y
[root@node01 etcd]# systemctl start supervisord
[root@node01 etcd]# systemctl enable supervisord
[root@node01 etcd]# vi /etc/supervisord.d/etcd-server.ini
[program:etcd01]
command=/opt/etcd/etcd-server-startup.sh ; the program (relative uses PATH, can take args)
numprocs=1 ; number of processes copies to start (def 1)
directory=/opt/etcd ; directory to cwd to before exec (def no cwd)
autostart=true ; start at supervisord start (default: true)
autorestart=true ; retstart at unexpected quit (default: true)
startsecs=30 ; number of secs prog must stay running (def. 1)
startretries=3 ; max # of serial start failures (default 3)
exitcodes=0,2 ; 'expected' exit codes for process (default 0,2)
stopsignal=QUIT ; signal used to kill process (default TERM)
stopwaitsecs=10 ; max num secs to wait b4 SIGKILL (default 10)
user=etcd ; setuid to this UNIX account to run the program
redirect_stderr=true ; redirect proc stderr to stdout (default false)
stdout_logfile=/data/logs/etcd-server/etcd.stdout.log ; stdout log path, NONE for none; default AUTO
stdout_logfile_maxbytes=64MB ; max # logfile bytes b4 rotation (default 50MB)
stdout_logfile_backups=4 ; # of stdout logfile backups (default 10)
stdout_capture_maxbytes=1MB ; number of bytes in 'capturemode' (default 0)
stdout_events_enabled=false ; emit events on stdout writes (default false)
[root@node01 etcd]# supervisorctl update
[root@node01 etcd]# supervisorctl status
etcd01 RUNNING pid 6379, uptime 0:08:20
[root@node01 certs]# ss -lnt
State Recv-Q Send-Q Local Address:Port Peer Address:Port
LISTEN 0 128 192.128.232.11:2379 *:*
LISTEN 0 128 127.0.0.1:2379 *:*
LISTEN 0 128 192.128.232.11:2380 *:*
6. Install the second etcd node (etcd02) on master01 (copy the etcd tarball over first).
[root@master01 ~]# tar -zxf etcd-v3.1.25-linux-amd64.tar.gz -C /opt/
[root@master01 ~]# cd /opt/
[root@master01 opt]# ln -sv etcd-v3.1.25-linux-amd64/ etcd
[root@master01 opt]# useradd -s /sbin/nologin -M etcd
[root@master01 opt]# mkdir -p /opt/etcd/certs /opt/etcd/data /data/logs/etcd-server
[root@master01 opt]# chmod 700 /opt/etcd/data
# Copy the certificates from node01 to master01
[root@master01 ~]# cd /opt/etcd/certs/
[root@master01 certs]# scp node01:/opt/etcd/certs/* .
[root@master01 etcd]# vi /opt/etcd/etcd-server-startup.sh
#!/bin/sh
./etcd --name etcd02 \
--data-dir /opt/etcd/data \
--listen-peer-urls https://192.128.232.12:2380 \
--listen-client-urls https://192.128.232.12:2379,http://127.0.0.1:2379 \
--initial-advertise-peer-urls https://192.128.232.12:2380 \
--advertise-client-urls https://192.128.232.12:2379,http://127.0.0.1:2379 \
--initial-cluster etcd01=https://192.128.232.11:2380,etcd02=https://192.128.232.12:2380,etcd03=https://192.128.232.13:2380 \
--initial-cluster-state=new \
--cert-file=/opt/etcd/certs/etcd-server.pem \
--key-file=/opt/etcd/certs/etcd-server-key.pem \
--peer-cert-file=/opt/etcd/certs/etcd-peer.pem \
--peer-key-file=/opt/etcd/certs/etcd-peer-key.pem \
--trusted-ca-file=/opt/etcd/certs/ca.pem \
--peer-trusted-ca-file=/opt/etcd/certs/ca.pem \
--quota-backend-bytes 8000000000 \
--log-output stdout
[root@master01 etcd]# chmod +x /opt/etcd/etcd-server-startup.sh
[root@master01 etcd]# chown -R etcd.etcd /opt/etcd* /data/logs/etcd-server
[root@master01 etcd]# yum install supervisor -y
[root@master01 etcd]# systemctl start supervisord
[root@master01 etcd]# systemctl enable supervisord
[root@master01 etcd]# vi /etc/supervisord.d/etcd-server.ini
[program:etcd02] ; change this program name on each etcd node
command=/opt/etcd/etcd-server-startup.sh ; the program (relative uses PATH, can take args)
numprocs=1 ; number of processes copies to start (def 1)
directory=/opt/etcd ; directory to cwd to before exec (def no cwd)
autostart=true ; start at supervisord start (default: true)
autorestart=true ; retstart at unexpected quit (default: true)
startsecs=30 ; number of secs prog must stay running (def. 1)
startretries=3 ; max # of serial start failures (default 3)
exitcodes=0,2 ; 'expected' exit codes for process (default 0,2)
stopsignal=QUIT ; signal used to kill process (default TERM)
stopwaitsecs=10 ; max num secs to wait b4 SIGKILL (default 10)
user=etcd ; setuid to this UNIX account to run the program
redirect_stderr=true ; redirect proc stderr to stdout (default false)
stdout_logfile=/data/logs/etcd-server/etcd.stdout.log ; stdout log path, NONE for none; default AUTO
stdout_logfile_maxbytes=64MB ; max # logfile bytes b4 rotation (default 50MB)
stdout_logfile_backups=4 ; # of stdout logfile backups (default 10)
stdout_capture_maxbytes=1MB ; number of bytes in 'capturemode' (default 0)
stdout_events_enabled=false ; emit events on stdout writes (default false)
[root@master01 etcd]# supervisorctl update
etcd02: added process group
[root@master01 certs]# supervisorctl status
etcd02 RUNNING pid 16899, uptime 0:03:30
7. Install the third etcd node (etcd03) on master02 in the same way.
8. Verify the etcd cluster
[root@master02 etcd]# cp /opt/etcd/etcdctl /usr/local/bin/
[root@master02 certs]# cd /opt/etcd/certs
[root@master02 certs]# etcdctl --cacert=ca.pem --cert=etcd-server.pem --key=etcd-server-key.pem --endpoints="https://192.128.232.11:2379,https://192.128.232.12:2379,https://192.128.232.13:2379" endpoint status --write-out=table
+------------------------+------------------+---------+---------+-----------+------------+-----------+------------+--------------------+--------+
| ENDPOINT | ID | VERSION | DB SIZE | IS LEADER | IS LEARNER | RAFT TERM | RAFT INDEX | RAFT APPLIED INDEX | ERRORS |
+------------------------+------------------+---------+---------+-----------+------------+-----------+------------+--------------------+--------+
| https://192.128.232.11:2379 | 49df13ef248c0ccd | 3.4.10 | 20 kB | true | false | 207 | 9 | 9 | |
| https://192.128.232.12:2379 | bca178b1cfc53fdd | 3.4.10 | 25 kB | false | false | 207 | 9 | 9 | |
| https://192.128.232.13:2379 | 2192f9b1cd786234 | 3.4.10 | 20 kB | false | false | 207 | 9 | 9 | |
+------------------------+------------------+---------+---------+-----------+------------+-----------+------------+--------------------+--------+
[root@master02 certs]# etcdctl --cacert=ca.pem --cert=etcd-server.pem --key=etcd-server-key.pem --endpoints="https://192.128.232.11:2379" endpoint status -w table
+-----------------------------+-----------------+---------+---------+-----------+------------+-----------+------------+--------------------+--------+
| ENDPOINT | ID | VERSION | DB SIZE | IS LEADER | IS LEARNER | RAFT TERM | RAFT INDEX | RAFT APPLIED INDEX | ERRORS |
+-----------------------------+-----------------+---------+---------+-----------+------------+-----------+------------+--------------------+--------+
| https://192.128.232.11:2379 | 326ec117eddf797 | 3.4.10 | 20 kB | true | false | 1496 | 9 | 9 | |
+-----------------------------+-----------------+---------+---------+-----------+------------+-----------+------------+--------------------+--------+
9. Initialize master01
[root@master01 ~]# mkdir -p /etc/kubernetes/pki/etcd/
[root@master01 ~]# cp /opt/certs/ca.pem /etc/kubernetes/pki/etcd/
[root@master01 ~]# cp /opt/certs/etcd-client.pem /etc/kubernetes/pki/apiserver-etcd-client.pem
[root@master01 ~]# cp /opt/certs/etcd-client-key.pem /etc/kubernetes/pki/apiserver-etcd-client-key.pem
[root@master01 ~]# tree /etc/kubernetes/pki/
/etc/kubernetes/pki/
├── apiserver-etcd-client-key.pem
├── apiserver-etcd-client.pem
└── etcd
└── ca.pem
1 directory, 3 files
# Download the control-plane images locally
[root@master01 ~]# kubeadm config images pull
[root@master01 ~]# docker images|grep k8s >k8s_images.txt
# Re-tag the locally downloaded images and push them to the Harbor private registry.
[root@master01 ~]# vi k8s_images_tag_push.sh
#!/bin/bash
# Re-tag every image listed in k8s_images.txt and push it into the Harbor "kubeadm" project.
cat /root/k8s_images.txt |while read line
do
ImageId=`echo $line|awk '{print $3}'`                          # IMAGE ID column
ImageName=`echo $line|awk -F'/' '{print $2}'|awk '{print $1}'` # image name without the registry prefix
ImageVersion=`echo $line|awk '{print $2}'`                     # TAG column
docker tag $ImageId harbor.od.com/kubeadm/${ImageName}:${ImageVersion}
docker push harbor.od.com/kubeadm/${ImageName}:${ImageVersion}
done
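A sketch of how the script would be used, assuming the docker daemon is already logged in to the kubeadm project; the exact image list depends on what `docker images` printed on your host:
[root@master01 ~]# docker login harbor.od.com -u admin -p Harbor12345
[root@master01 ~]# bash k8s_images_tag_push.sh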
10. Create the init configuration file and edit it.
[root@master01 ~]# kubeadm config print init-defaults > kubeadm-config.yaml
# Before initializing the master, strip all of these comments from kubeadm-config.yaml, otherwise init may fail.
[root@master01 ~]# vi kubeadm-config.yaml
apiVersion: kubeadm.k8s.io/v1beta2
kind: ClusterConfiguration
kubernetesVersion: v1.18.20
imageRepository: harbor.od.com/kubeadm
dns:
  type: CoreDNS
etcd:
#  local:
#    dataDir: /var/lib/etcd    # etcd runs outside the cluster, so no local data dir is defined
  external:
    endpoints:                  # the external etcd cluster addresses
    - https://192.128.232.11:2379
    - https://192.128.232.12:2379
    - https://192.128.232.13:2379
    caFile: /etc/kubernetes/pki/etcd/ca.pem
    certFile: /etc/kubernetes/pki/apiserver-etcd-client.pem
    keyFile: /etc/kubernetes/pki/apiserver-etcd-client-key.pem
apiServer:
  certSANs:                     # every kube-apiserver hostname, IP and the VIP; CIDR ranges are not allowed
  - master01
  - master02
  - master03
  - node01
  - node02
  - node03
  - 192.128.232.11
  - 192.128.232.12
  - 192.128.232.13
  - 192.128.232.14
  - 192.128.232.15
  - 192.128.232.16
  - 192.128.232.17
  - 192.128.232.18
  - 192.128.232.19
  - 192.128.232.20
controlPlaneEndpoint: "192.128.232.15:6443"    # VIP address and port
networking:
  podSubnet: "10.244.0.0/16"
  serviceSubnet: "10.96.0.0/12"
---
apiVersion: kubeproxy.config.k8s.io/v1alpha1
kind: KubeProxyConfiguration
mode: ipvs                      # kube-proxy mode
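Before running the real init it can help to dry-run the configuration; both commands below only read the config and are safe to repeat:
[root@master01 ~]# kubeadm config images list --config=kubeadm-config.yaml    # image names should resolve to harbor.od.com/kubeadm
[root@master01 ~]# kubeadm init --config=kubeadm-config.yaml --dry-run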
################################master init #############################################
## Initialize the k8s cluster on master01
[root@master01 ~]# kubeadm init --config=kubeadm-config.yaml
。。。。。。。。。。。。。。。。。。。。。。。。。。。。。
Your Kubernetes control-plane has initialized successfully!
To start using your cluster, you need to run the following as a regular user:
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
You should now deploy a pod network to the cluster.
Run "kubectl apply -f [podnetwork].yaml" with one of the options listed at:
https://kubernetes.io/docs/concepts/cluster-administration/addons/
You can now join any number of control-plane nodes by copying certificate authorities
and service account keys on each node and then running the following as root:
kubeadm join 192.128.232.15:6443 --token nxejnz.s3c66bpkw3aibbd5 \
--discovery-token-ca-cert-hash sha256:9c6d3b715620fb601470d5186b46b9f693f06c7bba69e7e2e70c91b7791f7010 \
--control-plane
Then you can join any number of worker nodes by running the following on each as root:
kubeadm join 192.128.232.15:6443 --token nxejnz.s3c66bpkw3aibbd5 \
--discovery-token-ca-cert-hash sha256:9c6d3b715620fb601470d5186b46b9f693f06c7bba69e7e2e70c91b7791f7010
# Configure the kubectl tool
[root@master01 ~]# mkdir -p $HOME/.kube
[root@master01 ~]# sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
[root@master01 ~]# sudo chown $(id -u):$(id -g) $HOME/.kube/config
# Check the component status
[root@master01 ~]# kubectl get cs
NAME STATUS MESSAGE ERROR
scheduler Unhealthy Get http://127.0.0.1:10251/healthz: dial tcp 127.0.0.1:10251: connect: connection refused
controller-manager Unhealthy Get http://127.0.0.1:10252/healthz: dial tcp 127.0.0.1:10252: connect: connection refused
etcd-1 Healthy {"health":"true"}
etcd-2 Healthy {"health":"true"}
etcd-0 Healthy {"health":"true"}
# This happens because scheduler and controller-manager are started with --port=0 (insecure port disabled).
[root@master01 ~]# cd /etc/kubernetes/manifests/
[root@master01 manifests]# ll
total 12
-rw------- 1 root root 2789 Dec  8 17:02 kube-apiserver.yaml
-rw------- 1 root root 2592 Dec  8 17:02 kube-controller-manager.yaml
-rw------- 1 root root 1147 Dec  8 17:02 kube-scheduler.yaml
[root@master01 manifests]# vi kube-controller-manager.yaml
apiVersion: v1
kind: Pod
metadata:
creationTimestamp: null
labels:
component: kube-controller-manager
tier: control-plane
name: kube-controller-manager
namespace: kube-system
spec:
containers:
- command:
- kube-controller-manager
- --allocate-node-cidrs=true
- --authentication-kubeconfig=/etc/kubernetes/controller-manager.conf
- --authorization-kubeconfig=/etc/kubernetes/controller-manager.conf
- --bind-address=127.0.0.1
- --client-ca-file=/etc/kubernetes/pki/ca.crt
- --cluster-cidr=10.244.0.0/16
- --cluster-name=kubernetes
- --cluster-signing-cert-file=/etc/kubernetes/pki/ca.crt
- --cluster-signing-key-file=/etc/kubernetes/pki/ca.key
- --controllers=*,bootstrapsigner,tokencleaner
- --kubeconfig=/etc/kubernetes/controller-manager.conf
- --leader-elect=true
- --node-cidr-mask-size=24
# - --port=0    # comment out the port flag
- --requestheader-client-ca-file=/etc/kubernetes/pki/front-proxy-ca.crt
- --root-ca-file=/etc/kubernetes/pki/ca.crt
- --service-account-private-key-file=/etc/kubernetes/pki/sa.key
- --service-cluster-ip-range=10.96.0.0/12
- --use-service-account-credentials=true
image: harbor.od.com/kubeadm/kube-controller-manager:v1.18.20
imagePullPolicy: IfNotPresent
livenessProbe:
failureThreshold: 8
httpGet:
host: 127.0.0.1
path: /healthz
port: 10257
scheme: HTTPS
initialDelaySeconds: 15
timeoutSeconds: 15
name: kube-controller-manager
resources:
requests:
cpu: 200m
volumeMounts:
- mountPath: /etc/ssl/certs
name: ca-certs
readOnly: true
- mountPath: /etc/pki
name: etc-pki
readOnly: true
- mountPath: /usr/libexec/kubernetes/kubelet-plugins/volume/exec
name: flexvolume-dir
- mountPath: /etc/kubernetes/pki
name: k8s-certs
readOnly: true
- mountPath: /etc/kubernetes/controller-manager.conf
name: kubeconfig
readOnly: true
hostNetwork: true
priorityClassName: system-cluster-critical
volumes:
- hostPath:
path: /etc/ssl/certs
type: DirectoryOrCreate
[root@master01 manifests]# vi kube-scheduler.yaml
apiVersion: v1
kind: Pod
metadata:
creationTimestamp: null
labels:
component: kube-scheduler
tier: control-plane
name: kube-scheduler
namespace: kube-system
spec:
containers:
- command:
- kube-scheduler
- --authentication-kubeconfig=/etc/kubernetes/scheduler.conf
- --authorization-kubeconfig=/etc/kubernetes/scheduler.conf
- --bind-address=127.0.0.1
- --kubeconfig=/etc/kubernetes/scheduler.conf
- --leader-elect=true
# - --port=0    # comment out the port flag
image: harbor.od.com/kubeadm/kube-scheduler:v1.18.20
imagePullPolicy: IfNotPresent
livenessProbe:
failureThreshold: 8
httpGet:
host: 127.0.0.1
path: /healthz
port: 10259
scheme: HTTPS
initialDelaySeconds: 15
timeoutSeconds: 15
name: kube-scheduler
resources:
requests:
cpu: 100m
volumeMounts:
- mountPath: /etc/kubernetes/scheduler.conf
name: kubeconfig
readOnly: true
hostNetwork: true
priorityClassName: system-cluster-critical
volumes:
- hostPath:
path: /etc/kubernetes/scheduler.conf
type: FileOrCreate
name: kubeconfig
status: {}
# Restart kubelet
[root@master01 manifests]# systemctl restart kubelet
# The cluster components now report healthy
[root@master01 manifests]# kubectl get cs
NAME STATUS MESSAGE ERROR
etcd-2 Healthy {"health":"true"}
etcd-0 Healthy {"health":"true"}
etcd-1 Healthy {"health":"true"}
scheduler Healthy ok
controller-manager Healthy ok
# Check the nodes
[root@master01 manifests]# kubectl get nodes
NAME STATUS ROLES AGE VERSION
master01 NotReady master 13m v1.18.18
11. Install the flannel network
Create the flannel network from master01
[root@master01 ~]# wget https://raw.githubusercontent.com/coreos/flannel/2140ac876ef134e0ed5af15c65e414cf26827915/Documentation/kube-flannel.yml
[root@master01 ~]# kubectl apply -f kube-flannel.yml
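It can take a minute to pull the image; verify that the flannel Pods come up and that the node eventually turns Ready (output will vary):
[root@master01 ~]# kubectl get pods -n kube-system -o wide | grep flannel
[root@master01 ~]# kubectl get nodes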
# Check certificate expiration
[root@master01 ~]# kubeadm alpha certs check-expiration
[check-expiration] Reading configuration from the cluster...
[check-expiration] FYI: You can look at this config file with 'kubectl -n kube-system get cm kubeadm-config -oyaml'
[check-expiration] Error reading configuration from the Cluster. Falling back to default configuration
W1207 10:20:28.086073 7242 configset.go:202] WARNING: kubeadm cannot validate component configs for API groups [kubelet.config.k8s.io kubeproxy.config.k8s.io]
CERTIFICATE EXPIRES RESIDUAL TIME CERTIFICATE AUTHORITY EXTERNALLY MANAGED
admin.conf Nov 12, 2111 07:57 UTC 89y no
apiserver Nov 12, 2111 07:57 UTC 89y ca no
apiserver-etcd-client Nov 12, 2111 07:57 UTC 89y etcd-ca no
apiserver-kubelet-client Nov 12, 2111 07:57 UTC 89y ca no
controller-manager.conf Nov 12, 2111 07:57 UTC 89y no
etcd-healthcheck-client Nov 12, 2111 07:57 UTC 89y etcd-ca no
etcd-peer Nov 12, 2111 07:57 UTC 89y etcd-ca no
etcd-server Nov 12, 2111 07:57 UTC 89y etcd-ca no
front-proxy-client Nov 12, 2111 07:57 UTC 89y front-proxy-ca no
scheduler.conf Nov 12, 2111 07:57 UTC 89y no
CERTIFICATE AUTHORITY EXPIRES RESIDUAL TIME EXTERNALLY MANAGED
ca Nov 12, 2111 07:57 UTC 89y no
etcd-ca Nov 12, 2111 07:57 UTC 89y no
front-proxy-ca Nov 12, 2111 07:57 UTC 89y no
12. Send master01's certificates over to master02; with external etcd this script differs slightly from the usual one.
[root@master01 ~]# vi cert-main-master.sh
#!/bin/bash
USER=root # customizable
CONTROL_PLANE_IPS="192.128.232.13"
for host in ${CONTROL_PLANE_IPS}; do
ssh $host mkdir -p /etc/kubernetes/pki/etcd
scp /etc/kubernetes/pki/ca.crt "${USER}"@$host:/etc/kubernetes/pki/
scp /etc/kubernetes/pki/ca.key "${USER}"@$host:/etc/kubernetes/pki/
scp /etc/kubernetes/pki/sa.key "${USER}"@$host:/etc/kubernetes/pki/
scp /etc/kubernetes/pki/sa.pub "${USER}"@$host:/etc/kubernetes/pki/
scp /etc/kubernetes/pki/front-proxy-ca.crt "${USER}"@$host:/etc/kubernetes/pki/
scp /etc/kubernetes/pki/front-proxy-ca.key "${USER}"@$host:/etc/kubernetes/pki/
scp /etc/kubernetes/pki/apiserver-etcd-client-key.pem "${USER}"@$host:/etc/kubernetes/pki/
scp /etc/kubernetes/pki/apiserver-etcd-client.pem "${USER}"@$host:/etc/kubernetes/pki/
# With external etcd, the etcd CA trusted by the apiserver must also be copied
scp /etc/kubernetes/pki/etcd/ca.pem "${USER}"@$host:/etc/kubernetes/pki/etcd/
done
[root@master01 ~]# chmod +x cert-main-master.sh
# Push the certificates
[root@master01 ~]# ./cert-main-master.sh
ca.crt 100% 1029 352.5KB/s 00:00
ca.key 100% 1675 117.9KB/s 00:00
sa.key 100% 1675 1.9MB/s 00:00
sa.pub 100% 451 396.2KB/s 00:00
front-proxy-ca.crt 100% 1038 258.7KB/s 00:00
front-proxy-ca.key 100% 1679 61.8KB/s 00:00
apiserver-etcd-client-key.pem 100% 227 24.1KB/s 00:00
apiserver-etcd-client.pem 100% 875 442.4KB/s 00:00
ca.pem 100% 1127 74.8KB/s 00:00
13. Join master02 to the cluster as a control-plane node
[root@master02 ~]# kubeadm join 192.128.232.15:6443 --token nxejnz.s3c66bpkw3aibbd5 \
--discovery-token-ca-cert-hash sha256:9c6d3b715620fb601470d5186b46b9f693f06c7bba69e7e2e70c91b7791f7010 \
--control-plane
。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。
[mark-control-plane] Marking the node master02 as control-plane by adding the label "node-role.kubernetes.io/master=''"
[mark-control-plane] Marking the node master02 as control-plane by adding the taints [node-role.kubernetes.io/master:NoSchedule]
This node has joined the cluster and a new control plane instance was created:
* Certificate signing request was sent to apiserver and approval was received.
* The Kubelet was informed of the new secure connection details.
* Control plane (master) label and taint were applied to the new node.
* The Kubernetes control plane instances scaled up.
To start administering your cluster from this node, you need to run the following as a regular user:
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
Run 'kubectl get nodes' to see this node join the cluster.
14. Set up kubectl on master02
[root@master02 ~]# rm /root/.kube/ -rf
[root@master02 ~]# mkdir -p $HOME/.kube
[root@master02 ~]# sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
[root@master02 ~]# sudo chown $(id -u):$(id -g) $HOME/.kube/config
15. Switch the VIP over to master02
[root@master02 kubernetes]# ip addr|grep eth0
2: eth0:
inet 192.128.232.13/24 brd 192.128.232.255 scope global noprefixroute eth0
inet 192.128.232.15/24 scope global secondary eth0
[root@master02 keepalived]# kubectl get nodes
NAME STATUS ROLES AGE VERSION
master01 Ready master 57m v1.18.18
master02 Ready master 27m v1.18.18
16. The component status on master02 shows the same problem
[root@master02 keepalived]# kubectl get cs
NAME STATUS MESSAGE ERROR
scheduler Unhealthy Get http://127.0.0.1:10251/healthz: dial tcp 127.0.0.1:10251: connect: connection refused
controller-manager Unhealthy Get http://127.0.0.1:10252/healthz: dial tcp 127.0.0.1:10252: connect: connection refused
etcd-2 Healthy {"health":"true"}
etcd-0 Healthy {"health":"true"}
etcd-1 Healthy {"health":"true"}
17. Comment out the port flag
[root@master02 keepalived]# cd /etc/kubernetes/manifests/
[root@master02 manifests]# ll
total 12
-rw------- 1 root root 2789 Dec  8 17:34 kube-apiserver.yaml
-rw------- 1 root root 2592 Dec  8 17:34 kube-controller-manager.yaml
-rw------- 1 root root 1147 Dec  8 17:34 kube-scheduler.yaml
[root@master02 manifests]# vi kube-controller-manager.yaml
apiVersion: v1
kind: Pod
metadata:
creationTimestamp: null
labels:
component: kube-controller-manager
tier: control-plane
name: kube-controller-manager
namespace: kube-system
spec:
containers:
- command:
- kube-controller-manager
- --allocate-node-cidrs=true
- --authentication-kubeconfig=/etc/kubernetes/controller-manager.conf
- --authorization-kubeconfig=/etc/kubernetes/controller-manager.conf
- --bind-address=127.0.0.1
- --client-ca-file=/etc/kubernetes/pki/ca.crt
- --cluster-cidr=10.244.0.0/16
- --cluster-name=kubernetes
- --cluster-signing-cert-file=/etc/kubernetes/pki/ca.crt
- --cluster-signing-key-file=/etc/kubernetes/pki/ca.key
- --controllers=*,bootstrapsigner,tokencleaner
- --kubeconfig=/etc/kubernetes/controller-manager.conf
- --leader-elect=true
- --node-cidr-mask-size=24
# - --port=0
- --requestheader-client-ca-file=/etc/kubernetes/pki/front-proxy-ca.crt
- --root-ca-file=/etc/kubernetes/pki/ca.crt
- --service-account-private-key-file=/etc/kubernetes/pki/sa.key
- --service-cluster-ip-range=10.96.0.0/12
- --use-service-account-credentials=true
image: harbor.od.com/kubeadm/kube-controller-manager:v1.18.20
imagePullPolicy: IfNotPresent
livenessProbe:
failureThreshold: 8
httpGet:
host: 127.0.0.1
path: /healthz
port: 10257
scheme: HTTPS
initialDelaySeconds: 15
timeoutSeconds: 15
name: kube-controller-manager
resources:
requests:
cpu: 200m
volumeMounts:
- mountPath: /etc/ssl/certs
name: ca-certs
readOnly: true
- mountPath: /etc/pki
name: etc-pki
readOnly: true
- mountPath: /usr/libexec/kubernetes/kubelet-plugins/volume/exec
name: flexvolume-dir
- mountPath: /etc/kubernetes/pki
name: k8s-certs
readOnly: true
- mountPath: /etc/kubernetes/controller-manager.conf
name: kubeconfig
readOnly: true
hostNetwork: true
priorityClassName: system-cluster-critical
volumes:
- hostPath:
path: /etc/ssl/certs
type: DirectoryOrCreate
#################################################################################################
[root@master02 manifests]# vi kube-scheduler.yaml
apiVersion: v1
kind: Pod
metadata:
creationTimestamp: null
labels:
component: kube-scheduler
tier: control-plane
name: kube-scheduler
namespace: kube-system
spec:
containers:
- command:
- kube-scheduler
- --authentication-kubeconfig=/etc/kubernetes/scheduler.conf
- --authorization-kubeconfig=/etc/kubernetes/scheduler.conf
- --bind-address=127.0.0.1
- --kubeconfig=/etc/kubernetes/scheduler.conf
- --leader-elect=true
# - --port=0
image: harbor.od.com/kubeadm/kube-scheduler:v1.18.20
imagePullPolicy: IfNotPresent
livenessProbe:
failureThreshold: 8
httpGet:
host: 127.0.0.1
path: /healthz
port: 10259
scheme: HTTPS
initialDelaySeconds: 15
timeoutSeconds: 15
name: kube-scheduler
resources:
requests:
cpu: 100m
volumeMounts:
- mountPath: /etc/kubernetes/scheduler.conf
name: kubeconfig
readOnly: true
hostNetwork: true
priorityClassName: system-cluster-critical
volumes:
- hostPath:
path: /etc/kubernetes/scheduler.conf
type: FileOrCreate
name: kubeconfig
status: {}
########################################################################################################################
[root@master02 manifests]# systemctl restart kubelet
[root@master02 manifests]# kubectl get cs
NAME STATUS MESSAGE
controller-manager Healthy ok
scheduler Healthy ok
etcd-2 Healthy {"health":"true"}
etcd-1 Healthy {"health":"true"}
etcd-0 Healthy {"health":"true"}
Part 4: Enable IPVS mode for kube-proxy
1. Edit config.conf in the kube-system/kube-proxy ConfigMap and set mode: "ipvs"
[root@master01 ~]# kubectl edit cm kube-proxy -n kube-system
...
    ipvs:
      excludeCIDRs: null
      minSyncPeriod: 0s
      scheduler: "nq"          # IPVS scheduling algorithm
      strictARP: false
      syncPeriod: 30s
    kind: KubeProxyConfiguration
    metricsBindAddress: 127.0.0.1:10249
    mode: "ipvs"               # switch the proxy mode to ipvs
    nodePortAddresses: null
    oomScoreAdj: -999
    portRange: ""
    resourceContainer: /kube-proxy
...
2. These Pods can simply be deleted; Kubernetes recreates them automatically.
[root@master01 ~]# kubectl get pods -n kube-system|grep proxy
kube-proxy-8kgdr 1/1 Running 0 79m
kube-proxy-dq8zz 1/1 Running 0 24m
kube-proxy-gdtqx 1/1 Running 0 155m
3. Delete the kube-proxy Pods in bulk
[root@master01 ~]# kubectl get pod -n kube-system | grep kube-proxy | awk '{system("kubectl delete pod "$1" -n kube-system")}'
Because the kube-proxy configuration was changed in the ConfigMap, nodes added later will use ipvs mode directly.
4. Check the logs
[root@master01 ~] kubectl get pods -n kube-system|grep proxy
kube-proxy-84mgz 1/1 Running 0 16s
kube-proxy-r8sxj 1/1 Running 0 15s
kube-proxy-wjdmp 1/1 Running 0 12s
# The log prints "Using ipvs Proxier", confirming that ipvs mode is enabled.
[root@master01 ~]# kubectl logs -f kube-proxy-84mgz -n kube-system
I0827 04:59:16.916862 1 server_others.go:170] Using ipvs Proxier.
W0827 04:59:16.917140 1 proxier.go:401] IPVS scheduler not specified, use rr by default
I0827 04:59:16.917748 1 server.go:534] Version: v1.15.3
I0827 04:59:16.927407 1 conntrack.go:52] Setting nf_conntrack_max to 131072
I0827 04:59:16.929217 1 config.go:187] Starting service config controller
I0827 04:59:16.929236 1 controller_utils.go:1029] Waiting for caches to sync for service config controller
I0827 04:59:16.929561 1 config.go:96] Starting endpoints config controller
I0827 04:59:16.929577 1 controller_utils.go:1029] Waiting for caches to sync for endpoints config controller
I0827 04:59:17.029899 1 controller_utils.go:1036] Caches are synced for endpoints config controller
I0827 04:59:17.029954 1 controller_utils.go:1036] Caches are synced for service config controller
5. Verify with ipvsadm: the Services created earlier now have LVS virtual servers.
[root@master01 ~]# ipvsadm -Ln
IP Virtual Server version 1.2.1 (size=4096)
Prot LocalAddress:Port Scheduler Flags
-> RemoteAddress:Port Forward Weight ActiveConn InActConn
TCP 10.96.0.1:443 nq
-> 1.1.1.101:6443 Masq 1 0 0
TCP 10.96.0.10:53 nq
-> 10.244.0.2:53 Masq 1 0 0
-> 10.244.2.8:53 Masq 1 0 0
TCP 10.96.0.10:9153 nq
-> 10.244.0.2:9153 Masq 1 0 0
-> 10.244.2.8:9153 Masq 1 0 0
UDP 10.96.0.10:53 nq
-> 10.244.0.2:53 Masq 1 0 0
-> 10.244.2.8:53 Masq 1 0 0
Part 5: Join node01 to the cluster; unlike a master, do not pass the "--control-plane" flag
## If the join token has been lost or has expired:
# Case 1: the token has expired
1. Get a token
# List the tokens that have not expired yet
[root@master01 ~]# kubeadm token list | awk -F" " '{print $1}' |tail -n 1
# If nothing is printed, the token has expired; create a new one (this also prints the full join command)
[root@master01 ~]# kubeadm token create --print-join-command
# Case 2: the token has not expired
1. List the tokens
[root@master01 ~]# kubeadm token list | awk -F" " '{print $1}' |tail -n 1
jd0u21.4ydhozszk7255xxb
2. Get the SHA-256 hash of the CA public key
[root@master01 ~]# openssl x509 -pubkey -in /etc/kubernetes/pki/ca.crt | openssl rsa -pubin -outform der 2>/dev/null | openssl dgst -sha256 -hex | sed 's/^ .* //'
(stdin)= d084adeb638b5398d146d563a768c784e44777c883c8308615bc88cb2f1f9fbc
3. Join node01 to the cluster (run on node01).
[root@node01 ~]# kubeadm join 192.128.232.15:6443 --token jd0u21.4ydhozszk7255xxb \
--discovery-token-ca-cert-hash sha256:d084adeb638b5398d146d563a768c784e44777c883c8308615bc88cb2f1f9fbc
。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。
W1208 18:22:47.238149 2116 join.go:346] [preflight] WARNING: JoinControlPane.controlPlane settings will be ignored when control-plane flag is not set.
[preflight] Running pre-flight checks
[WARNING SystemVerification]: this Docker version is not on the list of validated versions: 20.10.6. Latest validated version: 19.03
[preflight] Reading configuration from the cluster...
[preflight] FYI: You can look at this config file with 'kubectl -n kube-system get cm kubeadm-config -oyaml'
[kubelet-start] Downloading configuration for the kubelet from the "kubelet-config-1.18" ConfigMap in the kube-system namespace
[kubelet-start] Writing kubelet configuration to file "/var/lib/kubelet/config.yaml"
[kubelet-start] Writing kubelet environment file with flags to file "/var/lib/kubelet/kubeadm-flags.env"
[kubelet-start] Starting the kubelet
[kubelet-start] Waiting for the kubelet to perform the TLS Bootstrap...
This node has joined the cluster:
* Certificate signing request was sent to apiserver and a response was received.
* The Kubelet was informed of the new secure connection details.
Run 'kubectl get nodes' on the control-plane to see this node join the cluster.
2. On master01, check that node01 has joined the cluster
[root@master01 ~]# kubectl get nodes
NAME STATUS ROLES AGE VERSION
master01 Ready master 79m v1.18.18
master02 Ready master 49m v1.18.18
node01 Ready
3. Test the cluster by deploying a DaemonSet.
[root@master02 ~]# cat nginx-ds.yaml
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: nginx-ds
spec:
  selector:
    matchLabels:
      app: nginx-ds
  template:
    metadata:
      labels:
        app: nginx-ds
    spec:
      containers:
      - name: my-nginx
        image: nginx:alpine
        ports:
        - containerPort: 80
[root@master02 ~]# kubectl apply -f nginx-ds.yaml
daemonset.apps/nginx-ds created
4. Check whether the masters carry taints
[root@master02 ~]# kubectl describe node master01|grep Taint
Taints: node-role.kubernetes.io/master:NoSchedule
[root@master02 ~]# kubectl describe node master02|grep Taint
Taints: node-role.kubernetes.io/master:NoSchedule
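If you ever want test Pods to be scheduled on the masters as well, the taint can be removed; this is optional and not needed for the rest of this walkthrough (the trailing "-" deletes the taint):
[root@master02 ~]# kubectl taint nodes master01 node-role.kubernetes.io/master:NoSchedule-
[root@master02 ~]# kubectl taint nodes master02 node-role.kubernetes.io/master:NoSchedule-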
5. The Pod is scheduled onto node01, because both masters carry the taint.
[root@master02 ~]# kubectl get pod -o wide
NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
nginx-ds-pkqkj 1/1 Running 0 55s 10.244.2.2 node01
6. Back up the images
[root@node01 ~]# docker save $(docker images | grep -vE 'REPOSITORY | redis' | awk 'BEGIN{OFS=":";ORS=" "}{print $1,$2}') -o export.tar
7. Load them on the node
[root@node01 ~]# docker load -i export.tar
8. Remove a node
Run on a master node: kubectl drain node01 --delete-local-data --force --ignore-daemonsets
[root@master01]# kubectl drain node01 --delete-local-data --force --ignore-daemonsets
node/node01 cordoned
WARNING: ignoring DaemonSet-managed Pods: kube-system/kube-flannel-ds-amd64-wmqnf, kube-system/kube-proxy-g456v
evicting pod "coredns-5c98db65d4-6lpr2"
evicting pod "nginx-deploy-7689897d8d-kfc7v"
pod/nginx-deploy-7689897d8d-kfc7v evicted
pod/coredns-5c98db65d4-6lpr2 evicted
node/node01 evicted
[root@master01 ~]# kubectl get nodes
NAME STATUS ROLES AGE VERSION
master01 Ready master 79m v1.18.18
master02 Ready master 49m v1.18.18
node01 Ready,SchedulingDisabled
[root@master01]# kubectl delete node node01
[root@master01]# kubectl get nodes
Run on node01:
[root@node01 ~]# kubeadm reset
[root@node01 ~]# ifconfig cni0 down
[root@node01 ~]# ip link delete cni0
[root@node01 ~]# ifconfig flannel.1 down
[root@node01 ~]# ip link delete flannel.1
[root@node01 ~]# rm -rf /var/lib/cni/
[root@node01 ~]# systemctl stop kubelet
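kubeadm reset does not clean up iptables/IPVS rules or the CNI configuration; if the node will be reused, it is common to also run something like the following (adjust to your environment):
[root@node01 ~]# iptables -F && iptables -t nat -F && iptables -t mangle -F && iptables -X
[root@node01 ~]# ipvsadm --clear
[root@node01 ~]# rm -rf /etc/cni/net.d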