失败翻车记录
一、查看k8s中所有证书的到期时间:
# kubeadm certs check-expiration
[check-expiration] Reading configuration from the cluster...
[check-expiration] FYI: You can look at this config file with 'kubectl -n kube-system get cm kubeadm-config -o yaml'
CERTIFICATE EXPIRES RESIDUAL TIME CERTIFICATE AUTHORITY EXTERNALLY MANAGED
admin.conf Oct 03, 2022 07:37 UTC 4d no
apiserver Oct 03, 2022 07:37 UTC 4d ca no
apiserver-etcd-client Oct 03, 2022 07:37 UTC 4d etcd-ca no
apiserver-kubelet-client Oct 03, 2022 07:37 UTC 4d ca no
controller-manager.conf Oct 03, 2022 07:37 UTC 4d no
etcd-healthcheck-client Oct 03, 2022 07:37 UTC 4d etcd-ca no
etcd-peer Oct 03, 2022 07:37 UTC 4d etcd-ca no
etcd-server Oct 03, 2022 07:37 UTC 4d etcd-ca no
front-proxy-client Oct 03, 2022 07:37 UTC 4d front-proxy-ca no
scheduler.conf Oct 03, 2022 07:37 UTC 4d no
CERTIFICATE AUTHORITY EXPIRES RESIDUAL TIME EXTERNALLY MANAGED
ca Oct 01, 2031 07:37 UTC 9y no
etcd-ca Oct 01, 2031 07:37 UTC 9y no
front-proxy-ca Oct 01, 2031 07:37 UTC 9y no
查看基础配置
# kubectl get node -o wide
NAME STATUS ROLES AGE VERSION INTERNAL-IP EXTERNAL-IP OS-IMAGE KERNEL-VERSION CONTAINER-RUNTIME
master1 Ready control-plane,master 361d v1.20.6 192.168.100.171 CentOS Linux 7 (Core) 3.10.0-1160.42.2.el7.x86_64 docker://20.10.6
master2 Ready control-plane,master 361d v1.20.6 192.168.100.172 CentOS Linux 7 (Core) 3.10.0-514.el7.x86_64 docker://20.10.6
master3 Ready control-plane,master 361d v1.20.6 192.168.100.173 CentOS Linux 7 (Core) 3.10.0-514.el7.x86_64 docker://20.10.6
node01 Ready 361d v1.20.6 192.168.100.174 CentOS Linux 7 (Core) 3.10.0-514.el7.x86_64 docker://20.10.6
node02 Ready 361d v1.20.6 192.168.100.175 CentOS Linux 7 (Core) 3.10.0-514.el7.x86_64 docker://20.10.6
node03 Ready 361d v1.20.6 192.168.100.176 CentOS Linux 7 (Core) 3.10.0-514.el7.x86_64 docker://20.10.6
二、查看CA证书过期时间:
# openssl x509 -in /etc/kubernetes/pki/ca.crt -noout -text |grep Not
Not Before: Oct 3 07:37:14 2021 GMT
Not After : Oct 1 07:37:14 2031 GMT
查看集群证书过期时间:
# openssl x509 -in /etc/kubernetes/pki/apiserver.crt -noout -text |grep ' Not '
Not Before: Oct 3 07:37:14 2021 GMT
Not After : Oct 3 07:37:15 2022 GMT
如果集群证书还没有过期,可以先导出当前集群的初始化配置文件备用:
kubeadm config view > kubeadm.yaml
# 注意:kubeadm config view 自 v1.19 起已标记废弃(v1.22 中移除),也可以直接从 ConfigMap 导出:
# kubectl -n kube-system get cm kubeadm-config -o jsonpath='{.data.ClusterConfiguration}' > kubeadm.yaml
三、备份证书和配置文件
备份证书
[root@master1 ~]# mkdir 0929
[root@master1 ~]# cd 0929
[root@master1 0929]# ll /etc/kubernetes/
总用量 32
-rw------- 1 root root 5568 10月 3 2021 admin.conf
-rw------- 1 root root 5603 10月 3 2021 controller-manager.conf
-rw------- 1 root root 1924 10月 3 2021 kubelet.conf
drwxr-xr-x 2 root root 113 1月 10 2022 manifests
drwxr-xr-x 3 root root 4096 10月 3 2021 pki
-rw------- 1 root root 5555 10月 3 2021 scheduler.conf
[root@master1 0929]# cp -rp /etc/kubernetes ./
[root@master1 0929]# ll
总用量 0
drwxr-xr-x 4 root root 125 1月 8 2022 kubernetes
[root@master1 0929]# ll kubernetes/
总用量 32
-rw------- 1 root root 5568 10月 3 2021 admin.conf
-rw------- 1 root root 5603 10月 3 2021 controller-manager.conf
-rw------- 1 root root 1924 10月 3 2021 kubelet.conf
drwxr-xr-x 2 root root 113 1月 10 2022 manifests
drwxr-xr-x 3 root root 4096 10月 3 2021 pki
-rw------- 1 root root 5555 10月 3 2021 scheduler.conf
备份静态pod配置文件
[root@master1 0929]# ll /etc/kubernetes/manifests/
总用量 16
-rw------- 1 root root 2226 10月 3 2021 etcd.yaml
-rw------- 1 root root 3379 1月 10 2022 kube-apiserver.yaml
-rw------- 1 root root 2827 10月 6 2021 kube-controller-manager.yaml
-rw------- 1 root root 1413 10月 6 2021 kube-scheduler.yaml
[root@master1 0929]# cp -r /etc/kubernetes/manifests ./
[root@master1 0929]# ll
总用量 0
drwxr-xr-x 4 root root 125 1月 8 2022 kubernetes
drwxr-xr-x 2 root root 113 9月 29 11:02 manifests
[root@master1 0929]# ll manifests/
总用量 16
-rw------- 1 root root 2226 9月 29 11:02 etcd.yaml
-rw------- 1 root root 3379 9月 29 11:02 kube-apiserver.yaml
-rw------- 1 root root 2827 9月 29 11:02 kube-controller-manager.yaml
-rw------- 1 root root 1413 9月 29 11:02 kube-scheduler.yaml
备份etcd
[root@master1 0929]# mkdir -p /data/etcd_backup_dir
[root@master1 0929]# ll /data/etcd_backup_dir/
总用量 10060
-rwxr-xr-x 1 root root 351 1月 16 2022 etcd_backup.sh
-rw------- 1 root root 5144608 1月 16 2022 etcd-snapshot-20220116.db
-rw------- 1 root root 5144608 1月 16 2022 snap-202201161455.db
[root@master1 0929]# ETCDCTL_API=3 etcdctl --endpoints 127.0.0.1:2379 --cert="/etc/kubernetes/pki/etcd/server.crt" --key="/etc/kubernetes/pki/etcd/server.key" --cacert="/etc/kubernetes/pki/etcd/ca.crt" snapshot save /data/etcd_backup_dir/snap-$(date +%Y%m%d%H%M).db
{"level":"info","ts":1664420716.5343292,"caller":"snapshot/v3_snapshot.go:119","msg":"created temporary db file","path":"/data/etcd_backup_dir/snap-202209291105.db.part"}
{"level":"info","ts":"2022-09-29T11:05:16.555+0800","caller":"clientv3/maintenance.go:200","msg":"opened snapshot stream; downloading"}
{"level":"info","ts":1664420716.555565,"caller":"snapshot/v3_snapshot.go:127","msg":"fetching snapshot","endpoint":"127.0.0.1:2379"}
{"level":"info","ts":"2022-09-29T11:05:17.360+0800","caller":"clientv3/maintenance.go:208","msg":"completed snapshot read; closing"}
{"level":"info","ts":1664420717.4313715,"caller":"snapshot/v3_snapshot.go:142","msg":"fetched snapshot","endpoint":"127.0.0.1:2379","size":"5.3 MB","took":0.896710978}
{"level":"info","ts":1664420717.431757,"caller":"snapshot/v3_snapshot.go:152","msg":"saved","path":"/data/etcd_backup_dir/snap-202209291105.db"}
Snapshot saved at /data/etcd_backup_dir/snap-202209291105.db
[root@master1 0929]# ll /data/etcd_backup_dir/
总用量 15248
-rwxr-xr-x 1 root root 351 1月 16 2022 etcd_backup.sh
-rw------- 1 root root 5144608 1月 16 2022 etcd-snapshot-20220116.db
-rw------- 1 root root 5144608 1月 16 2022 snap-202201161455.db
-rw------- 1 root root 5308448 9月 29 11:05 snap-202209291105.db
物理备份
[root@master1 ~]# ll /var/lib/etcd
总用量 0
drwx------ 4 root root 29 9月 29 09:38 member
[root@master1 ~]# ll /var/lib/etcd/member/
总用量 0
drwx------ 2 root root 246 9月 29 10:36 snap
drwx------ 2 root root 244 9月 29 09:38 wal
[root@master1 ~]# mkdir -p /data/etcd_data_back
[root@master1 ~]# cp -r /var/lib/etcd /data/etcd_data_back/etcd.bak
[root@master1 ~]# ll /data/etcd_data_back/
总用量 0
drwx------ 3 root root 20 9月 29 11:13 etcd.bak
[root@master1 ~]# ll /data/etcd_data_back/etcd.bak/
总用量 0
drwx------ 4 root root 29 9月 29 11:13 member
[root@master1 ~]# ll /data/etcd_data_back/etcd.bak/member/
总用量 0
drwx------ 2 root root 246 9月 29 11:13 snap
drwx------ 2 root root 244 9月 29 11:13 wal
四、更新证书(注意:renew 完成后需用新的 /etc/kubernetes/admin.conf 覆盖 ~/.kube/config,否则 kubectl 仍会使用旧证书)
# kubeadm certs renew all
[renew] Reading configuration from the cluster...
[renew] FYI: You can look at this config file with 'kubectl -n kube-system get cm kubeadm-config -o yaml'
certificate embedded in the kubeconfig file for the admin to use and for kubeadm itself renewed
certificate for serving the Kubernetes API renewed
certificate the apiserver uses to access etcd renewed
certificate for the API server to connect to kubelet renewed
certificate embedded in the kubeconfig file for the controller manager to use renewed
certificate for liveness probes to healthcheck etcd renewed
certificate for etcd nodes to communicate with each other renewed
certificate for serving etcd renewed
certificate for the front proxy client renewed
certificate embedded in the kubeconfig file for the scheduler manager to use renewed
Done renewing certificates. You must restart the kube-apiserver, kube-controller-manager, kube-scheduler and etcd, so that they can use the new certificates.
# 由输出内容可知,更新的内容为:
admin.conf 中嵌入的管理员证书(kubeadm 自身也使用);
用于 Kubernetes API 服务端的证书;
apiserver的证书,用来访问etcd;
API server的证书,用来连接kubelet;
嵌入在kubeconfig文件中,用于controller manager的使用;
liveness probes的证书,用来对etcd作health check;
etcd节点的证书,用来互相访问;
前端代理服务的证书;
嵌入在kubeconfig文件中,用于scheduler manager的使用。
把旧的静态 Pod 清单(yaml)转移到别的路径,暂时清空 /etc/kubernetes/manifests/,让 kubelet 停掉控制平面组件;稍后移回清单文件,组件即以新证书重新启动
[root@master1 ~]# cd /etc/kubernetes/manifests/
[root@master1 manifests]# mkdir -p /data/manifests
[root@master1 manifests]# mv ./* /data/manifests/
[root@master1 manifests]# ll /data/manifests/
总用量 16
-rw------- 1 root root 2226 9月 29 11:43 etcd.yaml
-rw------- 1 root root 3379 9月 29 11:43 kube-apiserver.yaml
-rw------- 1 root root 2827 9月 29 11:43 kube-controller-manager.yaml
-rw------- 1 root root 1413 9月 29 11:43 kube-scheduler.yaml
等待约 1 分钟,期间测试一下:
[root@master1 manifests]# kubectl get po -A
The connection to the server 10.170.13.8:6443 was refused - did you specify the right host or port?
此时连接不上是正常现象:manifests 目录已清空,kube-apiserver 静态 Pod 已被 kubelet 停掉
恢复一下/etc/kubernetes/manifests yaml文件
[root@master1 manifests]# pwd
/etc/kubernetes/manifests
[root@master1 manifests]# cp -rp /data/manifests/* ./
[root@master1 manifests]# ll
总用量 16
-rw------- 1 root root 2226 10月 3 2021 etcd.yaml
-rw------- 1 root root 3379 1月 10 2022 kube-apiserver.yaml
-rw------- 1 root root 2827 10月 6 2021 kube-controller-manager.yaml
-rw------- 1 root root 1413 10月 6 2021 kube-scheduler.yaml
查看下 pod的情况
[root@master1 manifests]# kubectl get po -A
[root@master1 manifests]# kubectl get no
五、 查看新的证书过期时间
# kubeadm certs check-expiration