Note: in production, plan the upgrade window around business requirements. This walkthrough uses hdss7-201 as the example.
1.1. Environment overview
The cluster is currently running v1.15.2; we are going to upgrade it to v1.15.4.
[root@hdss7-201 ~]# kubectl get node
NAME STATUS ROLES AGE VERSION
hdss7-201.host.com Ready master,node 77d v1.15.2
hdss7-202.host.com Ready master,node 77d v1.15.2
1.2. Take the node to be upgraded offline
On the load balancer, edit nginx.conf and comment out the 201 node's entries, i.e. change the line to: #server 192.168.41.201:81 max_fails=3 fail_timeout=10s;
[root@hdss7-211 nginx]# vim nginx.conf
upstream default_backend_traefik {
    #server 192.168.41.201:81 max_fails=3 fail_timeout=10s;
    server 192.168.41.202:81 max_fails=3 fail_timeout=10s;
}
server {
    server_name *.od.com;
    location / {
        proxy_pass http://default_backend_traefik;
        proxy_set_header Host $http_host;
        proxy_set_header x-forwarded-for $proxy_add_x_forwarded_for;
    }
}
stream {
    upstream kube-apiserver {
        #server 192.168.41.201:6443 max_fails=3 fail_timeout=30s;
        server 192.168.41.202:6443 max_fails=3 fail_timeout=30s;
    }
    server {
        listen 7443;
        proxy_connect_timeout 2s;
        proxy_timeout 900s;
        proxy_pass kube-apiserver;
    }
}
[root@hdss7-211 nginx]# ip a
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN group default qlen 1000
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
inet 127.0.0.1/8 scope host lo
valid_lft forever preferred_lft forever
2: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP group default qlen 1000
link/ether 52:54:00:39:bf:e9 brd ff:ff:ff:ff:ff:ff
inet 192.168.41.211/24 brd 192.168.41.255 scope global noprefixroute eth0
valid_lft forever preferred_lft forever
inet 192.168.41.210/32 scope global eth0
valid_lft forever preferred_lft forever
[root@hdss7-211 nginx]# nginx -t
nginx: the configuration file /etc/nginx/nginx.conf syntax is ok
nginx: configuration file /etc/nginx/nginx.conf test is successful
[root@hdss7-211 nginx]# nginx -s reload
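To confirm that traffic now bypasses 201, a quick sanity check can be run on the load balancer. This is a sketch: the VIP 192.168.41.210 and port 7443 come from the output above, and whether /version answers depends on the apiserver's anonymous-auth settings:
nginx -T | grep -A 3 'upstream'                # dump the running config; 201 should be commented out in both upstreams
curl -sk https://192.168.41.210:7443/version   # the apiserver frontend should still respond, now served by 202 alone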
Before deleting the node, both nodes are present and pods are scheduled across both 201 and 202:
[root@hdss7-201 ~]# kubectl get pod -n kube-system -o wide
NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
coredns-6b6c4f9648-fxwvh 1/1 Running 1 69d 172.7.201.2 hdss7-201.host.com <none> <none>
heapster-b5b9f794-68b22 1/1 Running 0 68m 172.7.201.6 hdss7-201.host.com <none> <none>
kubernetes-dashboard-67989c548-d99d4 1/1 Running 0 24d 172.7.202.8 hdss7-202.host.com <none> <none>
traefik-ingress-4nzcw 1/1 Running 1 64d 172.7.202.5 hdss7-202.host.com <none> <none>
traefik-ingress-vsk6h 1/1 Running 1 64d 172.7.201.4 hdss7-201.host.com <none> <none>
After deleting the node, only one node remains and all pods have been rescheduled onto hdss7-202:
[root@hdss7-201 ~]# kubectl delete node hdss7-201.host.com
node "hdss7-201.host.com" deleted
[root@hdss7-201 ~]# kubectl get node
NAME STATUS ROLES AGE VERSION
hdss7-202.host.com Ready master,node 77d v1.15.2
[root@hdss7-201 ~]# kubectl get pod -n kube-system -o wide
NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
coredns-6b6c4f9648-d57z6 1/1 Running 0 11m 172.7.202.10 hdss7-202.host.com <none> <none>
heapster-b5b9f794-qzpfz 1/1 Running 0 11m 172.7.202.12 hdss7-202.host.com <none> <none>
kubernetes-dashboard-67989c548-d99d4 1/1 Running 0 24d 172.7.202.8 hdss7-202.host.com <none> <none>
traefik-ingress-4nzcw 1/1 Running 1 64d 172.7.202.5 hdss7-202.host.com <none> <none>
[root@hdss7-201 conf]# dig -t A kubernetes.default.svc.cluster.local @192.168.0.2 +short   # in-cluster services are completely unaffected
192.168.0.1
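Note that kubectl delete node removes the node object immediately. In production you may prefer to cordon and drain first so workloads are evicted gracefully before the node is removed; a sketch (flag names as in kubectl v1.15):
kubectl cordon hdss7-201.host.com        # stop new pods from being scheduled here
kubectl drain hdss7-201.host.com --ignore-daemonsets --delete-local-data   # evict running pods gracefully
kubectl delete node hdss7-201.host.com   # then remove the node object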
1.3. Unpack, rename, and create the symlink
Unpack:
[root@hdss7-201 opt]# cd /root/
[root@hdss7-201 ~]# tar zxvf kubernetes-server-linux-amd64-v1.15.4.tar.gz
Rename:
[root@hdss7-201 ~]# mv kubernetes kubernetes-v1.15.4
Create the symlink:
[root@hdss7-201 ~]# mv kubernetes-v1.15.4 /opt/
[root@hdss7-201 opt]# ls -l
total 378260
drwx--x--x  4 root root        28 Nov 17 16:46 containerd
lrwxrwxrwx  1 root root        17 Dec  8 15:20 etcd -> /opt/etcd-v3.1.20
drwxr-xr-x  4 etcd etcd       166 Dec  8 15:39 etcd-v3.1.20
lrwxrwxrwx  1 root root        21 Dec 29 11:46 flannel -> /opt/flannel-v0.11.0/
drwxr-xr-x  3 root root       113 Jan 21 15:31 flannel-v0.11.0
-rw-r--r--. 1 root root 387338240 Sep  6  2018 jdk1.8.tar
drwxr-xr-x. 8 root root       255 Mar 19  2018 jre
drwxr-xr-x. 8 root root       233 Nov 23  2016 jre1.7
lrwxrwxrwx  1 root root        24 Dec  8 17:23 kubernetes -> /opt/kubernetes-v1.15.2/
drwxr-xr-x  4 root root        50 Dec  8 17:24 kubernetes-v1.15.2
drwxr-xr-x  4 root root        79 Sep 18  2019 kubernetes-v1.15.4
[root@hdss7-201 opt]# rm -f kubernetes
[root@hdss7-201 opt]# ln -s /opt/kubernetes-v1.15.4 /opt/kubernetes
[root@hdss7-201 opt]# ll
total 378260
drwx--x--x  4 root root        28 Nov 17 16:46 containerd
lrwxrwxrwx  1 root root        17 Dec  8 15:20 etcd -> /opt/etcd-v3.1.20
drwxr-xr-x  4 etcd etcd       166 Dec  8 15:39 etcd-v3.1.20
lrwxrwxrwx  1 root root        21 Dec 29 11:46 flannel -> /opt/flannel-v0.11.0/
drwxr-xr-x  3 root root       113 Jan 21 15:31 flannel-v0.11.0
-rw-r--r--. 1 root root 387338240 Sep  6  2018 jdk1.8.tar
drwxr-xr-x. 8 root root       255 Mar 19  2018 jre
drwxr-xr-x. 8 root root       233 Nov 23  2016 jre1.7
lrwxrwxrwx  1 root root        23 Mar 11 15:14 kubernetes -> /opt/kubernetes-v1.15.4
drwxr-xr-x  4 root root        50 Dec  8 17:24 kubernetes-v1.15.2
drwxr-xr-x  4 root root        79 Sep 18  2019 kubernetes-v1.15.4
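As an aside, the rm + ln pair above can be collapsed into a single command; -f replaces the existing link and -n stops ln from following the old symlink into the directory:
ln -sfn /opt/kubernetes-v1.15.4 /opt/kubernetes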
Delete the unneeded files:
[root@hdss7-201 opt]# cd kubernetes
[root@hdss7-201 kubernetes]# ls
addons kubernetes-src.tar.gz LICENSES server
[root@hdss7-201 kubernetes]# rm -rf kubernetes-src.tar.gz
[root@hdss7-201 kubernetes]# cd server/bin
[root@hdss7-201 bin]# ls
apiextensions-apiserver cloud-controller-manager.docker_tag hyperkube kube-apiserver kube-apiserver.tar kube-controller-manager.docker_tag kubectl kube-proxy kube-proxy.tar kube-scheduler.docker_tag mounter
cloud-controller-manager cloud-controller-manager.tar kubeadm kube-apiserver.docker_tag kube-controller-manager kube-controller-manager.tar kubelet kube-proxy.docker_tag kube-scheduler kube-scheduler.tar
[root@hdss7-201 bin]# rm -rf *.tar
[root@hdss7-201 bin]# rm -rf *_tag
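Before copying configuration over, it is worth confirming the new binaries report the expected version:
/opt/kubernetes/server/bin/kube-apiserver --version   # should print: Kubernetes v1.15.4
/opt/kubernetes/server/bin/kubelet --version          # should print: Kubernetes v1.15.4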
1.4. Copy the conf files, cert files, and shell scripts
[root@hdss7-201 bin]# mkdir conf
[root@hdss7-201 bin]# mkdir cert
[root@hdss7-201 bin]# cp /opt/kubernetes-v1.15.2/server/bin/cert/* ./cert/
[root@hdss7-201 bin]# cp /opt/kubernetes-v1.15.2/server/bin/conf/* ./conf/
[root@hdss7-201 bin]# cp /opt/kubernetes-v1.15.2/server/bin/*.sh .
[root@hdss7-201 bin]# pwd
/opt/kubernetes/server/bin
[root@hdss7-201 bin]#
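A quick check that nothing was missed in the copy, diffing against the old version's directories:
diff -r /opt/kubernetes-v1.15.2/server/bin/conf /opt/kubernetes/server/bin/conf
diff -r /opt/kubernetes-v1.15.2/server/bin/cert /opt/kubernetes/server/bin/cert
ls /opt/kubernetes/server/bin/*.sh   # the start scripts that supervisord points at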
1.5. Restart services and verify
Note: in production, restart the services one at a time; etcd and flannel do not need to be restarted.
[root@hdss7-201 bin]# supervisorctl restart all
If a service fails to stop, kill the process manually and start it again.
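To restart components one at a time instead of restart all, use the program names shown by supervisorctl status below; a sketch:
supervisorctl restart kube-apiserver-7-201
supervisorctl restart kube-controller-manager-7-201
supervisorctl restart kube-scheduler-7-201
supervisorctl restart kube-kubelet-7-201
supervisorctl restart kube-proxy-7-201
# if a process refuses to stop:
ps -ef | grep kube-apiserver   # find the stuck pid
kill -9 <pid>                  # <pid> taken from the ps output
supervisorctl start kube-apiserver-7-201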
[root@hdss7-201 supervisord.d]# supervisorctl status
etcd-server-7-201 RUNNING pid 22517, uptime 0:09:15
flanneld-7-201 RUNNING pid 22899, uptime 0:07:36
kube-apiserver-7-201 RUNNING pid 23209, uptime 0:05:50
kube-controller-manager-7-201 RUNNING pid 23516, uptime 0:04:19
kube-kubelet-7-201 RUNNING pid 23617, uptime 0:03:54
kube-proxy-7-201 RUNNING pid 21039, uptime 0:11:56
kube-scheduler-7-201 RUNNING pid 23940, uptime 0:03:32
[root@hdss7-201 supervisord.d]# kubectl get node
NAME STATUS ROLES AGE VERSION
hdss7-201.host.com Ready <none> 20m v1.15.4
hdss7-202.host.com Ready master,node 78d v1.15.2
[root@hdss7-201 supervisord.d]# kubectl get node -o wide
NAME STATUS ROLES AGE VERSION INTERNAL-IP EXTERNAL-IP OS-IMAGE KERNEL-VERSION CONTAINER-RUNTIME
hdss7-201.host.com Ready <none> 20m v1.15.4 192.168.41.201 <none> CentOS Linux 7 (Core) 3.10.0-1160.45.1.el7.x86_64 docker://20.10.10
hdss7-202.host.com Ready master,node 78d v1.15.2 192.168.41.202 <none> CentOS Linux 7 (Core) 3.10.0-1160.45.1.el7.x86_64 docker://20.10.10
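Note that the re-registered node comes back with ROLES <none>: the master,node roles shown on 202 are just node labels, and they were lost when the node object was deleted. They can be re-applied if desired (label keys assumed to match how the roles were set originally):
kubectl label node hdss7-201.host.com node-role.kubernetes.io/master=
kubectl label node hdss7-201.host.com node-role.kubernetes.io/node=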
[root@hdss7-201 supervisord.d]# kubectl get pods -n kube-system -o wide
NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
coredns-6b6c4f9648-d57z6 1/1 Running 0 22h 172.7.202.10 hdss7-202.host.com <none> <none>
heapster-b5b9f794-qzpfz 1/1 Running 0 22h 172.7.202.12 hdss7-202.host.com <none> <none>
kubernetes-dashboard-67989c548-d99d4 1/1 Running 0 24d 172.7.202.8 hdss7-202.host.com <none> <none>
traefik-ingress-4nzcw 1/1 Running 1 65d 172.7.202.5 hdss7-202.host.com <none> <none>
traefik-ingress-ww9lj 1/1 Running 0 20m 172.7.201.3 hdss7-201.host.com <none> <none>
1.6. Bring the 201 node back into nginx (commands omitted)
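The omitted commands are simply the mirror of section 1.2, run on hdss7-211: uncomment the two 201 lines in nginx.conf, then test and reload. A sketch:
vim /etc/nginx/nginx.conf   # restore: server 192.168.41.201:81 ... and server 192.168.41.201:6443 ...
nginx -t
nginx -s reload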