版本介绍
NAME VERSION INTERNAL-IP
cnvs-kubm-101-103 v1.15.3 172.20.101.103
OS-IMAGE KERNEL-VERSION CONTAINER-RUNTIME
CentOS Linux 7 (Core) 5.2.9-1.el7.elrepo.x86_64 docker://18.6.1
项目地址:
https://gitlab.com/PtmindDev/devops/kub-deploy/tree/cn-k8s-prod
分支:
cn-k8s-prod
集群介绍
#master
[kub-m]
172.20.101.103 name=cnvs-kubm-101-103
172.20.101.104 name=cnvs-kubm-101-104
172.20.101.105 name=cnvs-kubm-101-105
#node
[kub-n]
172.20.101.106 name=cnvs-kubnode-101-106
172.20.101.107 name=cnvs-kubnode-101-107
172.20.101.108 name=cnvs-kubnode-101-108
172.20.101.118 name=cnvs-kubnode-101-118
172.20.101.120 name=cnvs-kubnode-101-120
172.20.101.122 name=cnvs-kubnode-101-122
172.20.101.123 name=cnvs-kubnode-101-123
172.20.101.124 name=cnvs-kubnode-101-124
ansible 安装环境:
cd /workspace/kub-deploy/roles
1:升级内核 -按需
ansible-playbook 1-kernelup.yaml
验证效果
ansible kub-all -a "uname -a"
Linux kubm-01 5.2.9-1.el7.elrepo.x86_64 #1 SMP Fri Aug 16 08:17:55 EDT 2019 x86_64 x86_64 x86_64 GNU/Linux
系统初始化
ansible-playbook 2-basic.yml
#单独指定其中一台运行:
ansible-playbook -i /etc/ansible/hosts 2-basic.yml --limit 172.20.101.103
安装nginx
ansible-playbook 3-nginx.yaml
验证
#版本
[root@kubm-01 roles]# ansible kub-m -a "nginx -v"
172.20.101.103 | CHANGED | rc=0 >>
nginx version: nginx/1.16.1
....
#端口
ansible kub-m -m shell -a "lsof -n -i:16443"
172.20.101.103 | CHANGED | rc=0 >>
COMMAND PID USER FD TYPE DEVICE SIZE/OFF NODE NAME
nginx 21392 root 5u IPv4 434526 0t0 TCP *:16443 (LISTEN)
....
安装keepalived
ansible-playbook 4-keepalived.yml
返回
********
ok: [172.20.101.103] => {
"output.stdout_lines": [
" inet 172.20.101.253/32 scope global eth0"
]
.......
ok: [172.20.101.105] => {
"output.stdout_lines": []
}
监测 vip
[root@kubm-01 roles]# ping 172.20.101.253
PING 172.20.101.253 (172.20.101.253) 56(84) bytes of data.
64 bytes from 172.20.101.253: icmp_seq=1 ttl=64 time=0.059 ms
新建安装部署目录
mkdir -p /etc/kubeinstall
cd /etc/kubeinstall
创建初始化配置文件(在 kubm-01 上执行)
我使用的是 flannel 网络插件,需要将 Pod 网段配置为 10.244.0.0/16(命令行参数为 --pod-network-cidr=10.244.0.0/16,在配置文件中对应 networking.podSubnet)。
cat <<EOF > /etc/kubeinstall/kubeadm-config.yaml
apiVersion: kubeadm.k8s.io/v1beta2
bootstrapTokens:
- groups:
- system:bootstrappers:kubeadm:default-node-token
token: abcdef.0123456789abcdef
ttl: 24h0m0s
usages:
- signing
- authentication
kind: InitConfiguration
localAPIEndpoint:
advertiseAddress: 172.20.101.103
bindPort: 6443
nodeRegistration:
criSocket: /var/run/dockershim.sock
name: cnvs-kubm-101-103
taints:
- effect: NoSchedule
key: node-role.kubernetes.io/master
---
apiServer:
timeoutForControlPlane: 4m0s
apiVersion: kubeadm.k8s.io/v1beta2
certificatesDir: /etc/kubernetes/pki
clusterName: cn-k8s-prod
controlPlaneEndpoint: "172.20.101.253:16443"
controllerManager: {}
dns:
type: CoreDNS
etcd:
local:
dataDir: /var/lib/etcd
imageRepository: k8s.gcr.io
kind: ClusterConfiguration
kubernetesVersion: v1.15.0
networking:
dnsDomain: cluster.local
serviceSubnet: 10.245.0.0/16
podSubnet: "10.244.0.0/16"
scheduler: {}
EOF
注意我使用nginx做的代理
master上面都配置Nginx反向代理 API Server;
172.20.101.253 是master节点的vip;
Nginx 代理端口为 16443 端口;
API Server使用 6443 端口;
使用 --config 指定上面的配置文件来初始化集群。
kubeadm init \
--config=/etc/kubeinstall/kubeadm-config.yaml \
--upload-certs
master 节点:
[kub-m]
172.20.101.103 name=cnvs-kubm-101-103
172.20.101.104 name=cnvs-kubm-101-104
172.20.101.105 name=cnvs-kubm-101-105
第一台master节点初始化返回结果
To start using your cluster, you need to run the following as a regular user:
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
You should now deploy a pod network to the cluster.
Run "kubectl apply -f [podnetwork].yaml" with one of the options listed at:
https://kubernetes.io/docs/concepts/cluster-administration/addons/
You can now join any number of the control-plane node running the following command on each as root:
kubeadm join 172.20.101.253:16443 --token hgep1g.fwo8y7rt8o8xqjml \
--discovery-token-ca-cert-hash sha256:08462cf2017a1e3292ea355a7fc56c49ac713b84d5af45b649d7c8be539b97cf \
--control-plane --certificate-key 1c20a3656bbcc9be4b5a16bcb4c4bab5445d221d4721900bf31b5b196b733cec
Please note that the certificate-key gives access to cluster sensitive data, keep it secret!
As a safeguard, uploaded-certs will be deleted in two hours; If necessary, you can use
"kubeadm init phase upload-certs --upload-certs" to reload certs afterward.
Then you can join any number of worker nodes by running the following on each as root:
kubeadm join 172.20.101.253:16443 --token hgep1g.fwo8y7rt8o8xqjml \
--discovery-token-ca-cert-hash sha256:08462cf2017a1e3292ea355a7fc56c49ac713b84d5af45b649d7c8be539b97cf
在执行节点上执行如下操作,初始化k8s环境。
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
节点验证:
[root@cnvs-kubnode-101-103 kubeinstall]#
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
#节点状态
[root@cnvs-kubnode-101-103 kubeinstall]# kubectl get nodes
NAME STATUS ROLES AGE VERSION
cnvs-kubm-101-103 NotReady master 3m35s v1.15.3 <=== 状态 NotReady,安装网络插件后恢复
#服务状态
[root@cnvs-kubnode-101-103 kubeinstall]# kubectl get cs
NAME STATUS MESSAGE ERROR
controller-manager Healthy ok
scheduler Healthy ok
etcd-0 Healthy {"health":"true"}
部署flannel网络
安装 CNI 插件时,其 pod CIDR 必须与上面配置的 podSubnet 一致,请按照实际情况修改。
注意:kubernetes 版本更新较快,推荐部署前阅读相关文档,使用与集群版本匹配的网络插件!
https://github.com/coreos/flannel#flannel
kubectl apply -f https://raw.githubusercontent.com/coreos/flannel/master/Documentation/kube-flannel.yml
验证节点状态:
[root@cnvs-kubnode-101-103 kubeinstall]# kubectl get nodes
NAME STATUS ROLES AGE VERSION
cnvs-kubm-101-103 Ready master 4m51s v1.15.3 <=== Ready
#服务状态全部为running
[root@cnvs-kubm-101-103 kubeinstall]# kubectl get pods -n kube-system
NAME READY STATUS RESTARTS AGE
coredns-5c98db65d4-kl66m 1/1 Running 0 83s
coredns-5c98db65d4-xjlkl 0/1 Running 0 83s
etcd-cnvs-kubm-101-103 1/1 Running 0 40s
kube-apiserver-cnvs-kubm-101-103 1/1 Running 0 25s
kube-controller-manager-cnvs-kubm-101-103 1/1 Running 0 27s
kube-flannel-ds-amd64-jln7d 1/1 Running 0 17s
kube-proxy-g2b2p 1/1 Running 0 83s
kube-scheduler-cnvs-kubm-101-103 1/1 Running 0 35s
在第 2、3 台 master 节点上执行如下操作,将其加入控制平面
kubeadm join 172.20.101.253:16443 --token m1n5s7.ktdbt3ce3yj4czm1 \
--discovery-token-ca-cert-hash sha256:0eca032dcb2354f8c9e4f3ecfd2a19941b8a7b0c6cc4cc0764dc61a3a8e5ff68 \
--control-plane --certificate-key e5b5fe5b9576a604b7107bbe12a8aa09d4ddc309c9d9447bc5552fdd481df627
在执行节点上执行如下操作,初始化一下k8s环境。
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
验证
所有master节点ready
[root@cnvs-kubm-101-105 ~]# kubectl get nodes
NAME STATUS ROLES AGE VERSION
cnvs-kubm-101-103 Ready master 4m35s v1.15.3
cnvs-kubm-101-104 Ready master 96s v1.15.3
cnvs-kubm-101-105 Ready master 22s v1.15.3
所有node节点,执行如下操作
[kub-n]
172.20.101.106
172.20.101.107
172.20.101.108
172.20.101.118
172.20.101.120
172.20.101.122
172.20.101.123
172.20.101.124
单节点安装
kubeadm join 172.20.101.253:16443 --token hgep1g.fwo8y7rt8o8xqjml \
--discovery-token-ca-cert-hash sha256:08462cf2017a1e3292ea355a7fc56c49ac713b84d5af45b649d7c8be539b97cf
ansible 安装
ansible kub-n -m shell -a "kubeadm join 172.20.101.253:16443 --token hgep1g.fwo8y7rt8o8xqjml \
--discovery-token-ca-cert-hash sha256:08462cf2017a1e3292ea355a7fc56c49ac713b84d5af45b649d7c8be539b97cf"
返回
[preflight] Running pre-flight checks
[preflight] Reading configuration from the cluster...
[preflight] FYI: You can look at this config file with 'kubectl -n kube-system get cm kubeadm-config -oyaml'
[kubelet-start] Downloading configuration for the kubelet from the "kubelet-config-1.15" ConfigMap in the kube-system namespace
[kubelet-start] Writing kubelet configuration to file "/var/lib/kubelet/config.yaml"
[kubelet-start] Writing kubelet environment file with flags to file "/var/lib/kubelet/kubeadm-flags.env"
[kubelet-start] Activating the kubelet service
[kubelet-start] Waiting for the kubelet to perform the TLS Bootstrap...
This node has joined the cluster:
* Certificate signing request was sent to apiserver and a response was received.
* The Kubelet was informed of the new secure connection details.
Run 'kubectl get nodes' on the control-plane to see this node join the cluster.
验证:
[root@cnvs-kubm-101-104 ~]# kubectl get node
NAME STATUS ROLES AGE VERSION
cnvs-kubm-101-103 Ready master 8m32s v1.15.3
cnvs-kubm-101-104 Ready master 5m33s v1.15.3
cnvs-kubm-101-105 Ready master 4m19s v1.15.3
cnvs-kubnode-101-106 Ready <none> 28s v1.15.3
cnvs-kubnode-101-107 Ready <none> 28s v1.15.3
cnvs-kubnode-101-108 Ready <none> 28s v1.15.3
cnvs-kubnode-101-118 Ready <none> 28s v1.15.3
cnvs-kubnode-101-120 Ready <none> 28s v1.15.3
cnvs-kubnode-101-122 Ready <none> 13s v1.15.3
cnvs-kubnode-101-123 Ready <none> 13s v1.15.3
cnvs-kubnode-101-124 Ready <none> 2m31s v1.15.3
添加标签
为部署traefik做准备
kubectl label nodes {cnvs-kubnode-101-106,cnvs-kubnode-101-107} traefik=traefik-outer --overwrite
kubectl label nodes {cnvs-kubnode-101-123,cnvs-kubnode-101-124} traefik=traefik-inner --overwrite
验证
[root@cnvs-kubm-101-103 kub-deploy]# kubectl get node -l "traefik=traefik-outer"
NAME STATUS ROLES AGE VERSION
cnvs-kubnode-101-106 Ready <none> 5m25s v1.15.3
cnvs-kubnode-101-107 Ready <none> 5m25s v1.15.3
[root@cnvs-kubm-101-103 kub-deploy]# kubectl get node -l "traefik=traefik-inner"
NAME STATUS ROLES AGE VERSION
cnvs-kubnode-101-123 Ready <none> 5m18s v1.15.3
cnvs-kubnode-101-124 Ready <none> 7m36s v1.15.3
集群总体验证
#所有服务状态均为 running
[root@cnvs-kubm-101-103 kub-deploy]# kubectl get pods -n kube-system
NAME READY STATUS RESTARTS AGE
coredns-5c98db65d4-kl66m 1/1 Running 0 13m
coredns-5c98db65d4-xjlkl 1/1 Running 0 13m
etcd-cnvs-kubm-101-103 1/1 Running 0 13m
etcd-cnvs-kubm-101-104 1/1 Running 0 7m57s
etcd-cnvs-kubm-101-105 1/1 Running 0 5m26s
kube-apiserver-cnvs-kubm-101-103 1/1 Running 0 13m
kube-apiserver-cnvs-kubm-101-104 1/1 Running 1 7m47s
kube-apiserver-cnvs-kubm-101-105 1/1 Running 0 4m8s
kube-controller-manager-cnvs-kubm-101-103 1/1 Running 1 13m
kube-controller-manager-cnvs-kubm-101-104 1/1 Running 0 6m38s
kube-controller-manager-cnvs-kubm-101-105 1/1 Running 0 4m11s
kube-flannel-ds-amd64-2nfbb 1/1 Running 2 88s
kube-flannel-ds-amd64-2pbqs 1/1 Running 1 104s
kube-flannel-ds-amd64-4w7cb 1/1 Running 2 92s
kube-flannel-ds-amd64-gxzhw 1/1 Running 1 3m58s
kube-flannel-ds-amd64-jln7d 1/1 Running 0 12m
kube-flannel-ds-amd64-lj9t4 1/1 Running 2 92s
kube-flannel-ds-amd64-mbp8k 1/1 Running 2 91s
kube-flannel-ds-amd64-r8t9c 1/1 Running 1 7m57s
kube-flannel-ds-amd64-rdsfm 1/1 Running 0 3m5s
kube-flannel-ds-amd64-w8gww 1/1 Running 1 5m26s
kube-flannel-ds-amd64-x7rh7 1/1 Running 2 92s
kube-proxy-4kxjv 1/1 Running 0 5m26s
kube-proxy-4vqpf 1/1 Running 0 92s
kube-proxy-677lf 1/1 Running 0 92s
kube-proxy-b9kr2 1/1 Running 0 104s
kube-proxy-dm9kd 1/1 Running 0 3m5s
kube-proxy-g2b2p 1/1 Running 0 13m
kube-proxy-m79jv 1/1 Running 0 3m58s
kube-proxy-snqhr 1/1 Running 0 92s
kube-proxy-t7mkx 1/1 Running 0 91s
kube-proxy-z2f67 1/1 Running 0 7m57s
kube-proxy-zjpwn 1/1 Running 0 88s
kube-scheduler-cnvs-kubm-101-103 1/1 Running 1 13m
kube-scheduler-cnvs-kubm-101-104 1/1 Running 0 7m4s
kube-scheduler-cnvs-kubm-101-105 1/1 Running 0 4m32s
#所有节点状态为ready
[root@cnvs-kubm-101-103 kub-deploy]# kubectl get nodes
NAME STATUS ROLES AGE VERSION
cnvs-kubm-101-103 Ready master 15m v1.15.3
cnvs-kubm-101-104 Ready master 9m32s v1.15.3
cnvs-kubm-101-105 Ready master 7m1s v1.15.3
cnvs-kubnode-101-106 Ready <none> 3m6s v1.15.3
cnvs-kubnode-101-107 Ready <none> 3m19s v1.15.3
cnvs-kubnode-101-108 Ready <none> 3m7s v1.15.3
cnvs-kubnode-101-118 Ready <none> 3m7s v1.15.3
cnvs-kubnode-101-120 Ready <none> 3m7s v1.15.3
cnvs-kubnode-101-122 Ready <none> 3m3s v1.15.3
cnvs-kubnode-101-123 Ready <none> 4m40s v1.15.3
cnvs-kubnode-101-124 Ready <none> 5m33s v1.15.3
批量清理集群
kubectl delete node --all
ansible kub-all -m shell -a "kubeadm reset -f"
ansible kub-all -m shell -a "rm -rf /etc/kubernetes && rm -rf /var/lib/etcd && rm -rf /var/lib/kubelet && rm -rf $HOME/.kube/config"
ansible kub-all -m shell -a "iptables -F && iptables -t nat -F && iptables -t mangle -F && iptables -X"
ansible kub-all -m shell -a "systemctl restart docker && systemctl enable kubelet"
ansible kub-all -m shell -a "ip link del flannel.1 && ip a|grep flannel "
推荐清理环境
如果之前配置过k8s或者首次配置没有成功等情况,推荐把系统环境清理一下,每一个节点。
systemctl stop kubelet
docker rm -f -v $(docker ps -a -q)
rm -rf /etc/kubernetes
rm -rf /var/lib/etcd
rm -rf /var/lib/kubelet
rm -rf $HOME/.kube/config
ip link del flannel.1
iptables -F && iptables -t nat -F && iptables -t mangle -F && iptables -X
yum reinstall -y kubelet
systemctl daemon-reload
systemctl restart docker
systemctl enable kubelet
参考文档
https://www.cnblogs.com/net2817/p/10513369.html
https://k8smeetup.github.io/docs/reference/setup-tools/kubeadm/kubeadm-config/