版本介绍


NAME                   VERSION   INTERNAL-IP         
cnvs-kubm-101-103      v1.15.3   172.20.101.103   

OS-IMAGE                KERNEL-VERSION              CONTAINER-RUNTIME
CentOS Linux 7 (Core)   5.2.9-1.el7.elrepo.x86_64   docker://18.6.1    

项目地址:

https://gitlab.com/PtmindDev/devops/kub-deploy/tree/cn-k8s-prod

分支:
cn-k8s-prod

集群介绍

#master
[kub-m]
172.20.101.103 name=cnvs-kubm-101-103
172.20.101.104 name=cnvs-kubm-101-104
172.20.101.105 name=cnvs-kubm-101-105

#node
[kub-n]
172.20.101.106 name=cnvs-kubnode-101-106 
172.20.101.107 name=cnvs-kubnode-101-107
172.20.101.108 name=cnvs-kubnode-101-108
172.20.101.118 name=cnvs-kubnode-101-118 
172.20.101.120 name=cnvs-kubnode-101-120
172.20.101.122 name=cnvs-kubnode-101-122
172.20.101.123 name=cnvs-kubnode-101-123 
172.20.101.124 name=cnvs-kubnode-101-124

ansible 安装环境:

cd /workspace/kub-deploy/roles

1:升级内核 -按需

ansible-playbook  1-kernelup.yaml  

验证效果

ansible kub-all -a "uname -a"

Linux kubm-01 5.2.9-1.el7.elrepo.x86_64 #1 SMP Fri Aug 16 08:17:55 EDT 2019 x86_64 x86_64 x86_64 GNU/Linux

系统初始化

ansible-playbook 2-basic.yml

#单独指定其中一台运行:
ansible-playbook -i /etc/ansible/hosts 2-basic.yml --limit 172.20.101.103

安装nginx

ansible-playbook 3-nginx.yaml

验证

#版本
[root@kubm-01 roles]# ansible kub-m -a "nginx -v"     

172.20.101.103 | CHANGED | rc=0 >>
nginx version: nginx/1.16.1
....

#端口
ansible kub-m -m shell -a  "lsof -n -i:16443"

172.20.101.103 | CHANGED | rc=0 >>
COMMAND   PID  USER   FD   TYPE DEVICE SIZE/OFF NODE NAME
nginx   21392  root    5u  IPv4 434526      0t0  TCP *:16443 (LISTEN)
....

安装keepalived

ansible-playbook 4-keepalived.yml 

返回

 

********
ok: [172.20.101.103] => {
    "output.stdout_lines": [
        "    inet 172.20.101.253/32 scope global eth0"
    ]
.......
ok: [172.20.101.105] => {
    "output.stdout_lines": []
}

监测 vip

[root@kubm-01 roles]# ping 172.20.101.253
PING 172.20.101.253 (172.20.101.253) 56(84) bytes of data.
64 bytes from 172.20.101.253: icmp_seq=1 ttl=64 time=0.059 ms

新建安装部署目录

mkdir -p /etc/kubeinstall
cd /etc/kubeinstall

创建初始化配置文件 (在 kubm-01 上执行)

我使用的flannel 网络插件需要配置网络参数 --pod-network-cidr=10.244.0.0/16 。

cat <<EOF > /etc/kubeinstall/kubeadm-config.yaml
apiVersion: kubeadm.k8s.io/v1beta2
bootstrapTokens:
- groups:
  - system:bootstrappers:kubeadm:default-node-token
  token: abcdef.0123456789abcdef
  ttl: 24h0m0s
  usages:
  - signing
  - authentication
kind: InitConfiguration
localAPIEndpoint:
  advertiseAddress: 172.20.101.103
  bindPort: 6443
nodeRegistration:
  criSocket: /var/run/dockershim.sock
  name: cnvs-kubm-101-103
  taints:
  - effect: NoSchedule
    key: node-role.kubernetes.io/master
---
apiServer:
  timeoutForControlPlane: 4m0s
apiVersion: kubeadm.k8s.io/v1beta2
certificatesDir: /etc/kubernetes/pki
clusterName: cn-k8s-prod
controlPlaneEndpoint: "172.20.101.253:16443"
controllerManager: {}
dns:
  type: CoreDNS
etcd:
  local:
    dataDir: /var/lib/etcd
imageRepository: k8s.gcr.io
kind: ClusterConfiguration
kubernetesVersion: v1.15.0
networking:
  dnsDomain: cluster.local
  serviceSubnet: 10.245.0.0/16
  podSubnet: "10.244.0.0/16"
scheduler: {}
EOF

注意我使用nginx做的代理

master上面都配置Nginx反向代理 API Server;
172.20.101.253 是master节点的vip;
Nginx 代理端口为 16443 端口;
API Server使用 6443 端口;

使用config指定初始化集群。

kubeadm init \
--config=/etc/kubeinstall/kubeadm-config.yaml \
--upload-certs 

master 节点:

[kub-m]
172.20.101.103 name=cnvs-kubm-101-103  
172.20.101.104 name=cnvs-kubm-101-104  
172.20.101.105 name=cnvs-kubm-101-105  

第一台master节点初始化返回结果

To start using your cluster, you need to run the following as a regular user:

  mkdir -p $HOME/.kube
  sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
  sudo chown $(id -u):$(id -g) $HOME/.kube/config

You should now deploy a pod network to the cluster.
Run "kubectl apply -f [podnetwork].yaml" with one of the options listed at:
  https://kubernetes.io/docs/concepts/cluster-administration/addons/

You can now join any number of the control-plane node running the following command on each as root:

  kubeadm join 172.20.101.253:16443 --token hgep1g.fwo8y7rt8o8xqjml \
    --discovery-token-ca-cert-hash sha256:08462cf2017a1e3292ea355a7fc56c49ac713b84d5af45b649d7c8be539b97cf \
    --control-plane --certificate-key 1c20a3656bbcc9be4b5a16bcb4c4bab5445d221d4721900bf31b5b196b733cec

Please note that the certificate-key gives access to cluster sensitive data, keep it secret!
As a safeguard, uploaded-certs will be deleted in two hours; If necessary, you can use 
"kubeadm init phase upload-certs --upload-certs" to reload certs afterward.

Then you can join any number of worker nodes by running the following on each as root:

kubeadm join 172.20.101.253:16443 --token hgep1g.fwo8y7rt8o8xqjml \
    --discovery-token-ca-cert-hash sha256:08462cf2017a1e3292ea355a7fc56c49ac713b84d5af45b649d7c8be539b97cf 

在执行节点上执行如下操作,初始化k8s环境。

mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config

节点验证:

[root@cnvs-kubnode-101-103 kubeinstall]# 

mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config

#节点状态
[root@cnvs-kubnode-101-103 kubeinstall]# kubectl get nodes
NAME                STATUS     ROLES    AGE     VERSION
cnvs-kubm-101-103   NotReady   master   3m35s   v1.15.3    <=== 状态 NotReady,安装网络插件后恢复

#服务状态
[root@cnvs-kubnode-101-103 kubeinstall]# kubectl get cs
NAME                 STATUS    MESSAGE             ERROR
controller-manager   Healthy   ok                  
scheduler            Healthy   ok                  
etcd-0               Healthy   {"health":"true"} 

部署flannel网络

安装 CNI 插件时,其 pod CIDR 必须与上面配置文件中的 podSubnet 保持一致,请按照实际情况修改。

注意:kubernetes 版本更新较快,推荐部署前阅读相关文档,使用与集群版本匹配的网络插件!
https://github.com/coreos/flannel#flannel

  kubectl apply -f https://raw.githubusercontent.com/coreos/flannel/master/Documentation/kube-flannel.yml

验证节点状态:

[root@cnvs-kubnode-101-103 kubeinstall]# kubectl get nodes
NAME                STATUS   ROLES    AGE     VERSION
cnvs-kubm-101-103   Ready    master   4m51s   v1.15.3     <=== Ready

#服务状态全部为running
[root@cnvs-kubm-101-103 kubeinstall]# kubectl get pods -n kube-system
NAME                                        READY   STATUS    RESTARTS   AGE
coredns-5c98db65d4-kl66m                    1/1     Running   0          83s
coredns-5c98db65d4-xjlkl                    0/1     Running   0          83s
etcd-cnvs-kubm-101-103                      1/1     Running   0          40s
kube-apiserver-cnvs-kubm-101-103            1/1     Running   0          25s
kube-controller-manager-cnvs-kubm-101-103   1/1     Running   0          27s
kube-flannel-ds-amd64-jln7d                 1/1     Running   0          17s
kube-proxy-g2b2p                            1/1     Running   0          83s
kube-scheduler-cnvs-kubm-101-103            1/1     Running   0          35s

添加第(2 ~ 3)master节点执行如下操作

  kubeadm join 172.20.101.253:16443 --token m1n5s7.ktdbt3ce3yj4czm1 \
    --discovery-token-ca-cert-hash sha256:0eca032dcb2354f8c9e4f3ecfd2a19941b8a7b0c6cc4cc0764dc61a3a8e5ff68 \
    --control-plane --certificate-key e5b5fe5b9576a604b7107bbe12a8aa09d4ddc309c9d9447bc5552fdd481df627   

在执行节点上执行如下操作,初始化一下k8s环境。

mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config

验证

所有master节点ready

[root@cnvs-kubm-101-105 ~]# kubectl get nodes
NAME                STATUS   ROLES    AGE     VERSION
cnvs-kubm-101-103   Ready    master   4m35s   v1.15.3
cnvs-kubm-101-104   Ready    master   96s     v1.15.3
cnvs-kubm-101-105   Ready    master   22s     v1.15.3

所有node节点,执行如下操作

[kub-n]
172.20.101.106
172.20.101.107
172.20.101.108
172.20.101.118
172.20.101.120
172.20.101.122
172.20.101.123
172.20.101.124

单节点安装

kubeadm join 172.20.101.253:16443 --token hgep1g.fwo8y7rt8o8xqjml \
    --discovery-token-ca-cert-hash sha256:08462cf2017a1e3292ea355a7fc56c49ac713b84d5af45b649d7c8be539b97cf 

ansible 安装

ansible kub-n -m shell -a "kubeadm join 172.20.101.253:16443 --token hgep1g.fwo8y7rt8o8xqjml \
    --discovery-token-ca-cert-hash sha256:08462cf2017a1e3292ea355a7fc56c49ac713b84d5af45b649d7c8be539b97cf"

返回

[preflight] Running pre-flight checks
[preflight] Reading configuration from the cluster...
[preflight] FYI: You can look at this config file with 'kubectl -n kube-system get cm kubeadm-config -oyaml'
[kubelet-start] Downloading configuration for the kubelet from the "kubelet-config-1.15" ConfigMap in the kube-system namespace
[kubelet-start] Writing kubelet configuration to file "/var/lib/kubelet/config.yaml"
[kubelet-start] Writing kubelet environment file with flags to file "/var/lib/kubelet/kubeadm-flags.env"
[kubelet-start] Activating the kubelet service
[kubelet-start] Waiting for the kubelet to perform the TLS Bootstrap...

This node has joined the cluster:
* Certificate signing request was sent to apiserver and a response was received.
* The Kubelet was informed of the new secure connection details.

Run 'kubectl get nodes' on the control-plane to see this node join the cluster.

验证:

[root@cnvs-kubm-101-104 ~]# kubectl get node
NAME                   STATUS   ROLES    AGE     VERSION
cnvs-kubm-101-103      Ready    master   8m32s   v1.15.3
cnvs-kubm-101-104      Ready    master   5m33s   v1.15.3
cnvs-kubm-101-105      Ready    master   4m19s   v1.15.3
cnvs-kubnode-101-106   Ready    <none>   28s     v1.15.3
cnvs-kubnode-101-107   Ready    <none>   28s     v1.15.3
cnvs-kubnode-101-108   Ready    <none>   28s     v1.15.3
cnvs-kubnode-101-118   Ready    <none>   28s     v1.15.3
cnvs-kubnode-101-120   Ready    <none>   28s     v1.15.3
cnvs-kubnode-101-122   Ready    <none>   13s     v1.15.3
cnvs-kubnode-101-123   Ready    <none>   13s     v1.15.3
cnvs-kubnode-101-124   Ready    <none>   2m31s   v1.15.3

添加标签

为部署traefik做准备

kubectl label nodes {cnvs-kubnode-101-106,cnvs-kubnode-101-107} traefik=traefik-outer --overwrite

kubectl label nodes {cnvs-kubnode-101-123,cnvs-kubnode-101-124} traefik=traefik-inner --overwrite

验证

[root@cnvs-kubm-101-103 kub-deploy]# kubectl get node  -l "traefik=traefik-outer"
NAME                   STATUS   ROLES    AGE     VERSION
cnvs-kubnode-101-106   Ready    <none>   5m25s   v1.15.3
cnvs-kubnode-101-107   Ready    <none>   5m25s   v1.15.3

[root@cnvs-kubm-101-103 kub-deploy]# kubectl get node  -l "traefik=traefik-inner"
NAME                   STATUS   ROLES    AGE     VERSION
cnvs-kubnode-101-123   Ready    <none>   5m18s   v1.15.3
cnvs-kubnode-101-124   Ready    <none>   7m36s   v1.15.3

集群总体验证

#所有服务状态均为 running
[root@cnvs-kubm-101-103 kub-deploy]# kubectl get pods -n kube-system
NAME                                        READY   STATUS    RESTARTS   AGE
coredns-5c98db65d4-kl66m                    1/1     Running   0          13m
coredns-5c98db65d4-xjlkl                    1/1     Running   0          13m
etcd-cnvs-kubm-101-103                      1/1     Running   0          13m
etcd-cnvs-kubm-101-104                      1/1     Running   0          7m57s
etcd-cnvs-kubm-101-105                      1/1     Running   0          5m26s
kube-apiserver-cnvs-kubm-101-103            1/1     Running   0          13m
kube-apiserver-cnvs-kubm-101-104            1/1     Running   1          7m47s
kube-apiserver-cnvs-kubm-101-105            1/1     Running   0          4m8s
kube-controller-manager-cnvs-kubm-101-103   1/1     Running   1          13m
kube-controller-manager-cnvs-kubm-101-104   1/1     Running   0          6m38s
kube-controller-manager-cnvs-kubm-101-105   1/1     Running   0          4m11s
kube-flannel-ds-amd64-2nfbb                 1/1     Running   2          88s
kube-flannel-ds-amd64-2pbqs                 1/1     Running   1          104s
kube-flannel-ds-amd64-4w7cb                 1/1     Running   2          92s
kube-flannel-ds-amd64-gxzhw                 1/1     Running   1          3m58s
kube-flannel-ds-amd64-jln7d                 1/1     Running   0          12m
kube-flannel-ds-amd64-lj9t4                 1/1     Running   2          92s
kube-flannel-ds-amd64-mbp8k                 1/1     Running   2          91s
kube-flannel-ds-amd64-r8t9c                 1/1     Running   1          7m57s
kube-flannel-ds-amd64-rdsfm                 1/1     Running   0          3m5s
kube-flannel-ds-amd64-w8gww                 1/1     Running   1          5m26s
kube-flannel-ds-amd64-x7rh7                 1/1     Running   2          92s
kube-proxy-4kxjv                            1/1     Running   0          5m26s
kube-proxy-4vqpf                            1/1     Running   0          92s
kube-proxy-677lf                            1/1     Running   0          92s
kube-proxy-b9kr2                            1/1     Running   0          104s
kube-proxy-dm9kd                            1/1     Running   0          3m5s
kube-proxy-g2b2p                            1/1     Running   0          13m
kube-proxy-m79jv                            1/1     Running   0          3m58s
kube-proxy-snqhr                            1/1     Running   0          92s
kube-proxy-t7mkx                            1/1     Running   0          91s
kube-proxy-z2f67                            1/1     Running   0          7m57s
kube-proxy-zjpwn                            1/1     Running   0          88s
kube-scheduler-cnvs-kubm-101-103            1/1     Running   1          13m
kube-scheduler-cnvs-kubm-101-104            1/1     Running   0          7m4s
kube-scheduler-cnvs-kubm-101-105            1/1     Running   0          4m32s

#所有节点状态为ready
[root@cnvs-kubm-101-103 kub-deploy]# kubectl get nodes
NAME                   STATUS   ROLES    AGE     VERSION
cnvs-kubm-101-103      Ready    master   15m     v1.15.3
cnvs-kubm-101-104      Ready    master   9m32s   v1.15.3
cnvs-kubm-101-105      Ready    master   7m1s    v1.15.3
cnvs-kubnode-101-106   Ready    <none>   3m6s    v1.15.3
cnvs-kubnode-101-107   Ready    <none>   3m19s   v1.15.3
cnvs-kubnode-101-108   Ready    <none>   3m7s    v1.15.3
cnvs-kubnode-101-118   Ready    <none>   3m7s    v1.15.3
cnvs-kubnode-101-120   Ready    <none>   3m7s    v1.15.3
cnvs-kubnode-101-122   Ready    <none>   3m3s    v1.15.3
cnvs-kubnode-101-123   Ready    <none>   4m40s   v1.15.3
cnvs-kubnode-101-124   Ready    <none>   5m33s   v1.15.3

批量清理集群

kubectl delete node --all
ansible kub-all -m shell -a "kubeadm reset -f"
ansible kub-all -m shell -a "rm -rf /etc/kubernetes && rm -rf /var/lib/etcd && rm -rf /var/lib/kubelet && rm -rf $HOME/.kube/config "
ansible kub-all -m shell -a "iptables -F && iptables -t nat -F && iptables -t mangle -F && iptables -X"
ansible kub-all -m shell -a "systemctl restart docker && systemctl enable kubelet"
ansible kub-all -m shell -a "ip link del flannel.1 && ip a|grep flannel "

推荐清理环境

如果之前配置过k8s,或者首次配置没有成功,推荐在每一个节点上先把系统环境清理一下。

systemctl stop kubelet
docker rm -f -v $(docker ps -a -q)

rm -rf /etc/kubernetes
rm -rf /var/lib/etcd
rm -rf /var/lib/kubelet
rm -rf $HOME/.kube/config
ip link del flannel.1 
iptables -F && iptables -t nat -F && iptables -t mangle -F && iptables -X

yum reinstall -y kubelet
systemctl daemon-reload
systemctl restart docker
systemctl enable kubelet

参考文档

https://www.cnblogs.com/net2817/p/10513369.html
https://k8smeetup.github.io/docs/reference/setup-tools/kubeadm/kubeadm-config/