参考官网:
https://kubernetes.io/docs/setup/independent/install-kubeadm/#verify-the-mac-address-and-product-uuid-are-unique-for-every-node
kubeadm init 配置文件参数参考:
https://kubernetes.io/docs/reference/setup-tools/kubeadm/kubeadm-init/
3台 centos7 最新的系统
etcd 跑在1台 master 节点上
网络组件使用Calico, 转发使用 ipvs
主机名 | ip | 说明 | 组件 |
---|---|---|---|
k8s-company01-master01 | 172.16.4.201 | 1个 master 节点 | keepalived、haproxy、etcd、kubelet、kube-apiserver |
k8s-company01-worker001 ~ 002 | 172.16.4.204 ~ 205 | 2个 worker 节点 | kubelet |
swapoff -a; sed -i '/ swap / s/^\(.*\)$/#\1/g' /etc/fstab
timedatectl set-timezone Asia/Shanghai
crontab -e 加入:
8 * * * * /usr/sbin/ntpdate asia.pool.ntp.org && /sbin/hwclock --systohc
备注:关闭 selinux
setenforce 0
sed -i --follow-symlinks "s/^SELINUX=enforcing/SELINUX=disabled/g" /etc/selinux/config
sed -i --follow-symlinks "s/^SELINUX=permissive/SELINUX=disabled/g" /etc/selinux/config
# 关闭 firewalld
systemctl stop firewalld.service
systemctl disable firewalld.service
# 配置主机名(注意修改成自己的):
3台主机分别设置主机名:
hostnamectl set-hostname k8s-company01-master01
hostnamectl set-hostname k8s-company01-worker001
hostnamectl set-hostname k8s-company01-worker002
在3台主机的/etc/hosts 中添加
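cat >> /etc/hosts <<EOF
172.16.4.201 k8s-company01-master01
172.16.4.204 k8s-company01-worker001
172.16.4.205 k8s-company01-worker002
EOF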
## Install prerequisites.
yum install -y yum-utils device-mapper-persistent-data lvm2
yum-config-manager \
--add-repo \
https://download.docker.com/linux/centos/docker-ce.repo
yum install -y docker-ce-18.09.5 docker-ce-cli-18.09.5
mkdir /etc/docker
cat > /etc/docker/daemon.json <<EOF
{
"exec-opts": ["native.cgroupdriver=systemd"],
"log-driver": "json-file",
"log-opts": {
"max-size": "100m"
},
"storage-driver": "overlay2",
"storage-opts": [
"overlay2.override_kernel_check=true"
]
}
EOF
mkdir -p /etc/systemd/system/docker.service.d
systemctl daemon-reload
systemctl enable docker.service
systemctl restart docker
yum -y install yum-plugin-versionlock
yum versionlock docker-ce docker-ce-cli
yum versionlock list
cat <<EOF > /etc/sysctl.d/k8s.conf
net.bridge.bridge-nf-call-ip6tables = 1
net.bridge.bridge-nf-call-iptables = 1
net.ipv4.ip_forward = 1
vm.swappiness=0
vm.overcommit_memory=1
vm.panic_on_oom=0
fs.may_detach_mounts = 1
fs.inotify.max_user_watches=89100
fs.file-max=52706963
fs.nr_open=52706963
net.netfilter.nf_conntrack_max=2310720
EOF
modprobe br_netfilter
sysctl --system
cat <<EOF > /etc/yum.repos.d/kubernetes.repo
[kubernetes]
name=Kubernetes
baseurl=https://mirrors.aliyun.com/kubernetes/yum/repos/kubernetes-el7-x86_64/
enabled=1
gpgcheck=1
repo_gpgcheck=1
gpgkey=https://mirrors.aliyun.com/kubernetes/yum/doc/yum-key.gpg https://mirrors.aliyun.com/kubernetes/yum/doc/rpm-package-key.gpg
exclude=kube*
EOF
yum install -y kubelet-1.14.1 kubeadm-1.14.1 kubectl-1.14.1 cri-tools-1.12.0 kubernetes-cni-0.7.5 ipvsadm --disableexcludes=kubernetes
# 加载 ipvs 相关内核模块
modprobe ip_vs
modprobe ip_vs_rr
modprobe ip_vs_wrr
modprobe ip_vs_sh
modprobe nf_conntrack_ipv4
modprobe br_netfilter
# 加入开机启动中
cat <<EOF >>/etc/rc.d/rc.local
modprobe ip_vs
modprobe ip_vs_rr
modprobe ip_vs_wrr
modprobe ip_vs_sh
modprobe nf_conntrack_ipv4
modprobe br_netfilter
EOF
chmod +x /etc/rc.d/rc.local
DOCKER_CGROUPS=$(docker info | grep 'Cgroup' | cut -d' ' -f3)
echo $DOCKER_CGROUPS
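# 注:原文此处在提取时被截断(heredoc 内容丢失,两条命令被粘连成一行),以下为根据上文 DOCKER_CGROUPS 重建的内容,请与原始文档核对
cat > /etc/sysconfig/kubelet <<EOF
KUBELET_EXTRA_ARGS="--cgroup-driver=$DOCKER_CGROUPS"
EOF
systemctl daemon-reload
systemctl enable kubelet
# 行尾残留的 ">> ~/.bashrc" 推测为配置 kubectl 命令补全(待确认):
echo "source <(kubectl completion bash)" >> ~/.bashrc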
网络组件使用Calico, 转发使用 ipvs
k8s-master01
# 注意修改 controlPlaneEndpoint: "k8s-company01-master01:6443" 中对应的 master主机名,这里应该不能使用 ip,因为会自动配 tls 证书
cat << EOF > kubeadm-config.yaml
apiVersion: kubeadm.k8s.io/v1beta1
kind: ClusterConfiguration
kubernetesVersion: v1.14.1
# add the available imageRepository in china
imageRepository: reg01.sky-mobi.com/k8s/k8s.gcr.io
controlPlaneEndpoint: "k8s-company01-master01:6443"
networking:
  podSubnet: "10.254.0.0/16"
---
apiVersion: kubeproxy.config.k8s.io/v1alpha1
kind: KubeProxyConfiguration
ipvs:
  minSyncPeriod: 1s
  syncPeriod: 10s
mode: ipvs
EOF
预拉取镜像:
kubeadm config images pull --config kubeadm-config.yaml
master01 初始化:
kubeadm init --config=kubeadm-config.yaml
注意刚开始的打印出的信息,根据提示,消除掉所有的 WARNING
如果想要重来,使用 kubeadm reset 命令,并且按照提示清空 iptables 和 ipvs 配置,然后重启 docker 服务。
提示成功后,记录下最后 join 的所有参数,用于后面 worker 节点的加入(token 默认 24 小时内有效;过期后可在 master 上执行 kubeadm token create --print-join-command 重新生成)
# 类似:
You can now join any number of the control-plane node running the following command on each as root:
Then you can join any number of worker nodes by running the following on each as root:
kubeadm join k8s-company01-master01:6443 --token fp0x6g.cwuzedvtwlu1zg1f \
--discovery-token-ca-cert-hash sha256:5d4095bc9e4e4b5300abe5a25afe1064f32c1ddcecc02a1f9b0aeee7710c3383
记得执行如下命令,以便使用 kubectl访问集群
mkdir -p $HOME/.kube
cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
chown $(id -u):$(id -g) $HOME/.kube/config
查看集群状态时,coredns pending 没关系,因为网络插件还没装
# 显示结果作为参考
[root@k8s-master01 ~]# kubectl get pod -n kube-system
NAME READY STATUS RESTARTS AGE
coredns-56c9dc7946-5c5z2 0/1 Pending 0 34m
coredns-56c9dc7946-thqwd 0/1 Pending 0 34m
etcd-k8s-master01 1/1 Running 2 34m
kube-apiserver-k8s-master01 1/1 Running 2 34m
kube-controller-manager-k8s-master01 1/1 Running 1 33m
kube-proxy-bl9c6 1/1 Running 2 34m
kube-scheduler-k8s-master01 1/1 Running 1 34m
安装 calico 网络插件(在 master01 上操作)
参考:
https://docs.projectcalico.org/v3.6/getting-started/kubernetes/installation/calico
下载 yaml 文件( 这里的版本是 v3.6.1,文件源于官网https://docs.projectcalico.org/v3.6/getting-started/kubernetes/installation/hosted/kubernetes-datastore/calico-networking/typha/calico.yaml 修改过网段和replicas以及 image 地址)
# 机房外部使用(有访问限制,公司自己的公网地址)
curl http://111.1.17.135/yum/scripts/k8s/calico_v3.6.1.yaml -O
# 机房内部使用
curl http://192.168.160.200/yum/scripts/k8s/calico_v3.6.1.yaml -O
下载下来后,将里面 replicas 值改成1(一个节点的 master,不需要部署3份 calico)
# 需要开启允许pod 被调度到master 节点上(在master01 上执行就行)
[root@k8s-company01-master01 ~]# kubectl taint nodes --all node-role.kubernetes.io/master-
node/k8s-company01-master01 untainted
# 安装 calico (卸载是kubectl delete -f calico_v3.6.1.yaml)
[root@k8s-company01-master01 ~]# kubectl apply -f calico_v3.6.1.yaml
configmap/calico-config created
customresourcedefinition.apiextensions.k8s.io/felixconfigurations.crd.projectcalico.org created
customresourcedefinition.apiextensions.k8s.io/ipamblocks.crd.projectcalico.org created
customresourcedefinition.apiextensions.k8s.io/blockaffinities.crd.projectcalico.org created
customresourcedefinition.apiextensions.k8s.io/ipamhandles.crd.projectcalico.org created
customresourcedefinition.apiextensions.k8s.io/ipamconfigs.crd.projectcalico.org created
customresourcedefinition.apiextensions.k8s.io/bgppeers.crd.projectcalico.org created
customresourcedefinition.apiextensions.k8s.io/bgpconfigurations.crd.projectcalico.org created
customresourcedefinition.apiextensions.k8s.io/ippools.crd.projectcalico.org created
customresourcedefinition.apiextensions.k8s.io/hostendpoints.crd.projectcalico.org created
customresourcedefinition.apiextensions.k8s.io/clusterinformations.crd.projectcalico.org created
customresourcedefinition.apiextensions.k8s.io/globalnetworkpolicies.crd.projectcalico.org created
customresourcedefinition.apiextensions.k8s.io/globalnetworksets.crd.projectcalico.org created
customresourcedefinition.apiextensions.k8s.io/networkpolicies.crd.projectcalico.org created
clusterrole.rbac.authorization.k8s.io/calico-kube-controllers created
clusterrolebinding.rbac.authorization.k8s.io/calico-kube-controllers created
clusterrole.rbac.authorization.k8s.io/calico-node created
clusterrolebinding.rbac.authorization.k8s.io/calico-node created
service/calico-typha created
deployment.apps/calico-typha created
poddisruptionbudget.policy/calico-typha created
daemonset.extensions/calico-node created
serviceaccount/calico-node created
deployment.extensions/calico-kube-controllers created
serviceaccount/calico-kube-controllers created
# 至此,所有 pod 运行正常(以下输出是在3个 master 的显示结果的基础上修改的,仅供参考)
[root@k8s-company01-master01 ~]# kubectl -n kube-system get pod
NAME READY STATUS RESTARTS AGE
calico-kube-controllers-749f7c8df8-knlx4 0/1 Running 0 20s
calico-node-4txj7 0/1 Running 0 21s
calico-typha-646cdc958c-7j948 0/1 Pending 0 21s
coredns-56c9dc7946-944nt 0/1 Running 0 4m9s
coredns-56c9dc7946-nh2sk 0/1 Running 0 4m9s
etcd-k8s-company01-master01 1/1 Running 0 3m26s
kube-apiserver-k8s-company01-master01 1/1 Running 0 3m23s
kube-controller-manager-k8s-company01-master01 1/1 Running 1 3m28s
kube-proxy-8wm4v 1/1 Running 0 4m9s
kube-scheduler-k8s-company01-master01 1/1 Running 1 3m18s
# 所有master 节点都是 ready 状态
[root@k8s-company01-master01 ~]# kubectl get node
NAME STATUS ROLES AGE VERSION
k8s-company01-master01 Ready master 4m48s v1.14.1
# 在 2 台 worker 节点上分别执行之前记录的 join 命令(地址与 kubeadm-config.yaml 中的 controlPlaneEndpoint 一致)
kubeadm join k8s-company01-master01:6443 --token fp0x6g.cwuzedvtwlu1zg1f \
--discovery-token-ca-cert-hash sha256:5d4095bc9e4e4b5300abe5a25afe1064f32c1ddcecc02a1f9b0aeee7710c3383
# 添加成功显示:
This node has joined the cluster:
* Certificate signing request was sent to apiserver and a response was received.
* The Kubelet was informed of the new secure connection details.
Run 'kubectl get nodes' on the master to see this node join the cluster.
### kubectl get nodes 命令在任意 master 节点执行。
http://wiki.sky-mobi.com:8090/pages/viewpage.action?pageId=9079715
#删除node节点
#master节点执行以下命令
kubectl get nodes #获取node名
kubectl drain NODE_NAME --delete-local-data --force --ignore-daemonsets #排空node(将 NODE_NAME 替换为上一步获取的 node 名)
kubectl delete node NODE_NAME #删除node
#node节点执行以下
kubeadm reset
iptables -F && iptables -t nat -F && iptables -t mangle -F && iptables -X #重置iptables
ipvsadm -C #重置IPVS
#如果想再次加入可以执行以下命令
kubeadm join MASTER_HOST:PORT --token TOKEN --discovery-token-ca-cert-hash sha256:HASH   # 将 MASTER_HOST:PORT、TOKEN、HASH 替换为实际值
以下命令可以查看一些状态
kubectl get pod -n kube-system -o wide
kubectl get nodes
kubectl get csr
kubectl get componentstatuses
# 不安装 metrics-server 的情况下,kubectl top 无法使用:
[root@k8s-master03 ~]# kubectl top nodes
Error from server (NotFound): the server could not find the requested resource (get services http:heapster:)
这里使用 helm 安装:
安装 helm(在 master01 上执行):
wget http://192.168.160.200/yum/scripts/k8s/helm-v2.13.1-linux-amd64.tar.gz
或 wget http://111.1.17.135/yum/scripts/k8s/helm-v2.13.1-linux-amd64.tar.gz
tar xvzf helm-v2.13.1-linux-amd64.tar.gz
mv linux-amd64/helm /usr/local/bin/helm
# 验证
helm help
每个节点执行
yum install -y socat
使用微软的源(阿里的源很长时间都没更新了!)
helm init --client-only --stable-repo-url http://mirror.azure.cn/kubernetes/charts/
helm repo add incubator http://mirror.azure.cn/kubernetes/charts-incubator/
helm repo update
# 在 Kubernetes 中安装 Tiller 服务,因为官方的镜像因为某些原因无法拉取,使用 -i 指定自己的镜像,可选镜像:registry.cn-hangzhou.aliyuncs.com/google_containers/tiller:v2.13.1(阿里云)。镜像的版本必须与 helm 客户端的版本相同,使用 helm version 可查看 helm 客户端版本。
helm init --service-account tiller --upgrade -i registry.cn-hangzhou.aliyuncs.com/google_containers/tiller:v2.13.1 --tiller-tls-cert /etc/kubernetes/ssl/tiller001.pem --tiller-tls-key /etc/kubernetes/ssl/tiller001-key.pem --tls-ca-cert /etc/kubernetes/ssl/ca.pem --tiller-namespace kube-system --stable-repo-url http://mirror.azure.cn/kubernetes/charts/ --service-account tiller --history-max 200
# 因为 Helm 的服务端 Tiller 是一个部署在 Kubernetes 中 Kube-System Namespace 下 的 Deployment,它会去连接 Kube-Api 在 Kubernetes 里创建和删除应用。
# 而从 Kubernetes 1.6 版本开始,API Server 启用了 RBAC 授权。目前的 Tiller 部署时默认没有定义授权的 ServiceAccount,这会导致访问 API Server 时被拒绝。所以我们需要明确为 Tiller 部署添加授权。
# 创建 Kubernetes 的服务帐号和绑定角色
kubectl create serviceaccount --namespace kube-system tiller
kubectl create clusterrolebinding tiller-cluster-rule --clusterrole=cluster-admin --serviceaccount=kube-system:tiller
kubectl patch deploy --namespace kube-system tiller-deploy -p '{"spec":{"template":{"spec":{"serviceAccount":"tiller"}}}}'
# 查看是否授权成功
[root@k8s-company01-master01 ~]# kubectl -n kube-system get pods|grep tiller
tiller-deploy-7bf47568d4-42wf5 1/1 Running 0 17s
[root@k8s-company01-master01 ~]# helm version
Client: &version.Version{SemVer:"v2.13.1", GitCommit:"618447cbf203d147601b4b9bd7f8c37a5d39fbb4", GitTreeState:"clean"}
Server: &version.Version{SemVer:"v2.13.1", GitCommit:"618447cbf203d147601b4b9bd7f8c37a5d39fbb4", GitTreeState:"clean"}
[root@k8s-company01-master01 ~]# helm repo list
NAME URL
stable http://mirror.azure.cn/kubernetes/charts/
local http://127.0.0.1:8879/charts
incubator http://mirror.azure.cn/kubernetes/charts-incubator/
## 如果要替换仓库,先移除原先的仓库
#helm repo remove stable
## 添加新的仓库地址
#helm repo add stable http://mirror.azure.cn/kubernetes/charts/
#helm repo add incubator http://mirror.azure.cn/kubernetes/charts-incubator/
#helm repo update
# 创建 metrics-server-custom.yaml
cat >> metrics-server-custom.yaml <<EOF
# 注:原文此处的 heredoc 内容(image、args 等自定义参数)以及随后的 helm install metrics-server 命令在提取时丢失,请从原始文档补全
EOF
# 为方便管理,创建一个单独的 Namespace monitoring,Prometheus Operator 相关的组件都会部署到这个 Namespace。
kubectl create namespace monitoring
## 自定义 prometheus-operator 参数
# helm fetch stable/prometheus-operator --version=5.0.3 --untar
# cat prometheus-operator/values.yaml | grep -v '#' | grep -v ^$ > prometheus-operator-custom.yaml
# 只保留我们要修改 image 的部分,还有使用 https 连接 etcd,例如:
参考:https://fengxsong.github.io/2018/05/30/Using-helm-to-manage-prometheus-operator/
cat >> prometheus-operator-custom.yaml << EOF
## prometheus-operator/values.yaml
alertmanager:
  service:
    nodePort: 31309
    type: NodePort
  alertmanagerSpec:
    image:
      repository: reg01.sky-mobi.com/k8s/quay.io/prometheus/alertmanager
      tag: v0.16.1
prometheusOperator:
  image:
    repository: reg01.sky-mobi.com/k8s/quay.io/coreos/prometheus-operator
    tag: v0.29.0
    pullPolicy: IfNotPresent
  configmapReloadImage:
    repository: reg01.sky-mobi.com/k8s/quay.io/coreos/configmap-reload
    tag: v0.0.1
  prometheusConfigReloaderImage:
    repository: reg01.sky-mobi.com/k8s/quay.io/coreos/prometheus-config-reloader
    tag: v0.29.0
  hyperkubeImage:
    repository: reg01.sky-mobi.com/k8s/k8s.gcr.io/hyperkube
    tag: v1.12.1
    pullPolicy: IfNotPresent
prometheus:
  service:
    nodePort: 32489
    type: NodePort
  prometheusSpec:
    image:
      repository: reg01.sky-mobi.com/k8s/quay.io/prometheus/prometheus
      tag: v2.7.1
    secrets: [etcd-client-cert]
kubeEtcd:
  serviceMonitor:
    scheme: https
    insecureSkipVerify: false
    serverName: ""
    caFile: /etc/prometheus/secrets/etcd-client-cert/ca.crt
    certFile: /etc/prometheus/secrets/etcd-client-cert/healthcheck-client.crt
    keyFile: /etc/prometheus/secrets/etcd-client-cert/healthcheck-client.key
## prometheus-operator/charts/grafana/values.yaml
grafana:
  service:
    nodePort: 30579
    type: NodePort
  image:
    repository: reg01.sky-mobi.com/k8s/grafana/grafana
    tag: 6.0.2
  sidecar:
    image: reg01.sky-mobi.com/k8s/kiwigrid/k8s-sidecar:0.0.13
## prometheus-operator/charts/kube-state-metrics/values.yaml
kube-state-metrics:
  image:
    repository: reg01.sky-mobi.com/k8s/k8s.gcr.io/kube-state-metrics
    tag: v1.5.0
## prometheus-operator/charts/prometheus-node-exporter/values.yaml
prometheus-node-exporter:
  image:
    repository: reg01.sky-mobi.com/k8s/quay.io/prometheus/node-exporter
    tag: v0.17.0
EOF
## 注:以上 grafana、kube-state-metrics、prometheus-node-exporter 三段配置分别对应子 chart 的 values.yaml(例如 prometheus-operator/charts/grafana/values.yaml),顶层 key 按 charts 目录下的子目录名添加:
#[root@k8s-master01 ~]# ll prometheus-operator/charts/
#total 0
#drwxr-xr-x 4 root root 114 Apr 1 00:48 grafana
#drwxr-xr-x 3 root root 96 Apr 1 00:18 kube-state-metrics
#drwxr-xr-x 3 root root 110 Apr 1 00:20 prometheus-node-exporter
# 创建连接 etcd 的证书secret:
kubectl -n monitoring create secret generic etcd-client-cert --from-file=/etc/kubernetes/pki/etcd/ca.crt --from-file=/etc/kubernetes/pki/etcd/healthcheck-client.crt --from-file=/etc/kubernetes/pki/etcd/healthcheck-client.key
helm install stable/prometheus-operator --version=5.0.3 --name=monitoring --namespace=monitoring -f prometheus-operator-custom.yaml
## 如果想要删除重来,可以使用 helm 删除,指定名字 monitoring
#helm del --purge monitoring
#kubectl delete crd prometheusrules.monitoring.coreos.com
#kubectl delete crd servicemonitors.monitoring.coreos.com
#kubectl delete crd alertmanagers.monitoring.coreos.com
需要重新安装时,不要先删除之前的 release 再 install(可能会报错),直接用 upgrade 即可:
helm upgrade monitoring stable/prometheus-operator --version=5.0.3 --namespace=monitoring -f prometheus-operator-custom.yaml
[root@k8s-company01-master01 ~]# kubectl -n monitoring get pod
NAME READY STATUS RESTARTS AGE
alertmanager-monitoring-prometheus-oper-alertmanager-0 2/2 Running 0 29m
monitoring-grafana-7dd5cf9dd7-wx8mz 2/2 Running 0 29m
monitoring-kube-state-metrics-7d98487cfc-t6qqw 1/1 Running 0 29m
monitoring-prometheus-node-exporter-fnvp9 1/1 Running 0 29m
monitoring-prometheus-node-exporter-kczcq 1/1 Running 0 29m
monitoring-prometheus-node-exporter-m8kf6 1/1 Running 0 29m
monitoring-prometheus-node-exporter-mwc4g 1/1 Running 0 29m
monitoring-prometheus-node-exporter-wxmt8 1/1 Running 0 29m
monitoring-prometheus-oper-operator-7f96b488f6-2j7h5 1/1 Running 0 29m
prometheus-monitoring-prometheus-oper-prometheus-0 3/3 Running 1 28m
[root@k8s-company01-master01 ~]# kubectl get svc -n monitoring
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
alertmanager-operated ClusterIP None 9093/TCP,6783/TCP 31m
monitoring-grafana NodePort 10.109.159.105 80:30579/TCP 32m
monitoring-kube-state-metrics ClusterIP 10.100.31.235 8080/TCP 32m
monitoring-prometheus-node-exporter ClusterIP 10.109.119.13 9100/TCP 32m
monitoring-prometheus-oper-alertmanager NodePort 10.105.171.135 9093:31309/TCP 32m
monitoring-prometheus-oper-operator ClusterIP 10.98.135.170 8080/TCP 32m
monitoring-prometheus-oper-prometheus NodePort 10.96.15.36 9090:32489/TCP 32m
prometheus-operated ClusterIP None 9090/TCP 31m
# 查看有没有异常告警,alerts里面的第一个Watchdog 是正常的报警,用于监控功能探测。
http://172.16.4.201:32489/alerts
http://172.16.4.201:32489/targets
#以下是安装 kubernetes-dashboard,用处不大,正式环境暂时不装
#helm install --name=kubernetes-dashboard stable/kubernetes-dashboard --version=1.4.0 --namespace=kube-system --set image.repository=reg01.sky-mobi.com/k8s/k8s.gcr.io/kubernetes-dashboard-amd64,image.tag=v1.10.1,rbac.clusterAdminRole=true
#Heapster 已在 Kubernetes 1.13 版本中移除(https://github.com/kubernetes/heapster/blob/master/docs/deprecation.md),推荐使用 metrics-server 与 Prometheus。