首先准备三台服务器(我选择的是青云CentOS7.9):一个主服务器配置为4核8G,两个从服务器配置为8核16G
三台服务器处于同一个VPC
中:
并且公用一个安全组,安全组之间保证组内互信:
每台服务器都需要执行:
# 删除Docker
yum remove docker*
# 安装yum工具
yum install -y yum-utils
# 配置docker的yum源
yum-config-manager \
--add-repo \
http://mirrors.aliyun.com/docker-ce/linux/centos/docker-ce.repo
# 安装Docker
yum install -y docker-ce-20.10.7 docker-ce-cli-20.10.7 containerd.io-1.4.6
# 将Docker设置为开机启动
systemctl enable docker --now
# Docker加速配置
mkdir -p /etc/docker
tee /etc/docker/daemon.json <<-'EOF'
{
"registry-mirrors": ["https://82m9ar63.mirror.aliyuncs.com"],
"exec-opts": ["native.cgroupdriver=systemd"],
"log-driver": "json-file",
"log-opts": {
"max-size": "100m"
},
"storage-driver": "overlay2"
}
EOF
systemctl daemon-reload
systemctl restart docker
每台服务器都需要执行:
# 为每台服务器设置hostname
hostnamectl set-hostname k8s-master
hostnamectl set-hostname node1
hostnamectl set-hostname node2
# 将 SELinux 设置为 permissive 模式(相当于将其禁用)
setenforce 0
sed -i 's/^SELINUX=enforcing$/SELINUX=permissive/' /etc/selinux/config
# 关闭swap
swapoff -a
sed -ri 's/.*swap.*/#&/' /etc/fstab
#允许 iptables 检查桥接流量
cat <<EOF | sudo tee /etc/modules-load.d/k8s.conf
br_netfilter
EOF
cat <<EOF | sudo tee /etc/sysctl.d/k8s.conf
net.bridge.bridge-nf-call-ip6tables = 1
net.bridge.bridge-nf-call-iptables = 1
EOF
sysctl --system
每台服务器都需要执行:
# 配置k8s的yum源
cat <<EOF | sudo tee /etc/yum.repos.d/kubernetes.repo
[kubernetes]
name=Kubernetes
baseurl=http://mirrors.aliyun.com/kubernetes/yum/repos/kubernetes-el7-x86_64
enabled=1
gpgcheck=0
repo_gpgcheck=0
gpgkey=http://mirrors.aliyun.com/kubernetes/yum/doc/yum-key.gpg
http://mirrors.aliyun.com/kubernetes/yum/doc/rpm-package-key.gpg
EOF
# 安装kubelet、kubeadm、kubectl
yum install -y kubelet-1.20.9 kubeadm-1.20.9 kubectl-1.20.9
# 启动kubelet
systemctl enable --now kubelet
# 配置master域名(内网ip通过"ip a"命令查看)
echo "172.31.0.2 k8s-master" >> /etc/hosts
# 测试每台机器是否能ping通master的ip
ping k8s-master
Master节点执行:
# 初始化Master(注意ip地址和主机hostname)
kubeadm init \
--apiserver-advertise-address=172.31.0.2 \
--control-plane-endpoint=k8s-master \
--image-repository registry.cn-hangzhou.aliyuncs.com/lfy_k8s_images \
--kubernetes-version v1.20.9 \
--service-cidr=10.96.0.0/16 \
--pod-network-cidr=192.168.0.0/16
Master节点执行:
# 根据初始化后得到需要执行的命令
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
Master节点执行:
# 下载colico.yaml
curl https://docs.projectcalico.org/v3.20/manifests/calico.yaml -O
# 应用配置文件
kubectl apply -f calico.yaml
Worker节点执行:
# 根据初始化后得到需要执行join命令(单master)
kubeadm join k8s-master:6443 --token du2751.mt5rvcgkgo8n806v \
--discovery-token-ca-cert-hash sha256:0c4e306dbe725c5363ad272954f3d7bdc00fcb0c6c249338b9ab8f9e6f7c3b02
经过上述操作,我们已经把K8S集群搭建完成了!
注意:之所以没有安装dashboard,因为我们会使用KubeSphere
取代dashboard
每台服务器都执行:
# 安装NFS工具
yum install -y nfs-utils
Master服务器执行:
# 暴露挂载配置
echo "/nfs/data/ *(insecure,rw,sync,no_root_squash)" > /etc/exports
# 创建挂载目录
mkdir -p /nfs/data
# 应用rpcbind
systemctl enable rpcbind
# 应用NFS服务器
systemctl enable nfs-server
# 启动rpcbind
systemctl start rpcbind
# 启动NFS服务器
systemctl start nfs-server
# 激活上述配置
exportfs -r
# 检查NFS
exportfs
Worker服务器执行:
# 使用Master的内网ip
showmount -e 172.31.0.2
mkdir -p /nfs/data
mount -t nfs 172.31.0.2:/nfs/data /nfs/data
用于实现PV的动态供应,Master执行:
# 创建配置文件
vi sc.yaml
粘贴内容如下(记得切换ip):
## 创建了一个存储类
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
name: nfs-storage
annotations:
storageclass.kubernetes.io/is-default-class: "true"
provisioner: k8s-sigs.io/nfs-subdir-external-provisioner
parameters:
archiveOnDelete: "true" ## 删除pv的时候,pv的内容是否要备份
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: nfs-client-provisioner
labels:
app: nfs-client-provisioner
# replace with namespace where provisioner is deployed
namespace: default
spec:
replicas: 1
strategy:
type: Recreate
selector:
matchLabels:
app: nfs-client-provisioner
template:
metadata:
labels:
app: nfs-client-provisioner
spec:
serviceAccountName: nfs-client-provisioner
containers:
- name: nfs-client-provisioner
image: registry.cn-hangzhou.aliyuncs.com/lfy_k8s_images/nfs-subdir-external-provisioner:v4.0.2
# resources:
# limits:
# cpu: 10m
# requests:
# cpu: 10m
volumeMounts:
- name: nfs-client-root
mountPath: /persistentvolumes
env:
- name: PROVISIONER_NAME
value: k8s-sigs.io/nfs-subdir-external-provisioner
- name: NFS_SERVER
value: 172.31.0.2 ## 指定自己nfs服务器地址
- name: NFS_PATH
value: /nfs/data ## nfs服务器共享的目录
volumes:
- name: nfs-client-root
nfs:
server: 172.31.0.2
path: /nfs/data
---
apiVersion: v1
kind: ServiceAccount
metadata:
name: nfs-client-provisioner
# replace with namespace where provisioner is deployed
namespace: default
---
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
name: nfs-client-provisioner-runner
rules:
- apiGroups: [""]
resources: ["nodes"]
verbs: ["get", "list", "watch"]
- apiGroups: [""]
resources: ["persistentvolumes"]
verbs: ["get", "list", "watch", "create", "delete"]
- apiGroups: [""]
resources: ["persistentvolumeclaims"]
verbs: ["get", "list", "watch", "update"]
- apiGroups: ["storage.k8s.io"]
resources: ["storageclasses"]
verbs: ["get", "list", "watch"]
- apiGroups: [""]
resources: ["events"]
verbs: ["create", "update", "patch"]
---
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
name: run-nfs-client-provisioner
subjects:
- kind: ServiceAccount
name: nfs-client-provisioner
# replace with namespace where provisioner is deployed
namespace: default
roleRef:
kind: ClusterRole
name: nfs-client-provisioner-runner
apiGroup: rbac.authorization.k8s.io
---
kind: Role
apiVersion: rbac.authorization.k8s.io/v1
metadata:
name: leader-locking-nfs-client-provisioner
# replace with namespace where provisioner is deployed
namespace: default
rules:
- apiGroups: [""]
resources: ["endpoints"]
verbs: ["get", "list", "watch", "create", "update", "patch"]
---
kind: RoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
name: leader-locking-nfs-client-provisioner
# replace with namespace where provisioner is deployed
namespace: default
subjects:
- kind: ServiceAccount
name: nfs-client-provisioner
# replace with namespace where provisioner is deployed
namespace: default
roleRef:
kind: Role
name: leader-locking-nfs-client-provisioner
apiGroup: rbac.authorization.k8s.io
# 应用配置文件
kubectl apply -f sc.yaml
用于完成集群指标监控的组件,主节点执行:
# 创建文件
vi metrics.yaml
粘贴内容如下:
apiVersion: v1
kind: ServiceAccount
metadata:
labels:
k8s-app: metrics-server
name: metrics-server
namespace: kube-system
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
labels:
k8s-app: metrics-server
rbac.authorization.k8s.io/aggregate-to-admin: "true"
rbac.authorization.k8s.io/aggregate-to-edit: "true"
rbac.authorization.k8s.io/aggregate-to-view: "true"
name: system:aggregated-metrics-reader
rules:
- apiGroups:
- metrics.k8s.io
resources:
- pods
- nodes
verbs:
- get
- list
- watch
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
labels:
k8s-app: metrics-server
name: system:metrics-server
rules:
- apiGroups:
- ""
resources:
- pods
- nodes
- nodes/stats
- namespaces
- configmaps
verbs:
- get
- list
- watch
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
labels:
k8s-app: metrics-server
name: metrics-server-auth-reader
namespace: kube-system
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: Role
name: extension-apiserver-authentication-reader
subjects:
- kind: ServiceAccount
name: metrics-server
namespace: kube-system
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
labels:
k8s-app: metrics-server
name: metrics-server:system:auth-delegator
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: system:auth-delegator
subjects:
- kind: ServiceAccount
name: metrics-server
namespace: kube-system
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
labels:
k8s-app: metrics-server
name: system:metrics-server
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: system:metrics-server
subjects:
- kind: ServiceAccount
name: metrics-server
namespace: kube-system
---
apiVersion: v1
kind: Service
metadata:
labels:
k8s-app: metrics-server
name: metrics-server
namespace: kube-system
spec:
ports:
- name: https
port: 443
protocol: TCP
targetPort: https
selector:
k8s-app: metrics-server
---
apiVersion: apps/v1
kind: Deployment
metadata:
labels:
k8s-app: metrics-server
name: metrics-server
namespace: kube-system
spec:
selector:
matchLabels:
k8s-app: metrics-server
strategy:
rollingUpdate:
maxUnavailable: 0
template:
metadata:
labels:
k8s-app: metrics-server
spec:
containers:
- args:
- --cert-dir=/tmp
- --kubelet-insecure-tls
- --secure-port=4443
- --kubelet-preferred-address-types=InternalIP,ExternalIP,Hostname
- --kubelet-use-node-status-port
image: registry.cn-hangzhou.aliyuncs.com/lfy_k8s_images/metrics-server:v0.4.3
imagePullPolicy: IfNotPresent
livenessProbe:
failureThreshold: 3
httpGet:
path: /livez
port: https
scheme: HTTPS
periodSeconds: 10
name: metrics-server
ports:
- containerPort: 4443
name: https
protocol: TCP
readinessProbe:
failureThreshold: 3
httpGet:
path: /readyz
port: https
scheme: HTTPS
periodSeconds: 10
securityContext:
readOnlyRootFilesystem: true
runAsNonRoot: true
runAsUser: 1000
volumeMounts:
- mountPath: /tmp
name: tmp-dir
nodeSelector:
kubernetes.io/os: linux
priorityClassName: system-cluster-critical
serviceAccountName: metrics-server
volumes:
- emptyDir: {}
name: tmp-dir
---
apiVersion: apiregistration.k8s.io/v1
kind: APIService
metadata:
labels:
k8s-app: metrics-server
name: v1beta1.metrics.k8s.io
spec:
group: metrics.k8s.io
groupPriorityMinimum: 100
insecureSkipTLSVerify: true
service:
name: metrics-server
namespace: kube-system
version: v1beta1
versionPriority: 100
应用配置文件:
kubectl apply -f metrics.yaml
主节点执行:
# 安装wget、vim工具
yum install -y wget
yum install -y vim
# 下载kubesphere安装器
wget https://github.com/kubesphere/ks-installer/releases/download/v3.1.1/kubesphere-installer.yaml
# 下载集群配置文件
wget https://github.com/kubesphere/ks-installer/releases/download/v3.1.1/cluster-configuration.yaml
修改cluster-configuration.yaml如下:
---
apiVersion: installer.kubesphere.io/v1alpha1
kind: ClusterConfiguration
metadata:
name: ks-installer
namespace: kubesphere-system
labels:
version: v3.1.1
spec:
persistence:
storageClass: "" # If there is no default StorageClass in your cluster, you need to specify an existing StorageClass here.
authentication:
jwtSecret: "" # Keep the jwtSecret consistent with the Host Cluster. Retrieve the jwtSecret by executing "kubectl -n kubesphere-system get cm kubesphere-config -o yaml | grep -v "apiVersion" | grep jwtSecret" on the Host Cluster.
local_registry: "" # Add your private registry address if it is needed.
etcd:
monitoring: true # Enable or disable etcd monitoring dashboard installation. You have to create a Secret for etcd before you enable it.
endpointIps: 172.31.0.2 # etcd cluster EndpointIps. It can be a bunch of IPs here.
port: 2379 # etcd port.
tlsEnable: true
common:
redis:
enabled: true
openldap:
enabled: true
minioVolumeSize: 20Gi # Minio PVC size.
openldapVolumeSize: 2Gi # openldap PVC size.
redisVolumSize: 2Gi # Redis PVC size.
monitoring:
# type: external # Whether to specify the external prometheus stack, and need to modify the endpoint at the next line.
endpoint: http://prometheus-operated.kubesphere-monitoring-system.svc:9090 # Prometheus endpoint to get metrics data.
es: # Storage backend for logging, events and auditing.
# elasticsearchMasterReplicas: 1 # The total number of master nodes. Even numbers are not allowed.
# elasticsearchDataReplicas: 1 # The total number of data nodes.
elasticsearchMasterVolumeSize: 4Gi # The volume size of Elasticsearch master nodes.
elasticsearchDataVolumeSize: 20Gi # The volume size of Elasticsearch data nodes.
logMaxAge: 7 # Log retention time in built-in Elasticsearch. It is 7 days by default.
elkPrefix: logstash # The string making up index names. The index name will be formatted as ks--log.
basicAuth:
enabled: false
username: ""
password: ""
externalElasticsearchUrl: ""
externalElasticsearchPort: ""
console:
enableMultiLogin: true # Enable or disable simultaneous logins. It allows different users to log in with the same account at the same time.
port: 30880
alerting: # (CPU: 0.1 Core, Memory: 100 MiB) It enables users to customize alerting policies to send messages to receivers in time with different time intervals and alerting levels to choose from.
enabled: true # Enable or disable the KubeSphere Alerting System.
# thanosruler:
# replicas: 1
# resources: {}
auditing: # Provide a security-relevant chronological set of records,recording the sequence of activities happening on the platform, initiated by different tenants.
enabled: true # Enable or disable the KubeSphere Auditing Log System.
devops: # (CPU: 0.47 Core, Memory: 8.6 G) Provide an out-of-the-box CI/CD system based on Jenkins, and automated workflow tools including Source-to-Image & Binary-to-Image.
enabled: true # Enable or disable the KubeSphere DevOps System.
jenkinsMemoryLim: 2Gi # Jenkins memory limit.
jenkinsMemoryReq: 1500Mi # Jenkins memory request.
jenkinsVolumeSize: 8Gi # Jenkins volume size.
jenkinsJavaOpts_Xms: 512m # The following three fields are JVM parameters.
jenkinsJavaOpts_Xmx: 512m
jenkinsJavaOpts_MaxRAM: 2g
events: # Provide a graphical web console for Kubernetes Events exporting, filtering and alerting in multi-tenant Kubernetes clusters.
enabled: true # Enable or disable the KubeSphere Events System.
ruler:
enabled: true
replicas: 2
logging: # (CPU: 57 m, Memory: 2.76 G) Flexible logging functions are provided for log query, collection and management in a unified console. Additional log collectors can be added, such as Elasticsearch, Kafka and Fluentd.
enabled: true # Enable or disable the KubeSphere Logging System.
logsidecar:
enabled: true
replicas: 2
metrics_server: # (CPU: 56 m, Memory: 44.35 MiB) It enables HPA (Horizontal Pod Autoscaler).
enabled: false # Enable or disable metrics-server.
monitoring:
storageClass: "" # If there is an independent StorageClass you need for Prometheus, you can specify it here. The default StorageClass is used by default.
# prometheusReplicas: 1 # Prometheus replicas are responsible for monitoring different segments of data source and providing high availability.
prometheusMemoryRequest: 400Mi # Prometheus request memory.
prometheusVolumeSize: 20Gi # Prometheus PVC size.
# alertmanagerReplicas: 1 # AlertManager Replicas.
multicluster:
clusterRole: none # host | member | none # You can install a solo cluster, or specify it as the Host or Member Cluster.
network:
networkpolicy: # Network policies allow network isolation within the same cluster, which means firewalls can be set up between certain instances (Pods).
# Make sure that the CNI network plugin used by the cluster supports NetworkPolicy. There are a number of CNI network plugins that support NetworkPolicy, including Calico, Cilium, Kube-router, Romana and Weave Net.
enabled: true # Enable or disable network policies.
ippool: # Use Pod IP Pools to manage the Pod network address space. Pods to be created can be assigned IP addresses from a Pod IP Pool.
type: calico # Specify "calico" for this field if Calico is used as your CNI plugin. "none" means that Pod IP Pools are disabled.
topology: # Use Service Topology to view Service-to-Service communication based on Weave Scope.
type: none # Specify "weave-scope" for this field to enable Service Topology. "none" means that Service Topology is disabled.
openpitrix: # An App Store that is accessible to all platform tenants. You can use it to manage apps across their entire lifecycle.
store:
enabled: true # Enable or disable the KubeSphere App Store.
servicemesh: # (0.3 Core, 300 MiB) Provide fine-grained traffic management, observability and tracing, and visualized traffic topology.
enabled: true # Base component (pilot). Enable or disable KubeSphere Service Mesh (Istio-based).
kubeedge: # Add edge nodes to your cluster and deploy workloads on edge nodes.
enabled: true # Enable or disable KubeEdge.
cloudCore:
nodeSelector: {"node-role.kubernetes.io/worker": ""}
tolerations: []
cloudhubPort: "10000"
cloudhubQuicPort: "10001"
cloudhubHttpsPort: "10002"
cloudstreamPort: "10003"
tunnelPort: "10004"
cloudHub:
advertiseAddress: # At least a public IP address or an IP address which can be accessed by edge nodes must be provided.
- "" # Note that once KubeEdge is enabled, CloudCore will malfunction if the address is not provided.
nodeLimit: "100"
service:
cloudhubNodePort: "30000"
cloudhubQuicNodePort: "30001"
cloudhubHttpsNodePort: "30002"
cloudstreamNodePort: "30003"
tunnelNodePort: "30004"
edgeWatcher:
nodeSelector: {"node-role.kubernetes.io/worker": ""}
tolerations: []
edgeWatcherAgent:
nodeSelector: {"node-role.kubernetes.io/worker": ""}
tolerations: []
# 安装核心文件
kubectl apply -f kubesphere-installer.yaml
kubectl apply -f cluster-configuration.yaml
4.3的安装流程会比较久(大概20分钟),使用下述命令查看KubeSphere安装进度(主节点执行):
# 查看KubeSphere安装进度
kubectl logs -n kubesphere-system $(kubectl get pod -n kubesphere-system -l app=ks-install -o jsonpath='{.items[0].metadata.name}') -f
最终控制台会打印出登录KubeSphere的账号密码:
还需要解决etcd监控证书找不到问题:
kubectl -n kubesphere-monitoring-system create secret generic kube-etcd-client-certs --from-file=etcd-client-ca.crt=/etc/kubernetes/pki/etcd/ca.crt --from-file=etcd-client.crt=/etc/kubernetes/pki/apiserver-etcd-client.crt --from-file=etcd-client.key=/etc/kubernetes/pki/apiserver-etcd-client.key
等待所有pod处于Running,即可登录KubeSphere
Console: 公网ip:30880
Account: admin
Password: P@88w0rd