污点 (Taint) 是使节点与 Pod 产生排斥的一类规则
污点策略通过嵌合在键值对上的污点标签进行声明实现
格式为key=value:[污点标签]
kubectl describe nodes [节点名字]
kubectl taint node [节点名字] key=value:污点标签
kubectl taint node [节点名字] key=value:污点标签-
# 查看污点策略
[root@master ~]# kubectl describe nodes|grep Taints
Taints: node-role.kubernetes.io/master:NoSchedule
Taints: <none>
Taints: <none>
Taints: <none>
Taints: <none>
Taints: <none>
# node-0001 设置污点策略 PreferNoSchedule
[root@master ~]# kubectl taint node node-0001 k=v1:PreferNoSchedule
node/node-0001 tainted
# node-0002 设置污点策略 NoSchedule
[root@master ~]# kubectl taint node node-0002 k=v2:NoSchedule
node/node-0002 tainted
[root@master ~]# kubectl describe nodes |grep Taints
Taints: node-role.kubernetes.io/master:NoSchedule
Taints: k=v1:PreferNoSchedule
Taints: k=v2:NoSchedule
Taints: <none>
Taints: <none>
Taints: <none>
[root@master ~]# vim myphp.yaml
---
kind: Pod
apiVersion: v1
metadata:
name: myphp
spec:
containers:
- name: php
image: myos:php-fpm
resources:
requests:
cpu: 1500m
# 优先使用没有污点的节点
[root@master ~]# sed "s,myphp,php1," myphp.yaml |kubectl apply -f -
pod/php1 created
[root@master ~]# sed "s,myphp,php2," myphp.yaml |kubectl apply -f -
pod/php2 created
[root@master ~]# sed "s,myphp,php3," myphp.yaml |kubectl apply -f -
pod/php3 created
[root@master ~]# kubectl get pods -o wide
NAME READY STATUS RESTARTS AGE IP NODE
php1 1/1 Running 0 13s 10.244.3.35 node-0003
php2 1/1 Running 0 5s 10.244.4.32 node-0004
php3 1/1 Running 0 5s 10.244.5.34 node-0005
# 在没有其他节点可用的时候,最后使用 PreferNoSchedule 节点
[root@master ~]# sed 's,myphp,php4,' myphp.yaml |kubectl apply -f -
pod/php4 created
[root@master ~]# kubectl get pods -o wide
NAME READY STATUS RESTARTS AGE IP NODE
php1 1/1 Running 0 13s 10.244.3.35 node-0003
php2 1/1 Running 0 5s 10.244.4.32 node-0004
php3 1/1 Running 0 5s 10.244.5.34 node-0005
php4 1/1 Running 0 80s 10.244.1.33 node-0001
# 继续创建Pod,即使Pod创建失败,也不会使用 NoSchedule 节点
[root@master ~]# sed 's,myphp,php5,' myphp.yaml |kubectl apply -f -
pod/php5 created
[root@master ~]# kubectl get pods -o wide
NAME READY STATUS RESTARTS AGE IP NODE
php1 1/1 Running 0 53s 10.244.3.35 node-0003
php2 1/1 Running 0 65s 10.244.4.32 node-0004
php3 1/1 Running 0 75s 10.244.5.34 node-0005
php4 1/1 Running 0 80s 10.244.1.33 node-0001
php5 0/1 Pending 0 5s <none> <none>
设置 NoSchedule 污点标签,只对新建 Pod 有效,对于已经创建完成的 Pod 不会产生影响
# NoSchedule 不会影响已经创建的 Pod
[root@master ~]# kubectl taint node node-0003 k=v3:NoSchedule
node/node-0003 tainted
[root@master ~]# kubectl describe nodes |grep Taints
Taints: node-role.kubernetes.io/master:NoSchedule
Taints: k=v1:PreferNoSchedule
Taints: k=v2:NoSchedule
Taints: k=v3:NoSchedule
Taints: <none>
Taints: <none>
[root@master ~]# kubectl get pods -o wide
NAME READY STATUS RESTARTS AGE IP NODE
php1 1/1 Running 0 53s 10.244.3.35 node-0003
php2 1/1 Running 0 65s 10.244.4.32 node-0004
php3 1/1 Running 0 75s 10.244.5.34 node-0005
php4 1/1 Running 0 80s 10.244.1.33 node-0001
php5 0/1 Pending 0 5s <none> <none>
NoExecute 驱逐策略会删除该节点上所有不能容忍该污点的 Pod
为 node-0004 设置 NoExecute 策略
# NoExecute 会删除节点上的 Pod
[root@master ~]# kubectl taint node node-0004 k=v4:NoExecute
node/node-0004 tainted
[root@master ~]# kubectl describe nodes |grep Taints
Taints: node-role.kubernetes.io/master:NoSchedule
Taints: k=v1:PreferNoSchedule
Taints: k=v2:NoSchedule
Taints: k=v3:NoSchedule
Taints: k=v4:NoExecute
Taints: <none>
# 查看Pod的情况,NoExecute 污点所在节点的Pod已经被删除了
[root@master ~]# kubectl get pods -o wide
NAME READY STATUS RESTARTS AGE IP NODE
php1 1/1 Running 0 53s 10.244.3.35 node-0003
php3 1/1 Running 0 75s 10.244.5.34 node-0005
php4 1/1 Running 0 80s 10.244.1.33 node-0001
php5 0/1 Pending 0 5s <none> <none>
[root@master ~]# kubectl delete pod --all
pod "php1" deleted
pod "php3" deleted
pod "php4" deleted
pod "php5" deleted
[root@master ~]# kubectl taint node node-000{1..4} k-
node/node-0001 untainted
node/node-0002 untainted
node/node-0003 untainted
node/node-0004 untainted
[root@master ~]# kubectl describe nodes |grep Taints
Taints: node-role.kubernetes.io/master:NoSchedule
Taints: <none>
Taints: <none>
Taints: <none>
Taints: <none>
Taints: <none>
容忍刚好与污点相反,某些时候我们需要在有污点的节点上运行 Pod,这种无视污点标签的调度方式称为容忍
spec:
tolerations: # 定义容忍策略
- operator: Equal # 匹配方式(Equal,Exists),省略时默认为 Equal
key: k1 # 设置键值对 key,为空(需配合 operator: Exists)代表任意键值对
value: v1 # 设置 value 的值
effect: NoSchedule # 设置容忍的标签,为空代表所有污点标签
containers:
节点 node-000{1..2} 设置污点标签 k=v1:NoSchedule
节点 node-000{3..4} 设置污点标签 k=v2:NoSchedule
节点 node-0005 设置污点标签 k=v1:NoExecute
# 节点 node-0001,node-0002 设置污点标签 k=v1:NoSchedule
[root@master ~]# kubectl taint node node-000{1..2} k=v1:NoSchedule
node/node-0001 tainted
node/node-0002 tainted
# 节点 node-0003,node-0004 设置污点标签 k=v2:NoSchedule
[root@master ~]# kubectl taint node node-000{3..4} k=v2:NoSchedule
node/node-0003 tainted
node/node-0004 tainted
# 节点 node-0005 设置污点标签 k=v1:NoExecute
[root@master ~]# kubectl taint node node-0005 k=v1:NoExecute
node/node-0005 tainted
[root@master ~]# kubectl describe nodes |grep Taints
Taints: node-role.kubernetes.io/master:NoSchedule
Taints: k=v1:NoSchedule
Taints: k=v1:NoSchedule
Taints: k=v2:NoSchedule
Taints: k=v2:NoSchedule
Taints: k=v1:NoExecute
# 容忍 k=v1:NoSchedule 污点
[root@master ~]# vim myphp.yaml
---
kind: Pod
apiVersion: v1
metadata:
name: myphp
spec:
tolerations:
- operator: Equal # 完全匹配键值对
key: k # 匹配键
value: v1 # 匹配值
effect: NoSchedule # 匹配污点标签
containers:
- name: php
image: myos:php-fpm
resources:
requests:
cpu: 1500m
[root@master ~]# for i in php{1..3};do sed "s,myphp,${i}," myphp.yaml ;done|kubectl apply -f -
pod/php1 created
pod/php2 created
pod/php3 created
[root@master ~]# kubectl get pods -o wide
NAME READY STATUS RESTARTS AGE IP NODE
php1 1/1 Running 0 6s 10.244.1.10 node-0001
php2 1/1 Running 0 6s 10.244.2.11 node-0002
php3 0/1 Pending 0 6s <none> <none>
[root@master ~]# kubectl delete pod --all
pod "php1" deleted
pod "php2" deleted
pod "php3" deleted
# 容忍 k=*:NoSchedule 污点
[root@master ~]# vim myphp.yaml
---
kind: Pod
apiVersion: v1
metadata:
name: myphp
spec:
tolerations:
- operator: Exists # 部分匹配,存在即可
key: k # 键
effect: NoSchedule # 污点标签
containers:
- name: php
image: myos:php-fpm
resources:
requests:
cpu: 1500m
[root@master ~]# for i in php{1..5};do sed "s,myphp,${i}," myphp.yaml ;done|kubectl apply -f -
pod/php1 created
pod/php2 created
pod/php3 created
pod/php4 created
pod/php5 created
[root@master ~]# kubectl get pods -o wide
NAME READY STATUS RESTARTS AGE IP NODE
php1 1/1 Running 0 6s 10.244.1.12 node-0001
php2 1/1 Running 0 6s 10.244.2.21 node-0002
php3 1/1 Running 0 6s 10.244.3.18 node-0003
php4 1/1 Running 0 6s 10.244.4.24 node-0004
php5 0/1 Pending 0 6s <none> <none>
[root@master ~]# kubectl delete pod --all
pod "php1" deleted
pod "php2" deleted
pod "php3" deleted
pod "php4" deleted
pod "php5" deleted
# 容忍所有 node 上键为 k 的污点(不限污点标签)
[root@master ~]# vim myphp.yaml
---
kind: Pod
apiVersion: v1
metadata:
name: myphp
spec:
tolerations:
- operator: Exists # 模糊匹配
key: k # 键
effect: "" # 设置空或删除,代表所有污点标签
containers:
- name: php
image: myos:php-fpm
resources:
requests:
cpu: 1500m
[root@master ~]# for i in php{1..5};do sed "s,myphp,${i}," myphp.yaml ;done|kubectl apply -f -
pod/php1 created
pod/php2 created
pod/php3 created
pod/php4 created
pod/php5 created
[root@master ~]# kubectl get pods -o wide
NAME READY STATUS RESTARTS AGE IP NODE
php1 1/1 Running 0 36s 10.244.1.15 node-0001
php2 1/1 Running 0 36s 10.244.2.16 node-0002
php3 1/1 Running 0 36s 10.244.3.19 node-0003
php4 1/1 Running 0 36s 10.244.4.17 node-0004
php5 1/1 Running 0 36s 10.244.5.18 node-0005
[root@master ~]# kubectl taint node node-000{1..5} k-
node/node-0001 untainted
node/node-0002 untainted
node/node-0003 untainted
node/node-0004 untainted
node/node-0005 untainted
[root@master ~]# kubectl describe nodes |grep Taints
Taints: node-role.kubernetes.io/master:NoSchedule
Taints: <none>
Taints: <none>
Taints: <none>
Taints: <none>
Taints: <none>
[root@master ~]# kubectl delete pod --all
pod "php1" deleted
pod "php2" deleted
pod "php3" deleted
pod "php4" deleted
pod "php5" deleted
优先级表示一个 Pod 相比于其他 Pod 的重要性
优先级可以保证重要的 Pod 被调度运行
如何使用优先级和抢占?
PriorityClass 是一个全局资源对象,它定义了从优先级类名称到优先级整数值的映射。优先级在 value 字段中指定,可以设置为小于或等于 10亿 的整数值,值越大,优先级越高。
globalDefault 用于设置默认优先级状态;如果没有设置任何优先级,Pod 的优先级为零
description 用来配置描述性信息,告诉用户优先级的用途
非抢占优先:指的是在调度阶段优先进行调度分配,一旦容器调度完成就不可以抢占,资源不足时,只能等待
抢占优先:强制调度一个 Pod,如果资源不足无法被调度,调度程序会抢占(删除)较低优先级的 Pod 的资源,来保证高优先级 Pod 的运行
# 定义优先级(队列优先)
[root@master ~]# vim mypriority.yaml
---
kind: PriorityClass
apiVersion: scheduling.k8s.io/v1
metadata:
name: high-non # 优先级名称
preemptionPolicy: Never # 策略:非抢占
value: 1000 # 优先级
---
kind: PriorityClass
apiVersion: scheduling.k8s.io/v1
metadata:
name: low-non # 优先级名称
preemptionPolicy: Never # 策略:非抢占
value: 500 # 优先级
[root@master ~]# kubectl apply -f mypriority.yaml
priorityclass.scheduling.k8s.io/high-non created
priorityclass.scheduling.k8s.io/low-non created
[root@master ~]# kubectl get priorityclasses.scheduling.k8s.io
NAME VALUE GLOBAL-DEFAULT AGE
high-non 1000 false 12s
low-non 500 false 12s
system-cluster-critical 2000000000 false 45h
system-node-critical 2000001000 false 45h
# 无优先级的 Pod
[root@master ~]# cat php1.yaml
---
kind: Pod
apiVersion: v1
metadata:
name: php1
spec:
nodeSelector:
kubernetes.io/hostname: node-0004
containers:
- name: php
image: myos:php-fpm
resources:
requests:
cpu: "1500m"
# 低优先级 Pod
[root@master ~]# cat php2.yaml
---
kind: Pod
apiVersion: v1
metadata:
name: php2
spec:
nodeSelector:
kubernetes.io/hostname: node-0004
priorityClassName: low-non # 指定优先级的名字
containers:
- name: php
image: myos:php-fpm
resources:
requests:
cpu: "1500m"
# 高优先级 Pod
[root@master ~]# cat php3.yaml
---
kind: Pod
apiVersion: v1
metadata:
name: php3
spec:
nodeSelector:
kubernetes.io/hostname: node-0004
priorityClassName: high-non # 指定优先级的名字
containers:
- name: php
image: myos:php-fpm
resources:
requests:
cpu: "1500m"
[root@master ~]# kubectl apply -f php1.yaml
pod/php1 created
[root@master ~]# kubectl apply -f php2.yaml
pod/php2 created
[root@master ~]# kubectl apply -f php3.yaml
pod/php3 created
[root@master ~]# kubectl get pods
NAME READY STATUS RESTARTS AGE
php1 1/1 Running 0 9s
php2 0/1 Pending 0 6s
php3 0/1 Pending 0 4s
[root@master ~]# kubectl delete pod php1
pod "php1" deleted
[root@master ~]# kubectl get pods
NAME READY STATUS RESTARTS AGE
php2 0/1 Pending 0 20s
php3 1/1 Running 0 18s
# 清理实验 Pod
[root@master ~]# kubectl delete pod php2 php3
pod "php2" deleted
pod "php3" deleted
[root@master ~]# vim mypriority.yaml
---
kind: PriorityClass
apiVersion: scheduling.k8s.io/v1
metadata:
name: high-non
preemptionPolicy: Never
value: 1000
---
kind: PriorityClass
apiVersion: scheduling.k8s.io/v1
metadata:
name: low-non
preemptionPolicy: Never
value: 500
---
kind: PriorityClass
apiVersion: scheduling.k8s.io/v1
metadata:
name: high
preemptionPolicy: PreemptLowerPriority
value: 1000
---
kind: PriorityClass
apiVersion: scheduling.k8s.io/v1
metadata:
name: low
preemptionPolicy: PreemptLowerPriority
value: 500
[root@master ~]# kubectl apply -f mypriority.yaml
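priorityclass.scheduling.k8s.io/high-non unchanged
priorityclass.scheduling.k8s.io/low-non unchanged
priorityclass.scheduling.k8s.io/high created
priorityclass.scheduling.k8s.io/low created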
[root@master ~]# kubectl get priorityclasses.scheduling.k8s.io
NAME VALUE GLOBAL-DEFAULT AGE
high 1000 false 4s
high-non 1000 false 2h
low 500 false 4s
low-non 500 false 2h
system-cluster-critical 2000000000 false 21d
system-node-critical 2000001000 false 21d
# 替换优先级策略
[root@master ~]# sed 's,-non,,' -i php?.yaml
# 默认优先级 Pod
[root@master ~]# kubectl apply -f php1.yaml
pod/php1 created
[root@master ~]# kubectl get pods
NAME READY STATUS RESTARTS AGE
php1 1/1 Running 0 6s
# 高优先级 Pod
[root@master ~]# kubectl apply -f php3.yaml
pod/php3 created
[root@master ~]# kubectl get pods
NAME READY STATUS RESTARTS AGE
php3 1/1 Running 0 9s
# 低优先级 Pod
[root@master ~]# kubectl apply -f php2.yaml
pod/php2 created
[root@master ~]# kubectl get pods
NAME READY STATUS RESTARTS AGE
php2 0/1 Pending 0 3s
php3 1/1 Running 0 9s
# 清理实验 Pod
[root@master ~]# kubectl delete pod --all
pod "php2" deleted
pod "php3" deleted
[root@master ~]# kubectl delete -f mypriority.yaml
priorityclass.scheduling.k8s.io "high-non" deleted
priorityclass.scheduling.k8s.io "low-non" deleted
priorityclass.scheduling.k8s.io "high" deleted
priorityclass.scheduling.k8s.io "low" deleted
容器是通过名称空间技术隔离的,有时候我们执行一些应用服务,需要使用或修改敏感的系统信息,这时容器需要突破隔离限制,获取更高的权限,这类容器统称特权容器。
运行特权容器会有一些安全风险,这种模式下运行容器对宿主机拥有root 访问权限,可以突破隔离直接控制宿主机的资源配置
更改容器主机名 和 /etc/hosts 文件
[root@master ~]# vim root.yaml
---
kind: Pod
apiVersion: v1
metadata:
name: root
spec:
hostname: myhost # 修改主机名
hostAliases: # 修改 /etc/hosts
- ip: 192.168.1.30 # IP 地址
hostnames: # 主机名列表
- harbor # 主机名
containers:
- name: apache
image: myos:httpd
[root@master ~]# kubectl apply -f root.yaml
pod/root created
[root@master ~]# kubectl exec -it root -- /bin/bash
[root@myhost html]# hostname
myhost
[root@myhost html]# cat /etc/hosts
... ...
# Entries added by HostAliases.
192.168.1.30 harbor
[root@master ~]# kubectl delete pod root
pod "root" deleted
[root@master ~]# vim root.yaml
---
kind: Pod
apiVersion: v1
metadata:
name: root
spec:
hostPID: true # 特权,共享系统进程
hostNetwork: true # 特权,共享主机网络
containers:
- name: apache
image: myos:httpd
securityContext: # 安全上下文值
privileged: true # root特权容器
[root@master ~]# kubectl apply -f root.yaml
pod/root created
[root@master ~]# kubectl get pods
NAME READY STATUS RESTARTS AGE
root 1/1 Running 0 26s
[root@master ~]# kubectl exec -it root -- /bin/bash
[root@node-0001 /]#
# 系统进程特权
[root@node-0001 /]# pstree -p
systemd(1)-+-NetworkManager(510)-+-dhclient(548)
| |-{NetworkManager}(522)
| `-{NetworkManager}(524)
|-agetty(851)
|-chronyd(502)
|-containerd(531)-+-{containerd}(555)
... ...
# 网络特权
[root@node-0001 /]# ifconfig eth0
eth0: flags=4163<UP,BROADCAST,RUNNING,MULTICAST> mtu 1500
inet 192.168.1.51 netmask 255.255.255.0 broadcast 192.168.1.255
ether fa:16:3e:70:c8:fa txqueuelen 1000 (Ethernet)
... ...
# root用户特权
[root@node-0001 /]# mkdir /sysroot
[root@node-0001 /]# mount /dev/vda1 /sysroot
[root@node-0001 /]# mount -t proc proc /sysroot/proc
[root@node-0001 /]# chroot /sysroot
sh-4.2# : 此处已经是 node 节点上的 root 用户了
# 删除特权容器
[root@master ~]# kubectl delete pod root
pod "root" deleted
Pod 安全策略是集群级别的资源,它能够控制 Pod 的运行行为,以及 Pod 能够访问哪些资源。
如何使用 Pod 安全策略?
PodSecurity 提供一种内置的 Pod 安全性准入控制器,作为 PodSecurityPolicies 特性的后继演化版本。Pod 安全性限制是在 Pod 被创建时,在名字空间层面实施的。
Pod 安全性标准定义了三种不同的策略 (Policy),以广泛覆盖安全应用场景。这些策略是渐进式的,安全级别从高度宽松至高度受限。
privileged:不受限制的策略,提供最大可能范围的权限许可。此策略允许特权提升
baseline:限制性最弱的策略,禁止已知的特权提升。允许使用默认的 Pod 配置
restricted:非常严格的限制性策略,遵循当前的保护 Pod的最佳实践
Kubernetes 定义了一组标签,你可以设置这些标签来定义某个名字空间上 Pod 安全性标准级别。你所选择的标签定义了检测到潜在违例时,所要采取的动作。
enforce:策略违例会导致 Pod 被拒绝
audit:策略违例会触发审计日志,但是 Pod 仍可被接受
warn:策略违例会触发用户可见的警告信息,但是 Pod 仍是被接受的
创建 myprod、mytest 名称空间
# 生产环境设置严格的准入控制
[root@master ~]# kubectl create namespace myprod
namespace/myprod created
[root@master ~]# kubectl label namespaces myprod pod-security.kubernetes.io/enforce=restricted
namespace/myprod labeled
# 测试环境测试警告提示
[root@master ~]# kubectl create namespace mytest
namespace/mytest created
[root@master ~]# kubectl label namespaces mytest pod-security.kubernetes.io/warn=baseline
namespace/mytest labeled
# 创建特权容器
# myprod 无法创建特权容器
[root@master ~]# kubectl -n myprod apply -f root.yaml
Error from server (Failure): error when creating "root.yaml": host namespaces (hostNetwork=true, hostPID=true), privileged (container "apache" must not set securityContext.privileged=true), allowPrivilegeEscalation != false (container "apache" must set securityContext.allowPrivilegeEscalation=false), unrestricted capabilities (container "apache" must set securityContext.capabilities.drop=["ALL"]), runAsNonRoot != true (pod or container "apache" must set securityContext.runAsNonRoot=true), seccompProfile (pod or container "apache" must set securityContext.seccompProfile.type to "RuntimeDefault" or "Localhost")
[root@master ~]#
[root@master ~]# kubectl -n myprod get pods
No resources found in myprod namespace.
# mytest 创建特权容器
[root@master ~]# kubectl -n mytest apply -f root.yaml
Warning: would violate "latest" version of "baseline" PodSecurity profile: host namespaces (hostNetwork=true, hostPID=true), privileged (container "apache" must not set securityContext.privileged=true)
pod/root created
[root@master ~]#
[root@master ~]# kubectl -n mytest get pods
NAME READY STATUS RESTARTS AGE
root 1/1 Running 0 7s
[root@master ~]#
[root@master ~]# vim nonroot.yaml
---
kind: Pod
apiVersion: v1
metadata:
name: nonroot
spec:
restartPolicy: Always
containers:
- name: php
image: myos:php-fpm
securityContext:
allowPrivilegeEscalation: false
runAsNonRoot: true
runAsUser: 65534
seccompProfile:
type: "RuntimeDefault"
capabilities:
drop: ["ALL"]
[root@master ~]# kubectl -n myprod apply -f nonroot.yaml
pod/nonroot created
[root@master ~]# kubectl -n myprod get pods
NAME READY STATUS RESTARTS AGE
nonroot 1/1 Running 0 6s
[root@master ~]# kubectl -n myprod exec -it nonroot -- id
uid=65534(nobody) gid=65534(nobody) groups=65534(nobody)