https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/
在docker中,我们可以使用cgroup对docker的资源进行限制;在K8S中,同样可以通过yaml文件对Pod资源创建进行限制。
yaml文件的格式在官方文档已经给出了示例,可以通过官方文档直接查看。
Pod和Container的资源请求和限制:
spec.containers[].resources.limits.cpu ##cpu上限
spec.containers[].resources.limits.memory ##内存上限
spec.containers[].resources.requests.cpu ##创建时分配的基本CPU资源
spec.containers[].resources.requests.memory ##创建时分配的基本内存资源
resources表示资源限制字段
requests表示创建时需要分配的基本资源,也就是资源的下限
limits表示资源上限,即这个容器最多能用到多少资源(resources是按容器设置的,Pod的总量为其中各容器之和)
以一个官方给出的yaml文件为例
apiVersion: v1
kind: Pod
metadata:
name: frontend
spec:
containers:
- name: app
image: images.my-company.example/app:v4
resources:
requests:
memory: "64Mi"
cpu: "250m"
limits:
memory: "128Mi"
cpu: "500m"
- name: log-aggregator
image: images.my-company.example/log-aggregator:v6
resources:
requests:
memory: "64Mi"
cpu: "250m"
limits:
memory: "128Mi"
cpu: "500m"
apiVersion: v1 ##版本为v1
kind: Pod ##创建的类型为pod
metadata: ##元数据(名称、标签等),可以自行定义
name: frontend ##名称,自行定义
spec: ##定义属性
containers: ##定义container的属性特征
- name: app ##容器名叫app
image: images.my-company.example/app:v4 ##镜像来源
resources: ##资源限制字段
requests: ##资源创建的基本配置
memory: "64Mi" ##内存最低为64M
cpu: "250m" ##这里的250m表示250毫核(millicore),即0.25个CPU核心,相当于一个CPU核心资源在一个时间分片中占比为25%
limits: ##资源上限
memory: "128Mi" ##限制最大内存是128M
cpu: "500m" ##最大占比为50%
以下为另一个容器,配置基本相同
- name: log-aggregator
image: images.my-company.example/log-aggregator:v6
resources:
requests:
memory: "64Mi"
cpu: "250m"
limits:
memory: "128Mi"
cpu: "500m"
在master上,创建一个mysql与wordpress
[root@master ~]# mkdir test
[root@master ~]# cd test/
[root@master test]# vim pod.yaml
apiVersion: v1
kind: Pod
metadata:
name: frontend
spec:
containers:
- name: db
image: mysql
env:
- name: MYSQL_ROOT_PASSWORD
value: "password"
resources:
requests:
memory: "64Mi"
cpu: "250m"
limits:
memory: "128Mi"
cpu: "500m"
- name: wp
image: wordpress
resources:
requests:
memory: "64Mi"
cpu: "250m"
limits:
memory: "128Mi"
cpu: "500m"
[root@master test]# kubectl apply -f pod.yaml
pod/frontend created
[root@master test]# kubectl describe pod frontend
Name: frontend
Namespace: default
Priority: 0
PriorityClassName: <none>
Node: 14.0.0.77/14.0.0.77
Start Time: Wed, 14 Oct 2020 10:18:01 +0800
Labels: <none>
Annotations: kubectl.kubernetes.io/last-applied-configuration:
{"apiVersion":"v1","kind":"Pod","metadata":{"annotations":{},"name":"frontend","namespace":"default"},"spec":{"containers":[{"env":[{"name...
Status: Running
IP: 172.17.75.4
Containers:
db:
Container ID: docker://4c6c3979d3c2a8e9309ee62831633dede6ecfcc45c48f707bb62b4f4cfa1f45c
Image: mysql
Image ID: docker-pullable://mysql@sha256:86b7c83e24c824163927db1016d5ab153a9a04358951be8b236171286e3289a4
Port: <none>
Host Port: <none>
State: Running
Started: Wed, 14 Oct 2020 10:19:41 +0800
Last State: Terminated
Reason: OOMKilled
Exit Code: 137
Started: Wed, 14 Oct 2020 10:19:15 +0800
Finished: Wed, 14 Oct 2020 10:19:24 +0800
Ready: True
Restart Count: 2
Limits:
cpu: 500m
memory: 128Mi
Requests:
cpu: 250m
memory: 64Mi
Environment:
MYSQL_ROOT_PASSWORD: password
Mounts:
/var/run/secrets/kubernetes.io/serviceaccount from default-token-mfrmh (ro)
wp:
Container ID: docker://f50cd3a2e8fc14c5f0b9824e6255f303091f64fd853d14035ff16a941b8d04f7
Image: wordpress
Image ID: docker-pullable://wordpress@sha256:785e6e7376dbf270dde7b581e967aafe5ef7d4ef8cb59fb4652e424425985960
Port: <none>
Host Port: <none>
State: Running
Started: Wed, 14 Oct 2020 10:19:13 +0800
Ready: True
Restart Count: 0
Limits:
cpu: 500m
memory: 128Mi
Requests:
cpu: 250m
memory: 64Mi
Environment: <none>
Mounts:
/var/run/secrets/kubernetes.io/serviceaccount from default-token-mfrmh (ro)
Conditions:
Type Status
Initialized True
Ready True
ContainersReady True
PodScheduled True
Volumes:
default-token-mfrmh:
Type: Secret (a volume populated by a Secret)
SecretName: default-token-mfrmh
Optional: false
QoS Class: Burstable
Node-Selectors: <none>
Tolerations: node.kubernetes.io/not-ready:NoExecute for 300s
node.kubernetes.io/unreachable:NoExecute for 300s
Events:
Type Reason Age From Message
---- ------ ---- ---- -------
Normal Scheduled 105s default-scheduler Successfully assigned default/frontend to 14.0.0.77
Normal Pulling 67s kubelet, 14.0.0.77 pulling image "wordpress"
Normal Created 33s kubelet, 14.0.0.77 Created container
Normal Started 33s kubelet, 14.0.0.77 Started container
Normal Pulled 33s kubelet, 14.0.0.77 Successfully pulled image "wordpress"
Warning BackOff 21s kubelet, 14.0.0.77 Back-off restarting failed container
Normal Pulling 10s (x3 over 103s) kubelet, 14.0.0.77 pulling image "mysql"
Normal Started 5s (x3 over 67s) kubelet, 14.0.0.77 Started container
Normal Pulled 5s (x3 over 67s) kubelet, 14.0.0.77 Successfully pulled image "mysql"
Normal Created 5s (x3 over 67s) kubelet, 14.0.0.77 Created container
查看节点信息,这个节点是创建上述pod资源的节点
[root@master test]# kubectl describe nodes 14.0.0.77
Name: 14.0.0.77
Roles: <none>
Labels: beta.kubernetes.io/arch=amd64
beta.kubernetes.io/os=linux
kubernetes.io/hostname=14.0.0.77
Annotations: node.alpha.kubernetes.io/ttl: 0
volumes.kubernetes.io/controller-managed-attach-detach: true
CreationTimestamp: Mon, 12 Oct 2020 09:54:52 +0800
Taints: <none>
Unschedulable: false
Conditions:
Type Status LastHeartbeatTime LastTransitionTime Reason Message
---- ------ ----------------- ------------------ ------ -------
OutOfDisk False Wed, 14 Oct 2020 11:02:58 +0800 Mon, 12 Oct 2020 09:54:52 +0800 KubeletHasSufficientDisk kubelet has sufficient disk space available
MemoryPressure False Wed, 14 Oct 2020 11:02:58 +0800 Mon, 12 Oct 2020 09:54:52 +0800 KubeletHasSufficientMemory kubelet has sufficient memory available
DiskPressure False Wed, 14 Oct 2020 11:02:58 +0800 Mon, 12 Oct 2020 09:54:52 +0800 KubeletHasNoDiskPressure kubelet has no disk pressure
PIDPressure False Wed, 14 Oct 2020 11:02:58 +0800 Mon, 12 Oct 2020 09:54:52 +0800 KubeletHasSufficientPID kubelet has sufficient PID available
Ready True Wed, 14 Oct 2020 11:02:58 +0800 Mon, 12 Oct 2020 17:13:18 +0800 KubeletReady kubelet is posting ready status
Addresses:
InternalIP: 14.0.0.77
Hostname: 14.0.0.77
Capacity:
cpu: 4
ephemeral-storage: 15342Mi
hugepages-1Gi: 0
hugepages-2Mi: 0
memory: 3861512Ki
pods: 110
Allocatable:
cpu: 4
ephemeral-storage: 14478527669
hugepages-1Gi: 0
hugepages-2Mi: 0
memory: 3759112Ki
pods: 110
System Info:
Machine ID: 830d827eebc640f69410f59d5235ca1c
System UUID: 447D4D56-AADD-2066-B07B-8B32E255DF68
Boot ID: 85cb1728-4641-4564-bc24-d2002f4159d3
Kernel Version: 3.10.0-957.el7.x86_64
OS Image: CentOS Linux 7 (Core)
Operating System: linux
Architecture: amd64
Container Runtime Version: docker://19.3.13
Kubelet Version: v1.12.3
Kube-Proxy Version: v1.12.3
Non-terminated Pods: (3 in total)
Namespace Name CPU Requests CPU Limits Memory Requests Memory Limits
--------- ---- ------------ ---------- --------------- -------------
default frontend 500m (12%) 1 (25%) 128Mi (3%) 256Mi (6%)
default nginx-7697996758-77dqd 0 (0%) 0 (0%) 0 (0%) 0 (0%)
default nginx-7697996758-bbb7w 0 (0%) 0 (0%) 0 (0%) 0 (0%)
Allocated resources:
(Total limits may be over 100 percent, i.e., overcommitted.)
Resource Requests Limits
-------- -------- ------
cpu 500m (12%) 1 (25%)
memory 128Mi (3%) 256Mi (6%)
Events: <none>
[root@master test]# kubectl get pods -w ##监控创建过程
NAME READY STATUS RESTARTS AGE
frontend 0/2 ContainerCreating 0 9s
frontend 2/2 Running 0 32s
[root@master test]# kubectl get pods ##查看创建的pod资源
NAME READY STATUS RESTARTS AGE
frontend 2/2 Running 0 54s
重启策略:Pod在遇到故障之后重启的动作,有以下三种:Always(默认值,容器终止后总是重启)、OnFailure(仅当容器异常退出,即退出码非0时才重启)、Never(容器终止后从不重启)。
[root@master test]# vim pod1.yaml
apiVersion: v1
kind: Pod
metadata:
name: foo
spec:
containers:
- name: busybox
image: busybox
args:
- /bin/sh
- -c
- sleep 10; exit 3
args表示参数,在/bin/sh环境中执行sleep命令与exit命令,睡眠10秒后以退出码3退出
[root@master test]# kubectl apply -f pod1.yaml ##创建pod资源,apply除了更新,也有创建的功能
pod/foo created
[root@master test]# kubectl get pods -w ##通过监控可以发现,foo一直在重启,周期为10秒
NAME READY STATUS RESTARTS AGE
foo 0/1 ContainerCreating 0 7s
frontend 2/2 Running 0 33m
foo 1/1 Running 0 20s
foo 0/1 Error 0 30s
foo 1/1 Running 1 40s
foo 0/1 Error 1 50s
[root@master test]# kubectl get pods ##重启策略生效
NAME READY STATUS RESTARTS AGE
foo 1/1 Running 3 2m2s
frontend 2/2 Running 0 34m
在pod1.yaml文件中将重启策略restartPolicy: Never加进去,重新创建pod
[root@master test]# kubectl delete -f pod1.yaml
pod "foo" deleted
[root@master test]# vim pod1.yaml
apiVersion: v1
kind: Pod
metadata:
name: foo
spec:
containers:
- name: busybox
image: busybox
args:
- /bin/sh
- -c
- sleep 10
restartPolicy: Never
[root@master test]# kubectl get pods -w
NAME READY STATUS RESTARTS AGE
foo 0/1 ContainerCreating 0 7s
frontend 2/2 Running 0 48m
foo 1/1 Running 0 11s
foo 0/1 Completed 0 21s
https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/
健康检查:又称为探针(Probe)
(注意:以下两种规则可以同时定义)
livenessProbe 如果检查失败,将杀死容器,根据Pod的restartPolicy来操作。
readinessProbe 如果检查失败,kubernetes会把Pod从service endpoints中剔除。
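Probe支持三种检查方法:exec(在容器内执行命令,退出码为0表示成功)、httpGet(向容器发送HTTP GET请求,返回状态码在200~399之间表示成功)、tcpSocket(尝试与容器的指定端口建立TCP连接,能建立连接表示成功)。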
[root@master test]# vim pod2.yaml
apiVersion: v1
kind: Pod
metadata:
labels:
test: liveness
name: liveness-exec
spec:
containers:
- name: liveness
image: busybox
args:
- /bin/sh
- -c
- touch /tmp/healthy; sleep 10; rm -rf /tmp/healthy;sleep 30
livenessProbe:
exec:
command:
- cat
- /tmp/healthy
initialDelaySeconds: 5
periodSeconds: 5
这个yaml文件实现的功能是,创建一个空文件touch /tmp/healthy,睡眠10秒,然后删除这个文件,然后再睡眠30秒。探针类型为livenessProbe,探测方式是在容器内执行命令cat /tmp/healthy,如果命令执行不成功就杀死这个容器;容器启动后先等待5秒再执行第一次探测(initialDelaySeconds: 5),之后每5秒执行一次(periodSeconds: 5)。
但是这里有一个问题是,在yaml文件中,我没有设置重启策略,默认是Always,所以在检测到没有这个文件时会杀死容器,又会根据重启策略进行重启。
[root@master test]# kubectl apply -f pod2.yaml
pod/liveness-exec created
[root@master test]# kubectl get pods -w ##通过对pod的观察,发现探针功能正常运行
NAME READY STATUS RESTARTS AGE
foo 0/1 Completed 0 10h
frontend 2/2 Running 0 11h
liveness-exec 0/1 ContainerCreating 0 7s
liveness-exec 1/1 Running 0 35s
liveness-exec 1/1 Running 1 93s
liveness-exec 1/1 Running 2 2m22s
另外一种类型的存活探测方式是使用 HTTP GET 请求。 下面是一个 Pod 的配置文件,其中运行一个基于 k8s.gcr.io/liveness 镜像的容器。这种探针的方式适用于web集群。
apiVersion: v1
kind: Pod
metadata:
labels:
test: liveness
name: liveness-http
spec:
containers:
- name: liveness
image: k8s.gcr.io/liveness
args:
- /server
livenessProbe:
httpGet:
path: /healthz
port: 8080
httpHeaders:
- name: Custom-Header
value: Awesome
initialDelaySeconds: 3
periodSeconds: 3
在这个配置文件中,可以看到 Pod 也只有一个容器。 periodSeconds 字段指定了 kubelet 每隔 3 秒执行一次存活探测。 initialDelaySeconds 字段告诉 kubelet 在执行第一次探测前应该等待 3 秒。 kubelet 会向容器内运行的服务(服务会监听 8080 端口)发送一个 HTTP GET 请求来执行探测。 如果服务器上 /healthz 路径下的处理程序返回成功代码,则 kubelet 认为容器是健康存活的。 如果处理程序返回失败代码,则 kubelet 会杀死这个容器并且重新启动它。
第三种类型的存活探测是使用 TCP 套接字。 通过配置,kubelet 会尝试在指定端口和容器建立套接字链接。 如果能建立连接,这个容器就被看作是健康的,如果不能则这个容器就被看作是有问题的。
apiVersion: v1
kind: Pod
metadata:
name: goproxy
labels:
app: goproxy
spec:
containers:
- name: goproxy
image: k8s.gcr.io/goproxy:0.1
ports:
- containerPort: 8080
readinessProbe:
tcpSocket:
port: 8080
initialDelaySeconds: 5
periodSeconds: 10
livenessProbe:
tcpSocket:
port: 8080
initialDelaySeconds: 15
periodSeconds: 20
TCP 检测的配置和 HTTP 检测非常相似。 上面这个例子同时使用就绪和存活探测器。kubelet 会在容器启动 5 秒后发送第一个就绪探测。 这会尝试连接 goproxy 容器的 8080 端口。 如果探测成功,这个 Pod 会被标记为就绪状态,kubelet 将继续每隔 10 秒运行一次检测。
除了就绪探测,这个配置包括了一个存活探测。 kubelet 会在容器启动 15 秒后进行第一次存活探测。 就像就绪探测一样,会尝试连接 goproxy 容器的 8080 端口。 如果存活探测失败,这个容器会被重新启动。