深入分析kubelet(3)—— ContainerGC
Garbage Collection是非常重要的过程,kubelet有`ContainerGC`和`ImageGC`两个过程,本文主要分析`ContainerGC`。
code
new
k8s.io\kubernetes\pkg\kubelet\kubelet.go
// Assemble the container GC policy from the three kubelet settings:
// minimum age, per-pod container cap, and overall container cap.
containerGCPolicy := kubecontainer.ContainerGCPolicy{
MinAge: minimumGCAge.Duration,
MaxPerPodContainer: int(maxPerPodContainerCount),
MaxContainers: int(maxContainerCount),
}
// Create the container garbage collector from the kubelet's container
// runtime, the policy above, and the sources-ready provider.
// NOTE(review): err is not checked within this excerpt; presumably the
// enclosing function (not shown) handles it — confirm.
containerGC, err := kubecontainer.NewContainerGC(klet.containerRuntime, containerGCPolicy, klet.sourcesReady)
参数说明:
k8s.io\kubernetes\cmd\kubelet\app\options\options.go
- minimumGCAge,`--minimum-container-ttl-duration`,Minimum age for a finished container before it is garbage collected. Examples: '300ms', '10s' or '2h45m'
- maxPerPodContainerCount,`--maximum-dead-containers-per-container`,Maximum number of old instances to retain per container. Each container takes up some disk space. (default 1)。这里描述有点不准确,应该是说每个Pod保留最大的非running容器数。
- maxContainerCount,`--maximum-dead-containers`,Maximum number of old instances of containers to retain globally. Each container takes up some disk space. To disable, set to a negative number. 描述Node上保留的非running容器数。
start gc
k8s.io\kubernetes\pkg\kubelet\kubelet.go
// ContainerGCPeriod is the period for performing container garbage collection.
// It is the interval passed to wait.Until in StartGarbageCollection, so the
// container GC callback is scheduled once per minute.
ContainerGCPeriod = time.Minute
// StartGarbageCollection launches the kubelet's background container
// garbage collection loop.
//
// The loop runs in its own goroutine: wait.Until is handed a callback that
// triggers the container GC, the ContainerGCPeriod interval, and
// wait.NeverStop as its stop channel.
func (kl *Kubelet) StartGarbageCollection() {
	runContainerGC := func() {
		kl.containerGC.GarbageCollect()
	}
	go wait.Until(runContainerGC, ContainerGCPeriod, wait.NeverStop)
}
每分钟执行一次GC
gc
k8s.io\kubernetes\pkg\kubelet\kuberuntime\kuberuntime_gc.go
// GarbageCollect removes dead containers using the specified container gc policy.
// Note that gc policy is not applied to sandboxes. Sandboxes are only removed when they are
// not ready and containing no containers.
//
// GarbageCollect consists of the following steps:
// * gets evictable containers which are not active and created more than gcPolicy.MinAge ago.
// * removes oldest dead containers for each pod by enforcing gcPolicy.MaxPerPodContainer.
// * removes oldest dead containers by enforcing gcPolicy.MaxContainers.
// * gets evictable sandboxes which are not ready and contains no containers.
// * removes evictable sandboxes.
// * removes pod sandbox log directories.
//
// Every step is attempted even when an earlier one fails, so a container
// eviction error does not block sandbox or log-directory cleanup. The first
// error encountered is returned; nil means no captured step reported an error.
func (cgc *containerGC) GarbageCollect(gcPolicy kubecontainer.ContainerGCPolicy, allSourcesReady bool, evictTerminatedPods bool) error {
	// Remove evictable containers. Keep the error instead of discarding it,
	// but carry on so the remaining cleanup still runs.
	firstErr := cgc.evictContainers(gcPolicy, allSourcesReady, evictTerminatedPods)

	// Remove sandboxes with zero containers.
	// NOTE(review): evictSandboxes' signature is not visible here, so any
	// result it returns is still not captured — confirm and fold it into
	// firstErr the same way if it returns an error.
	cgc.evictSandboxes(evictTerminatedPods)

	// Remove pod sandbox log directory.
	if err := cgc.evictPodLogsDirectories(allSourcesReady); err != nil && firstErr == nil {
		firstErr = err
	}
	return firstErr
}
- 删除可驱逐的容器
- 删除sandboxes
- 删除sandbox log
重点关注删除容器
// evictableContainers gets all containers that are evictable. Evictable containers are: not running
// and created more than MinAge ago.
//
// minAge is the minimum time since creation before a non-running container
// may be collected. The result groups containers per evict unit, with each
// group sorted via byCreated. If listing the containers fails, the error is
// returned together with a nil map.
func (cgc *containerGC) evictableContainers(minAge time.Duration) (containersByEvictUnit, error) {
	containers, err := cgc.manager.getKubeletContainers(true)
	if err != nil {
		// Without a container list there is nothing to classify; surface the
		// failure instead of silently reporting "nothing to evict".
		return nil, err
	}

	evictUnits := make(containersByEvictUnit)
	// Containers created after this instant are still too young to collect.
	newestGCTime := time.Now().Add(-minAge)
	for _, container := range containers {
		// Prune out running containers.
		if container.State == runtimeapi.ContainerState_CONTAINER_RUNNING {
			continue
		}

		// CreatedAt is passed as the nanoseconds argument of time.Unix.
		createdAt := time.Unix(0, container.CreatedAt)
		if newestGCTime.Before(createdAt) {
			continue
		}

		// NOTE(review): this excerpt contains no statement that adds the
		// surviving container to evictUnits, so as written the returned map
		// is always empty. The grouping step appears to have been elided
		// from the quoted code — restore it from the upstream
		// kuberuntime_gc.go before relying on this function.
	}

	// Sort the containers by age.
	for uid := range evictUnits {
		sort.Sort(byCreated(evictUnits[uid]))
	}
	return evictUnits, nil
}
驱逐判断逻辑:非running,并且创建后已超过`--minimum-container-ttl-duration`时间的容器才可驱逐,然后按照创建时间(age)排序。这里需要注意,`evictUnits`是以Pod信息作为key,容器list作为value。
// evictContainers evicts all containers that are evictable.
//
// It works in three passes over the evictable containers, grouped by evict unit:
//  1. if allSourcesReady, every container of a deleted pod (or of a
//     terminated pod when evictTerminatedPods is set) is removed;
//  2. if gcPolicy.MaxPerPodContainer is non-negative, each evict unit is
//     trimmed to at most that many containers;
//  3. if gcPolicy.MaxContainers is non-negative and still exceeded, each unit
//     is first trimmed to an equal share (at least 1), and any remaining
//     excess is removed oldest-first across all units.
//
// It returns the error from collecting the evictable containers, if any;
// in that case nothing is evicted.
func (cgc *containerGC) evictContainers(gcPolicy kubecontainer.ContainerGCPolicy, allSourcesReady bool, evictTerminatedPods bool) error {
	// Separate containers by evict units.
	evictUnits, err := cgc.evictableContainers(gcPolicy.MinAge)
	if err != nil {
		// Do not evict based on a container list we failed to obtain.
		return err
	}

	// Remove deleted pod containers if all sources are ready.
	if allSourcesReady {
		for key, unit := range evictUnits {
			if cgc.podStateProvider.IsPodDeleted(key.uid) || (cgc.podStateProvider.IsPodTerminated(key.uid) && evictTerminatedPods) {
				cgc.removeOldestN(unit, len(unit)) // Remove all.
				// The unit is fully handled; drop it so the limits below ignore it.
				delete(evictUnits, key)
			}
		}
	}

	// Enforce max containers per evict unit.
	if gcPolicy.MaxPerPodContainer >= 0 {
		cgc.enforceMaxContainersPerEvictUnit(evictUnits, gcPolicy.MaxPerPodContainer)
	}

	// Enforce max total number of containers.
	if gcPolicy.MaxContainers >= 0 && evictUnits.NumContainers() > gcPolicy.MaxContainers {
		// Leave an equal number of containers per evict unit (min: 1).
		numContainersPerEvictUnit := gcPolicy.MaxContainers / evictUnits.NumEvictUnits()
		if numContainersPerEvictUnit < 1 {
			numContainersPerEvictUnit = 1
		}
		cgc.enforceMaxContainersPerEvictUnit(evictUnits, numContainersPerEvictUnit)

		// If we still need to evict, evict oldest first.
		numContainers := evictUnits.NumContainers()
		if numContainers > gcPolicy.MaxContainers {
			flattened := make([]containerGCInfo, 0, numContainers)
			for key := range evictUnits {
				flattened = append(flattened, evictUnits[key]...)
			}
			// Re-sort: concatenating per-unit lists loses the global ordering.
			sort.Sort(byCreated(flattened))

			cgc.removeOldestN(flattened, numContainers-gcPolicy.MaxContainers)
		}
	}
	return nil
}
// enforceMaxContainersPerEvictUnit caps every evict unit at MaxContainers
// entries. For each unit holding more than that, the surplus is handed to
// removeOldestN and the unit is replaced by the slice it returns. Units at
// or below the cap are left untouched.
func (cgc *containerGC) enforceMaxContainersPerEvictUnit(evictUnits containersByEvictUnit, MaxContainers int) {
	for key, unit := range evictUnits {
		excess := len(unit) - MaxContainers
		if excess <= 0 {
			continue
		}
		evictUnits[key] = cgc.removeOldestN(unit, excess)
	}
}
驱逐策略:
- 获取可驱逐的单位
- 如果Pod被删除了,就删除Pod下所有的容器
- 如果设置`MaxPerPodContainer`,就删除Pod中最老的容器,注意是对Pod内容器GC
- 如果设置`MaxContainers`,并且可驱逐的容器总数量大于它,就删除
  - 首先把每个Pod均分删除指标
  - 不够的话,就删除节点上最老的容器
// removeContainer deletes a container's logs and then the container itself.
//
// The order is deliberate: logs go first, so that if log removal fails the
// container is left in place and the kubelet can retry later. This keeps
// container logs and container removed together. It assumes the container
// is only removed in a non-running state, in which it no longer writes logs.
//
// It returns the log-removal error if that step fails, otherwise the result
// of the runtime service's RemoveContainer call.
func (m *kubeGenericRuntimeManager) removeContainer(containerID string) error {
	// Step 1: the container log.
	// TODO: Separate log and container lifecycle management.
	logErr := m.removeContainerLog(containerID)
	if logErr != nil {
		return logErr
	}

	// Step 2: the container.
	return m.runtimeService.RemoveContainer(containerID)
}
删除容器的时候需要注意,K8S会首先删除容器日志,然后删除容器,所以有必要做日志采集。