【kubernetes/k8s源码分析】 kubelet container gc 垃圾回收源码分析

WHY

       容器停止后,其文件系统并不会自动清除,通过 docker ps -a 仍然能够看到这些已停止的容器。kubelet 提供了 container gc 方案,用于清理宿主机上不再需要的容器

       容器镜像会占用存储资源,宿主机上也会留下不再需要的容器镜像,为此 kubelet 设计了一套 image gc 方案

 

WHAT

       Kubernetes 垃圾回收(Garbage Collection)机制由kubelet完成,kubelet定期清理不再使用的容器和镜像,每分钟进行一次容器的GC,每五分钟进行一次镜像的GC

 

HOW

NewMainKubelet

       -->  kubecontainer.ContainerGCPolicy

       -->  kubecontainer.NewContainerGC

 

  • MinAge:minimum-container-ttl-duration,默认为 0s,已结束的容器至少要达到这个时长(代码中按创建时间计算)之后才会被回收
  • MaxPerPodContainer:maximum-dead-containers-per-container,默认为 1,指每个容器(同一 pod 内的同名容器)最多可以保留多少个已结束的实例,负数表示不限制,这些已结束的容器会占用磁盘空间
  • MaxContainers:maximum-dead-containers,默认为 -1表示不做限制,节点上最多允许保留多少个已结束的容器
// Assemble the container GC policy: minimum age, per-container dead-container
// limit and node-wide dead-container limit.
// NOTE(review): presumably these values come from the kubelet flags
// minimum-container-ttl-duration, maximum-dead-containers-per-container and
// maximum-dead-containers — confirm against the caller.
containerGCPolicy := kubecontainer.ContainerGCPolicy{
	MinAge:             minimumGCAge.Duration,
	MaxPerPodContainer: int(maxPerPodContainerCount),
	MaxContainers:      int(maxContainerCount),
}

 

1. 实例化 container gc

    路径: pkg/kubelet/kubelet.go

// setup containerGC
// Build the container garbage collector from the container runtime, the GC
// policy and the sources-ready provider; a construction error is returned to
// the caller.
containerGC, err := kubecontainer.NewContainerGC(klet.containerRuntime, containerGCPolicy, klet.sourcesReady)
if err != nil {
	return nil, err
}
klet.containerGC = containerGC
// The pod container deletor receives the larger of the policy's
// MaxPerPodContainer and minDeadContainerInPod.
klet.containerDeletor = newPodContainerDeletor(klet.containerRuntime, integer.IntMax(containerGCPolicy.MaxPerPodContainer, minDeadContainerInPod))

    1.1 ContainerGC 接口

// ContainerGC manages garbage collection of dead containers.
//
// Implementation is thread-compatible.
type ContainerGC interface {
	// GarbageCollect garbage collects containers.
	GarbageCollect() error
	// DeleteAllUnusedContainers deletes all unused containers, including
	// containers belonging to pods that are terminated but not deleted.
	DeleteAllUnusedContainers() error
}

    1.2 realContainerGC 实现了 ContainerGC 接口

// NewContainerGC builds a ContainerGC that applies the given policy through the
// supplied runtime and sources-ready provider.
//
// A negative policy.MinAge is rejected with an error and no collector is
// returned.
func NewContainerGC(runtime Runtime, policy ContainerGCPolicy, sourcesReadyProvider SourcesReadyProvider) (ContainerGC, error) {
	if minAge := policy.MinAge; minAge < 0 {
		return nil, fmt.Errorf("invalid minimum garbage collection age: %v", minAge)
	}

	gc := new(realContainerGC)
	gc.runtime = runtime
	gc.policy = policy
	gc.sourcesReadyProvider = sourcesReadyProvider
	return gc, nil
}

 

createAndInitKubelet

       -->  StartGarbageCollection

                      -->  kl.containerGC.GarbageCollect

                                   -->  GarbageCollect

                                               -->  evictContainers

                                               -->  evictSandboxes

                      -->  kl.imageManager.GarbageCollect

   本文只分析 container gc部分

 

2. StartGarbageCollection

    主要是定期异步执行 container gc 流程

// StartGarbageCollection starts garbage collection threads.
func (kl *Kubelet) StartGarbageCollection() {
	// Tracks whether the most recent container GC run failed, so that the first
	// success after a failure is logged at a more visible verbosity.
	loggedContainerGCFailure := false
	// Run container GC in its own goroutine, driven by wait.Until with
	// ContainerGCPeriod and a stop channel that never fires.
	go wait.Until(func() {
		if err := kl.containerGC.GarbageCollect(); err != nil {
			klog.Errorf("Container garbage collection failed: %v", err)
			// NOTE(review): err.Error() is passed where Eventf presumably expects a
			// format string; a '%' in the error text could be misrendered — confirm
			// and consider a non-formatting call or an explicit "%s".
			kl.recorder.Eventf(kl.nodeRef, v1.EventTypeWarning, events.ContainerGCFailed, err.Error())
			loggedContainerGCFailure = true
		} else {
			// Success is logged at V(4) normally, or at V(1) when the previous
			// run failed; the failure marker is then cleared.
			var vLevel klog.Level = 4
			if loggedContainerGCFailure {
				vLevel = 1
				loggedContainerGCFailure = false
			}

			klog.V(vLevel).Infof("Container garbage collection succeeded")
		}
	}, ContainerGCPeriod, wait.NeverStop)

    2.1 GarbageCollect

       看看GarbageCollect 究竟做了什么,继续

// GarbageCollect delegates to the runtime's GarbageCollect with cgc's policy
// and the current all-sources-ready status. The final argument is always
// false.
func (cgc *realContainerGC) GarbageCollect() error {
	allSourcesReady := cgc.sourcesReadyProvider.AllReady()
	return cgc.runtime.GarbageCollect(cgc.policy, allSourcesReady, false)
}
// GarbageCollect removes dead containers using the specified container gc policy.
// All arguments are forwarded unchanged to the manager's containerGC.
func (m *kubeGenericRuntimeManager) GarbageCollect(gcPolicy kubecontainer.ContainerGCPolicy, allSourcesReady bool, evictNonDeletedPods bool) error {
	gc := m.containerGC
	return gc.GarbageCollect(gcPolicy, allSourcesReady, evictNonDeletedPods)
}

    2.2 GarbageCollect

     路径:pkg/kubelet/kuberuntime/kuberuntime_gc.go

删除挂掉的容器

// GarbageCollect removes dead containers using the specified container gc policy.
// Note that gc policy is not applied to sandboxes. Sandboxes are only removed when they are
// not ready and containing no containers.
//
// The three phases below always run, in order, even when an earlier phase
// fails; every failure is collected and returned as one aggregate error:
// * evictContainers: removes evictable containers.
// * evictSandboxes: removes sandboxes with zero containers.
// * evictPodLogsDirectories: removes pod sandbox log directories.
func (cgc *containerGC) GarbageCollect(gcPolicy kubecontainer.ContainerGCPolicy, allSourcesReady bool, evictTerminatedPods bool) error {
	var errs []error
	// collect records a phase's error, ignoring nil.
	collect := func(err error) {
		if err != nil {
			errs = append(errs, err)
		}
	}

	collect(cgc.evictContainers(gcPolicy, allSourcesReady, evictTerminatedPods))
	collect(cgc.evictSandboxes(evictTerminatedPods))
	collect(cgc.evictPodLogsDirectories(allSourcesReady))

	return utilerrors.NewAggregate(errs)
}

 

3. evictContainers 驱逐容器

    3.1 evictableContainers 函数获得可驱逐的容器

      不是 running 状态,并且距创建时间已超过 minAge

// evictContainers evicts all containers that are evictable.
func (cgc *containerGC) evictContainers(gcPolicy kubecontainer.ContainerGCPolicy, allSourcesReady bool, evictTerminatedPods bool) error {
	// Separate containers by evict units, considering only those selected by
	// evictableContainers for the policy's MinAge.
	evictUnits, err := cgc.evictableContainers(gcPolicy.MinAge)
	if err != nil {
		return err
	}

    3.2 如果 pod 是已经删除的,将所有这个pod的容器删除

// Once all sources are ready, remove every container of pods that are deleted,
// or that are terminated while evictTerminatedPods is set.
if allSourcesReady {
	for key, unit := range evictUnits {
		podGone := cgc.podStateProvider.IsPodDeleted(key.uid) ||
			(cgc.podStateProvider.IsPodTerminated(key.uid) && evictTerminatedPods)
		if !podGone {
			continue
		}
		// Remove the whole unit, then drop it from evictUnits.
		cgc.removeOldestN(unit, len(unit))
		delete(evictUnits, key)
	}
}

    3.3 如果 gcPolicy.MaxPerPodContainer 大于等于 0(负数表示不限制),每个驱逐单元只保留 gcPolicy.MaxPerPodContainer 个容器,其余的都删除

// enforceMaxContainersPerEvictUnit trims every evict unit down to at most
// MaxContainers entries. The surplus is handed to removeOldestN and the unit is
// replaced by the slice it returns; units within the limit are left untouched.
func (cgc *containerGC) enforceMaxContainersPerEvictUnit(evictUnits containersByEvictUnit, MaxContainers int) {
	for key, unit := range evictUnits {
		surplus := len(unit) - MaxContainers
		if surplus <= 0 {
			continue
		}
		evictUnits[key] = cgc.removeOldestN(unit, surplus)
	}
}

    3.4 如果 MaxContainers 大于等于 0 的情况(默认值为 -1,此时不进行清理)

     流程也比较简单,就是判断是否还需要继续清理容器

// Enforce the node-wide cap on containers; a negative MaxContainers disables it.
if limit := gcPolicy.MaxContainers; limit >= 0 && evictUnits.NumContainers() > limit {
	// First pass: give every evict unit an equal share of the cap, never less than one.
	perUnit := limit / evictUnits.NumEvictUnits()
	if perUnit < 1 {
		perUnit = 1
	}
	cgc.enforceMaxContainersPerEvictUnit(evictUnits, perUnit)

	// Second pass: if still over the cap, pool everything, order it by creation
	// and evict the oldest until the cap is met.
	if remaining := evictUnits.NumContainers(); remaining > limit {
		pooled := make([]containerGCInfo, 0, remaining)
		for _, unit := range evictUnits {
			pooled = append(pooled, unit...)
		}
		sort.Sort(byCreated(pooled))

		cgc.removeOldestN(pooled, remaining-limit)
	}
}

 

4. evictSandboxes 删除所有可驱逐的 sandbox

    可驱逐的 sandbox 满足如下条件:

  • 不是 ready 状态的
  • 不包含容器
  • 不应该存在的(已经被删除的),或者不是为pod创建最新的
// evictSandboxes removes all evictable sandboxes. An evictable sandbox must
// meet the following requirements:
//   1. is not in ready state.
//   2. contains no containers.
//   3. belongs to a non-existent (i.e., already removed) pod, or is not the
//      most recently created sandbox for the pod.
func (cgc *containerGC) evictSandboxes(evictTerminatedPods bool) error {

    4.1 getKubeletContainers

     获得 kubelet 所管理的所有容器

// getKubeletContainers lists containers managed by kubelet.
// When allContainers is true no state filter is applied, so exited and dead
// containers are returned too (used for garbage collection); otherwise only
// running containers are requested. A listing failure is logged and returned.
func (m *kubeGenericRuntimeManager) getKubeletContainers(allContainers bool) ([]*runtimeapi.Container, error) {
	var state *runtimeapi.ContainerStateValue
	if !allContainers {
		state = &runtimeapi.ContainerStateValue{
			State: runtimeapi.ContainerState_CONTAINER_RUNNING,
		}
	}
	filter := &runtimeapi.ContainerFilter{State: state}

	containers, err := m.runtimeService.ListContainers(filter)
	if err == nil {
		return containers, nil
	}

	klog.Errorf("getKubeletContainers failed: %v", err)
	return nil, err
}

    4.2 获得所有 sandbox 以及 ID

// Gather the sandbox ID of every listed container into a string set.
sandboxIDs := sets.NewString()
for _, c := range containers {
	sandboxIDs.Insert(c.PodSandboxId)
}

// List the kubelet's sandboxes (argument true); give up on failure.
sandboxes, err := cgc.manager.getKubeletSandboxes(true)
if err != nil {
	return err
}

    4.3 sandbox 为 ready 状态,或者仍然包含容器的,则设置为 active

// A sandbox counts as active when it is ready or when some container still
// references its ID.
if sandbox.State == runtimeapi.PodSandboxState_SANDBOX_READY || sandboxIDs.Has(sandbox.Id) {
	sandboxInfo.active = true
}

    4.4 如果 sandbox 所属的 pod 被删除,那么删除所有 sandbox

         否则删除旧的sandbox,留下最新的 sandbox

for podUID, sandboxes := range sandboxesByPod {
	// Keep latest one if the pod still exists.
	evictCount := len(sandboxes) - 1
	if cgc.podStateProvider.IsPodDeleted(podUID) || (cgc.podStateProvider.IsPodTerminated(podUID) && evictTerminatedPods) {
		// Remove all evictable sandboxes if the pod has been removed.
		// Note that the latest dead sandbox is also removed if there is
		// already an active one.
		evictCount = len(sandboxes)
	}
	cgc.removeOldestNSandboxes(sandboxes, evictCount)
}

 

5. evictPodLogsDirectories

    清理可驱逐的日志目录:如果对应的 pod 已经不存在,就删除它的日志目录

    5.1 删除 /var/log/pods目录下那些pods是deleted的目录

// evictPodLogsDirectories evicts all evictable pod logs directories. Pod logs directories
// are evictable if there are no corresponding pods.
//
// Failing to read podLogsRootDirectory is returned as an error; failing to
// remove an individual directory is only logged.
func (cgc *containerGC) evictPodLogsDirectories(allSourcesReady bool) error {
	osInterface := cgc.manager.osInterface
	if allSourcesReady {
		// Only remove pod logs directories when all sources are ready.
		dirs, err := osInterface.ReadDir(podLogsRootDirectory)
		if err != nil {
			return fmt.Errorf("failed to read podLogsRootDirectory %q: %v", podLogsRootDirectory, err)
		}
		for _, dir := range dirs {
			name := dir.Name()
			// The pod UID is derived from the directory name.
			podUID := parsePodUIDFromLogsDirectory(name)
			// Keep the directory unless the pod state provider reports the pod as deleted.
			if !cgc.podStateProvider.IsPodDeleted(podUID) {
				continue
			}
			// Best effort: a removal failure is logged and the loop moves on.
			err := osInterface.RemoveAll(filepath.Join(podLogsRootDirectory, name))
			if err != nil {
				klog.Errorf("Failed to remove pod logs directory %q: %v", name, err)
			}
		}
	}

    5.2 删除 /var/log/containers 目录下已失效的软链接

// Clean up legacy container log symlinks whose target no longer exists.
// The Glob error is deliberately ignored, as in the original code.
// TODO(random-liu): Remove this after cluster logging supports CRI container log path.
logSymlinks, _ := osInterface.Glob(filepath.Join(legacyContainerLogsDir, fmt.Sprintf("*.%s", legacyLogSuffix)))
for _, link := range logSymlinks {
	// Stat reporting "not exist" marks the symlink as dead; anything else is kept.
	if _, statErr := osInterface.Stat(link); !os.IsNotExist(statErr) {
		continue
	}
	if rmErr := osInterface.Remove(link); rmErr != nil {
		klog.Errorf("Failed to remove container log dead symlink %q: %v", link, rmErr)
	}
}

   

总结:

      evictContainers 驱逐容器

  • 获取所有可驱逐的容器,这种容器不是running并且创建时间大于minAge
  • 如果 pod 是已经删除的,将所有这个pod的容器删除
  • 根据 MaxPerPodContainer 和 MaxContainers 清理容器

 

    evictSandboxes 删除所有可驱逐的 sandbox

  • 如果 sandbox 所属的 pod 被删除,那么删除所有 sandbox,否则删除旧的sandbox,留下最新的 sandbox

 

    evictPodLogsDirectories

  •   删除日志目录以及链接

你可能感兴趣的:(#,kubelet)