kubelet 源码分析-镜像垃圾回收机制

本博客研究的代码版本是v1.11.0

流程图

kubelet 源码分析-镜像垃圾回收机制_第1张图片

源代码k8s.io/kubernetes/pkg/kubelet/kubelet.go 1229行

// StartGarbageCollection starts garbage collection threads.
//
// It launches a container GC loop that runs every ContainerGCPeriod and,
// unless image GC is disabled, an image GC loop that runs every
// ImageGCPeriod. Both loops are handed wait.NeverStop instead of a channel
// owned by this function: a channel closed (for example through defer) when
// this function returns would stop the loop right after it was started,
// because this function returns immediately after spawning the goroutines.
func (kl *Kubelet) StartGarbageCollection() {
    loggedContainerGCFailure := false
    go wait.Until(func() {
        if err := kl.containerGC.GarbageCollect(); err != nil {
            glog.Errorf("Container garbage collection failed: %v", err)
            kl.recorder.Eventf(kl.nodeRef, v1.EventTypeWarning, events.ContainerGCFailed, err.Error())
            loggedContainerGCFailure = true
        } else {
            // Log the first success after a failure at a more visible level.
            var vLevel glog.Level = 4
            if loggedContainerGCFailure {
                vLevel = 1
                loggedContainerGCFailure = false
            }

            glog.V(vLevel).Infof("Container garbage collection succeeded")
        }
    }, ContainerGCPeriod, wait.NeverStop)

    // When the high threshold is set to 100, image GC is disabled: do not
    // start the image GC loop at all.
    if kl.kubeletConfiguration.ImageGCHighThresholdPercent == 100 {
        glog.V(2).Infof("ImageGCHighThresholdPercent is set 100, Disable image GC")
        return
    }

    prevImageGCFailed := false
    go wait.Until(func() {
        if err := kl.imageManager.GarbageCollect(); err != nil {
            if prevImageGCFailed {
                glog.Errorf("Image garbage collection failed multiple times in a row: %v", err)
                // Only create an event for repeated failures
                kl.recorder.Eventf(kl.nodeRef, v1.EventTypeWarning, events.ImageGCFailed, err.Error())
            } else {
                glog.Errorf("Image garbage collection failed once. Stats initialization may not have completed yet: %v", err)
            }
            prevImageGCFailed = true
        } else {
            // Log the first success after a failure at a more visible level.
            var vLevel glog.Level = 4
            if prevImageGCFailed {
                vLevel = 1
                prevImageGCFailed = false
            }

            glog.V(vLevel).Infof("Image garbage collection succeeded")
        }
    }, ImageGCPeriod, wait.NeverStop)
}

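镜像垃圾回收的执行周期ImageGCPeriod是5min。wait.Until内部以jitterFactor=0、sliding=true调用JitterUntil,并不会引入抖动;sliding=true表示从上一次执行结束后才开始计时,所以两次执行的实际间隔约为5min加上单次GC的耗时,而不是10min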

具体实现逻辑代码如下

prevImageGCFailed := false
    go wait.Until(func() {
        if err := kl.imageManager.GarbageCollect(); err != nil {
            if prevImageGCFailed {
                glog.Errorf("Image garbage collection failed multiple times in a row: %v", err)
                // Only create an event for repeated failures
                kl.recorder.Eventf(kl.nodeRef, v1.EventTypeWarning, events.ImageGCFailed, err.Error())
            } else {
                glog.Errorf("Image garbage collection failed once. Stats initialization may not have completed yet: %v", err)
            }
            prevImageGCFailed = true
        } else {
            var vLevel glog.Level = 4
            if prevImageGCFailed {
                vLevel = 1
                prevImageGCFailed = false
            }

            glog.V(vLevel).Infof("Image garbage collection succeeded")
        }
    }, ImageGCPeriod, stopChan)

从源码可以看出执行的是kl.imageManager.GarbageCollect()这个功能函数

代码执行逻辑在
k8s.io/kubernetes/pkg/kubelet/images/image_gc_manager.go文件中

从代码文件可以看出是这个realImageGCManager结构体实现了ImageGCManager接口

// realImageGCManager bundles the dependencies and bookkeeping used for image
// garbage collection: the container runtime, per-image usage records, the GC
// policy, a stats provider, and an event recorder.
type realImageGCManager struct {
    // Container runtime
    runtime container.Runtime

    // Records of images and their use.
    imageRecords     map[string]*imageRecord
    // NOTE(review): presumably guards imageRecords — confirm against the methods that use it.
    imageRecordsLock sync.Mutex

    // The image garbage collection policy in use.
    policy ImageGCPolicy

    // statsProvider provides stats used during image garbage collection.
    statsProvider StatsProvider

    // Recorder for Kubernetes events.
    recorder record.EventRecorder

    // Reference to this node.
    nodeRef *v1.ObjectReference

    // Track initialization
    initialized bool

    // imageCache is the cache of latest image list.
    imageCache imageCache

    // sandbox image exempted from GC
    sandboxImage string
}
// ImageGCManager is the interface for image garbage collection: applying the
// GC policy, starting asynchronous collection, listing images, and deleting
// unused images.
type ImageGCManager interface {
    // Applies the garbage collection policy. Errors include being unable to free
    // enough space as per the garbage collection policy.
    GarbageCollect() error

    // Start async garbage collection of images.
    Start()

    // GetImageList returns a list of images, or an error.
    GetImageList() ([]container.Image, error)

    // Delete all unused images.
    DeleteUnusedImages() error
}

realImageGCManager.GarbageCollect函数的具体实现方法如下
k8s.io/kubernetes/pkg/kubelet/images/image_gc_manager.go文件269-314

// GarbageCollect reads the image filesystem stats and, when disk usage has
// reached the policy's high threshold, asks freeSpace to reclaim enough bytes
// to bring usage down to the low threshold.
//
// It returns an error when the stats cannot be read, when the reported
// capacity is 0, when freeSpace fails, or when fewer bytes were freed than
// requested. The last two failure kinds that originate here (zero capacity and
// insufficient space freed) are also recorded as warning events.
func (im *realImageGCManager) GarbageCollect() error {
    stats, err := im.statsProvider.ImageFsStats()
    if err != nil {
        return err
    }

    // Missing values are treated as 0.
    var total, free int64
    if p := stats.CapacityBytes; p != nil {
        total = int64(*p)
    }
    if p := stats.AvailableBytes; p != nil {
        free = int64(*p)
    }

    // Clamp inconsistent stats so the usage computation below stays in range.
    if free > total {
        glog.Warningf("available %d is larger than capacity %d", free, total)
        free = total
    }

    // A zero capacity would divide by zero below, so reject it up front.
    if total == 0 {
        capacityErr := goerrors.New("invalid capacity 0 on image filesystem")
        im.recorder.Eventf(im.nodeRef, v1.EventTypeWarning, events.InvalidDiskCapacity, capacityErr.Error())
        return capacityErr
    }

    usagePercent := 100 - int(free*100/total)
    if usagePercent < im.policy.HighThresholdPercent {
        // Below the high threshold: nothing to collect.
        return nil
    }

    // Bytes that must be reclaimed to land on the low threshold.
    amountToFree := total*int64(100-im.policy.LowThresholdPercent)/100 - free
    glog.Infof("[imageGCManager]: Disk usage on image filesystem is at %d%% which is over the high threshold (%d%%). Trying to free %d bytes", usagePercent, im.policy.HighThresholdPercent, amountToFree)
    freed, err := im.freeSpace(amountToFree, time.Now())
    if err != nil {
        return err
    }
    if freed >= amountToFree {
        return nil
    }

    shortfallErr := fmt.Errorf("failed to garbage collect required amount of images. Wanted to free %d bytes, but freed %d bytes", amountToFree, freed)
    im.recorder.Eventf(im.nodeRef, v1.EventTypeWarning, events.FreeDiskSpaceFailed, shortfallErr.Error())
    return shortfallErr
}

realImageGCManager.GarbageCollect() 执行步骤是
1.查询镜像所占的磁盘使用量
2.再根据镜像的GC策略对镜像进行清除

具体分析获取镜像使用的磁盘使用量的实现

fsStats, err := im.statsProvider.ImageFsStats()
    if err != nil {
        return err
    }

想要知道im.statsProvider.ImageFsStats()如何实现,就必须知道它是如何来的,也就是NewImageGCManager在哪里用到了,一步一步回溯源码

// NewImageGCManager validates the supplied GC policy and builds an
// ImageGCManager backed by realImageGCManager.
//
// Both thresholds must lie in [0, 100] and LowThresholdPercent must not exceed
// HighThresholdPercent; otherwise a nil manager and an error are returned.
func NewImageGCManager(runtime container.Runtime, statsProvider StatsProvider, recorder record.EventRecorder, nodeRef *v1.ObjectReference, policy ImageGCPolicy, sandboxImage string) (ImageGCManager, error) {
    high, low := policy.HighThresholdPercent, policy.LowThresholdPercent

    // Validate policy; the first violated rule wins.
    switch {
    case high < 0 || high > 100:
        return nil, fmt.Errorf("invalid HighThresholdPercent %d, must be in range [0-100]", high)
    case low < 0 || low > 100:
        return nil, fmt.Errorf("invalid LowThresholdPercent %d, must be in range [0-100]", low)
    case low > high:
        return nil, fmt.Errorf("LowThresholdPercent %d can not be higher than HighThresholdPercent %d", low, high)
    }

    // The initialized field is left at its zero value (false).
    return &realImageGCManager{
        runtime:       runtime,
        statsProvider: statsProvider,
        recorder:      recorder,
        nodeRef:       nodeRef,
        policy:        policy,
        sandboxImage:  sandboxImage,
        imageRecords:  make(map[string]*imageRecord),
    }, nil
}

NewImageGCManager 调用地方是在k8s.io/kubernetes/pkg/kubelet/kubelet.go文件716

// setup imageManager
    imageManager, err := images.NewImageGCManager(klet.containerRuntime, klet.StatsProvider, kubeDeps.Recorder, nodeRef, imageGCPolicy, crOptions.PodSandboxImage)
    if err != nil {
        return nil, fmt.Errorf("failed to initialize image manager: %v", err)
    }
    klet.imageManager = imageManager
if cadvisor.UsingLegacyCadvisorStats(containerRuntime, remoteRuntimeEndpoint) {
        klet.StatsProvider = stats.NewCadvisorStatsProvider(
            klet.cadvisor,
            klet.resourceAnalyzer,
            klet.podManager,
            klet.runtimeCache,
            klet.containerRuntime)
    } else {
        klet.StatsProvider = stats.NewCRIStatsProvider(
            klet.cadvisor,
            klet.resourceAnalyzer,
            klet.podManager,
            klet.runtimeCache,
            runtimeService,
            imageService,
            stats.NewLogMetricsService())
    }

从源码可以看出klet.StatsProvider 是由两种情况创建的,

其中在linux下且运行时是docker(或者运行时端点是CRI-O的socket)时,cadvisor.UsingLegacyCadvisorStats返回true,此时使用的是NewCadvisorStatsProvider,否则使用NewCRIStatsProvider
这个函数的具体实现如下,源码在k8s.io/kubernetes/pkg/kubelet/cadvisor/util.go这个文件77-80

func UsingLegacyCadvisorStats(runtime, runtimeEndpoint string) bool {
    return (runtime == kubetypes.DockerContainerRuntime && goruntime.GOOS == "linux") ||
        runtimeEndpoint == CrioSocket
}

那么,接下来我们分析NewCadvisorStatsProvider函数
k8s.io/kubernetes/pkg/kubelet/stats/stats_provider.go 文件49-57

// NewCadvisorStatsProvider builds a StatsProvider whose container stats come
// from a cAdvisor-backed containerStatsProvider. The same cadvisor interface
// is also handed to the resulting StatsProvider itself.
func NewCadvisorStatsProvider(
    cadvisor cadvisor.Interface,
    resourceAnalyzer stats.ResourceAnalyzer,
    podManager kubepod.Manager,
    runtimeCache kubecontainer.RuntimeCache,
    imageService kubecontainer.ImageService,
) *StatsProvider {
    containerStats := newCadvisorStatsProvider(cadvisor, resourceAnalyzer, imageService)
    return newStatsProvider(cadvisor, podManager, runtimeCache, containerStats)
}

从NewCadvisorStatsProvider的入参可以看出,New出来的对象组合了cadvisor监控、pod管理、运行时缓存以及容器/镜像状态查询等功能

newStatsProvider实现如下

// newStatsProvider assembles a StatsProvider from its four collaborators: the
// cadvisor interface, the pod manager, the runtime cache, and the
// containerStatsProvider that supplies container stats.
func newStatsProvider(
    cadvisor cadvisor.Interface,
    podManager kubepod.Manager,
    runtimeCache kubecontainer.RuntimeCache,
    containerStatsProvider containerStatsProvider,
) *StatsProvider {
    provider := new(StatsProvider)
    provider.cadvisor = cadvisor
    provider.podManager = podManager
    provider.runtimeCache = runtimeCache
    provider.containerStatsProvider = containerStatsProvider
    return provider
}

分析newStatsProvider函数的入参newCadvisorStatsProvider(cadvisor, resourceAnalyzer, imageService)

具体实现源代码在k8s.io/kubernetes/pkg/kubelet/stats/cadvisor_stats_provider.go 文件54-64

// newCadvisorStatsProvider returns a containerStatsProvider backed by a
// cadvisorStatsProvider that holds the given cadvisor interface, resource
// analyzer, and image service.
func newCadvisorStatsProvider(
    cadvisor cadvisor.Interface,
    resourceAnalyzer stats.ResourceAnalyzer,
    imageService kubecontainer.ImageService,
) containerStatsProvider {
    provider := &cadvisorStatsProvider{}
    provider.cadvisor = cadvisor
    provider.resourceAnalyzer = resourceAnalyzer
    provider.imageService = imageService
    return provider
}

newCadvisorStatsProvider返回的结构体cadvisorStatsProvider实现了接口containerStatsProvider
k8s.io/kubernetes/pkg/kubelet/stats/stats_provider.go 86-90

// containerStatsProvider is an interface that provides the stats of the
// containers managed by pods.
type containerStatsProvider interface {
    // ListPodStats returns a slice of pod stats, or an error.
    ListPodStats() ([]statsapi.PodStats, error)
    // ImageFsStats returns the stats of the filesystem for storing images.
    ImageFsStats() (*statsapi.FsStats, error)
    // ImageFsDevice returns a string, or an error.
    // NOTE(review): presumably the device name of the image filesystem — confirm.
    ImageFsDevice() (string, error)
}

cadvisorStatsProvider.ImageFsStats具体实现
k8s.io/kubernetes/pkg/kubelet/stats/cadvisor_stats_provider.go 196-221

// ImageFsStats returns the stats of the filesystem for storing images.
//
// Capacity, available bytes, and inode figures come from cAdvisor's image
// filesystem info; UsedBytes comes from the image service's TotalStorageBytes.
// An error is returned when either source fails, or when the image service
// returns nil stats without an error.
func (p *cadvisorStatsProvider) ImageFsStats() (*statsapi.FsStats, error) {
    imageFsInfo, err := p.cadvisor.ImagesFsInfo()
    if err != nil {
        return nil, fmt.Errorf("failed to get imageFs info: %v", err)
    }
    imageStats, err := p.imageService.ImageStats()
    if err != nil {
        return nil, fmt.Errorf("failed to get image stats: %v", err)
    }
    // Report the nil-stats case separately so the message does not end in a
    // misleading "<nil>" error value.
    if imageStats == nil {
        return nil, fmt.Errorf("failed to get image stats: image service returned nil stats")
    }

    // InodesUsed can only be derived when both inode figures are present.
    var imageFsInodesUsed *uint64
    if imageFsInfo.Inodes != nil && imageFsInfo.InodesFree != nil {
        imageFsIU := *imageFsInfo.Inodes - *imageFsInfo.InodesFree
        imageFsInodesUsed = &imageFsIU
    }

    return &statsapi.FsStats{
        Time:           metav1.NewTime(imageFsInfo.Timestamp),
        AvailableBytes: &imageFsInfo.Available,
        CapacityBytes:  &imageFsInfo.Capacity,
        UsedBytes:      &imageStats.TotalStorageBytes,
        InodesFree:     imageFsInfo.InodesFree,
        Inodes:         imageFsInfo.Inodes,
        InodesUsed:     imageFsInodesUsed,
    }, nil
}

从源代码可以看得出先取cadvisor.ImagesFsInfo镜像文件信息,然后再取镜像的状态信息p.imageService.ImageStats()

那么接下来就得先分析cadvisor.ImagesFsInfo以及p.imageService.ImageStats()具体实现了

1.cadvisor.ImagesFsInfo的具体实现

k8s.io/kubernetes/pkg/kubelet/kubelet.go 513

cadvisor:                       kubeDeps.CAdvisorInterface,

回溯到k8s.io/kubernetes/cmd/kubelet/app/server.go 635-641

if kubeDeps.CAdvisorInterface == nil {
        imageFsInfoProvider := cadvisor.NewImageFsInfoProvider(s.ContainerRuntime, s.RemoteRuntimeEndpoint)
        kubeDeps.CAdvisorInterface, err = cadvisor.New(s.Address, uint(s.CAdvisorPort), imageFsInfoProvider, s.RootDirectory, cadvisor.UsingLegacyCadvisorStats(s.ContainerRuntime, s.RemoteRuntimeEndpoint))
        if err != nil {
            return err
        }
    }

cadvisor.New执行代码逻辑
k8s.io/kubernetes/pkg/kubelet/cadvisor/cadvisor_unsupported.go

// New returns a cadvisorUnsupported stub as the Interface; none of the
// arguments are used by this implementation.
func New(address string, port uint, imageFsInfoProvider ImageFsInfoProvider, rootPath string, usingLegacyStats bool) (Interface, error) {
    var stub Interface = &cadvisorUnsupported{}
    return stub, nil
}

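注意:cadvisor_unsupported.go只是不支持平台上的桩实现,直接返回cadvisorUnsupported;linux下真正的cadvisor.New实现位于同目录的cadvisor_linux.go中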
还有一处
k8s.io/kubernetes/cmd/kubelet/app/server.go 385

CAdvisorInterface:   nil, // cadvisor.New launches background processes (bg http.ListenAndServe, and some bg cleaners), not set here

可以看得出,cadvisor服务是后台运行的服务

2.p.imageService.ImageStats()的具体实现

实际上就是这个对象cadvisorStatsProvider实现了

// cadvisorStatsProvider holds the collaborators used to produce container and
// image filesystem stats: cAdvisor, a resource analyzer, and an image service.
type cadvisorStatsProvider struct {
    // cadvisor is used to get the stats of the cgroup for the containers that
    // are managed by pods.
    cadvisor cadvisor.Interface
    // resourceAnalyzer is used to get the volume stats of the pods.
    resourceAnalyzer stats.ResourceAnalyzer
    // imageService is used to get the stats of the image filesystem.
    imageService kubecontainer.ImageService
}

新建对象cadvisorStatsProvider

// newCadvisorStatsProvider returns a containerStatsProvider backed by a
// cadvisorStatsProvider that holds the given cadvisor interface, resource
// analyzer, and image service.
func newCadvisorStatsProvider(
    cadvisor cadvisor.Interface,
    resourceAnalyzer stats.ResourceAnalyzer,
    imageService kubecontainer.ImageService,
) containerStatsProvider {
    provider := &cadvisorStatsProvider{}
    provider.cadvisor = cadvisor
    provider.resourceAnalyzer = resourceAnalyzer
    provider.imageService = imageService
    return provider
}

实际上是容器运行时klet.containerRuntime
或者是通过

runtimeService, imageService, err := getRuntimeAndImageServices(remoteRuntimeEndpoint, remoteImageEndpoint, kubeCfg.RuntimeRequestTimeout)
    if err != nil {
        return nil, err
    }

中赋值imageService
这个是由以下决定的

if cadvisor.UsingLegacyCadvisorStats(containerRuntime, remoteRuntimeEndpoint) {
        klet.StatsProvider = stats.NewCadvisorStatsProvider(
            klet.cadvisor,
            klet.resourceAnalyzer,
            klet.podManager,
            klet.runtimeCache,
            klet.containerRuntime)
    } else {
        klet.StatsProvider = stats.NewCRIStatsProvider(
            klet.cadvisor,
            klet.resourceAnalyzer,
            klet.podManager,
            klet.runtimeCache,
            runtimeService,
            imageService,
            stats.NewLogMetricsService())
    }

如果是通过imageService实现 ,那么具体的实现在k8s.io/kubernetes/pkg/kubelet/remote/remote_image.go文件中,

// RemoteImageService wraps an image service client together with the timeout
// applied to the context of each request.
type RemoteImageService struct {
    // timeout bounds the context created for each call.
    timeout     time.Duration
    // imageClient is the client the requests are sent through.
    imageClient runtimeapi.ImageServiceClient
}

结构RemoteImageService实现了ImageStatus方法

// ImageStatus queries the image service for the given image spec and returns
// the image from the response.
//
// A response without an image yields (nil, nil). A response whose image has an
// empty Id or a zero Size_ is treated as a failure and returned as an error.
func (r *RemoteImageService) ImageStatus(image *runtimeapi.ImageSpec) (*runtimeapi.Image, error) {
    ctx, cancel := getContextWithTimeout(r.timeout)
    defer cancel()

    request := &runtimeapi.ImageStatusRequest{Image: image}
    resp, err := r.imageClient.ImageStatus(ctx, request)
    if err != nil {
        glog.Errorf("ImageStatus %q from image service failed: %v", image.Image, err)
        return nil, err
    }

    found := resp.Image
    if found == nil {
        // No image in the response is not an error.
        return nil, nil
    }
    if found.Id != "" && found.Size_ != 0 {
        return found, nil
    }

    errorMessage := fmt.Sprintf("Id or size of image %q is not set", image.Image)
    glog.Errorf("ImageStatus failed: %s", errorMessage)
    return nil, errors.New(errorMessage)
}

最终实现是通过RPC协议实现的,具体实现k8s.io/kubernetes/pkg/kubelet/apis/cri/runtime/v1alpha2/api.pb.go

// imageServiceClient is the image service client; it issues its calls over
// the gRPC client connection cc.
type imageServiceClient struct {
    cc *grpc.ClientConn
}

// ImageStatus invokes the /runtime.v1alpha2.ImageService/ImageStatus method
// over the client connection and returns the decoded response, or the error
// reported by the invocation.
func (c *imageServiceClient) ImageStatus(ctx context.Context, in *ImageStatusRequest, opts ...grpc.CallOption) (*ImageStatusResponse, error) {
    resp := &ImageStatusResponse{}
    if err := grpc.Invoke(ctx, "/runtime.v1alpha2.ImageService/ImageStatus", in, resp, c.cc, opts...); err != nil {
        return nil, err
    }
    return resp, nil
}

回到klet.containerRuntime创建方式

runtime, err := kuberuntime.NewKubeGenericRuntimeManager(
        kubecontainer.FilterEventRecorder(kubeDeps.Recorder),
        klet.livenessManager,
        seccompProfileRoot,
        containerRefManager,
        machineInfo,
        klet,
        kubeDeps.OSInterface,
        klet,
        httpClient,
        imageBackOff,
        kubeCfg.SerializeImagePulls,
        float32(kubeCfg.RegistryPullQPS),
        int(kubeCfg.RegistryBurst),
        kubeCfg.CPUCFSQuota,
        runtimeService,
        imageService,
        kubeDeps.ContainerManager.InternalContainerLifecycle(),
        legacyLogProvider,
    )
    if err != nil {
        return nil, err
    }
    klet.containerRuntime = runtime
    klet.streamingRuntime = runtime
    klet.runner = runtime

    if cadvisor.UsingLegacyCadvisorStats(containerRuntime, remoteRuntimeEndpoint) {
        klet.StatsProvider = stats.NewCadvisorStatsProvider(
            klet.cadvisor,
            klet.resourceAnalyzer,
            klet.podManager,
            klet.runtimeCache,
            klet.containerRuntime)
    } else {
        klet.StatsProvider = stats.NewCRIStatsProvider(
            klet.cadvisor,
            klet.resourceAnalyzer,
            klet.podManager,
            klet.runtimeCache,
            runtimeService,
            imageService,
            stats.NewLogMetricsService())
    }

可以看出klet.containerRuntime 对象是由kuberuntime.NewKubeGenericRuntimeManager方法创建的

而且klet.containerRuntime实现了下面的ImageService接口(其中包含ImageStats方法),所以可以作为imageService参数传给NewCadvisorStatsProvider

// ImageService is the set of image operations: pulling an image, looking up
// its reference, listing images, removing an image, and reporting image
// statistics.
type ImageService interface {
    // PullImage pulls an image from the network to local storage using the supplied
    // secrets if necessary. It returns a reference (digest or ID) to the pulled image.
    PullImage(image ImageSpec, pullSecrets []v1.Secret) (string, error)
    // GetImageRef gets the reference (digest or ID) of the image which has already been in
    // the local storage. It returns ("", nil) if the image isn't in the local storage.
    GetImageRef(image ImageSpec) (string, error)
    // Gets all images currently on the machine.
    ListImages() ([]Image, error)
    // Removes the specified image.
    RemoveImage(image ImageSpec) error
    // Returns Image statistics.
    ImageStats() (*ImageStats, error)
}

其中imageService也是由getRuntimeAndImageServices生成的

runtimeService, imageService, err := getRuntimeAndImageServices(remoteRuntimeEndpoint, remoteImageEndpoint, kubeCfg.RuntimeRequestTimeout)
    if err != nil {
        return nil, err
    }

也就是说不管是以哪种方式运行UsingLegacyCadvisorStats 都是通过GRPC协议进行获取镜像状态的

回到函数realImageGCManager.GarbageCollect()

// GarbageCollect reads the image filesystem stats and, when disk usage has
// reached the policy's high threshold, asks freeSpace to reclaim enough bytes
// to bring usage down to the low threshold.
//
// It returns an error when the stats cannot be read, when the reported
// capacity is 0, when freeSpace fails, or when fewer bytes were freed than
// requested. The last two failure kinds that originate here (zero capacity and
// insufficient space freed) are also recorded as warning events.
func (im *realImageGCManager) GarbageCollect() error {
    stats, err := im.statsProvider.ImageFsStats()
    if err != nil {
        return err
    }

    // Missing values are treated as 0.
    var total, free int64
    if p := stats.CapacityBytes; p != nil {
        total = int64(*p)
    }
    if p := stats.AvailableBytes; p != nil {
        free = int64(*p)
    }

    // Clamp inconsistent stats so the usage computation below stays in range.
    if free > total {
        glog.Warningf("available %d is larger than capacity %d", free, total)
        free = total
    }

    // A zero capacity would divide by zero below, so reject it up front.
    if total == 0 {
        capacityErr := goerrors.New("invalid capacity 0 on image filesystem")
        im.recorder.Eventf(im.nodeRef, v1.EventTypeWarning, events.InvalidDiskCapacity, capacityErr.Error())
        return capacityErr
    }

    usagePercent := 100 - int(free*100/total)
    if usagePercent < im.policy.HighThresholdPercent {
        // Below the high threshold: nothing to collect.
        return nil
    }

    // Bytes that must be reclaimed to land on the low threshold.
    amountToFree := total*int64(100-im.policy.LowThresholdPercent)/100 - free
    glog.Infof("[imageGCManager]: Disk usage on image filesystem is at %d%% which is over the high threshold (%d%%). Trying to free %d bytes", usagePercent, im.policy.HighThresholdPercent, amountToFree)
    freed, err := im.freeSpace(amountToFree, time.Now())
    if err != nil {
        return err
    }
    if freed >= amountToFree {
        return nil
    }

    shortfallErr := fmt.Errorf("failed to garbage collect required amount of images. Wanted to free %d bytes, but freed %d bytes", amountToFree, freed)
    im.recorder.Eventf(im.nodeRef, v1.EventTypeWarning, events.FreeDiskSpaceFailed, shortfallErr.Error())
    return shortfallErr
}

获取完镜像的fsStats信息,就按照相关的镜像GC策略进行镜像清除了

具体实现

freed, err := im.freeSpace(amountToFree, time.Now())
        if err != nil {
            return err
        }

函数im.freeSpace的具体实现方式k8s.io/kubernetes/pkg/kubelet/images/image_gc_manager.go 328-389
执行逻辑就是正在使用的镜像不会清除,删除不使用的镜像
该函数随镜像GC循环周期性地被触发(周期为ImageGCPeriod,即5min),并且只有在磁盘使用率达到HighThresholdPercent时才会真正被调用

本人觉得记住这俩 差不多搞定了

runtimeService:      newInstrumentedRuntimeService(runtimeService),
        imageService:        newInstrumentedImageManagerService(imageService),

到此kubelet的镜像清除策略 已经分析完成

你可能感兴趣的:(kubernetes,kubelet,imageGC)