本博客研究的代码版本是v1.11.0
源代码k8s.io/kubernetes/pkg/kubelet/kubelet.go 1229行
// StartGarbageCollection starts garbage collection threads.
func (kl *Kubelet) StartGarbageCollection() {
loggedContainerGCFailure := false
go wait.Until(func() {
if err := kl.containerGC.GarbageCollect(); err != nil {
glog.Errorf("Container garbage collection failed: %v", err)
kl.recorder.Eventf(kl.nodeRef, v1.EventTypeWarning, events.ContainerGCFailed, err.Error())
loggedContainerGCFailure = true
} else {
var vLevel glog.Level = 4
if loggedContainerGCFailure {
vLevel = 1
loggedContainerGCFailure = false
}
glog.V(vLevel).Infof("Container garbage collection succeeded")
}
}, ContainerGCPeriod, wait.NeverStop)
stopChan := make(chan struct{})
defer close(stopChan)
// when the high threshold is set to 100, stub the image GC manager
if kl.kubeletConfiguration.ImageGCHighThresholdPercent == 100 {
glog.V(2).Infof("ImageGCHighThresholdPercent is set 100, Disable image GC")
go func() { stopChan <- struct{}{} }()
}
prevImageGCFailed := false
go wait.Until(func() {
if err := kl.imageManager.GarbageCollect(); err != nil {
if prevImageGCFailed {
glog.Errorf("Image garbage collection failed multiple times in a row: %v", err)
// Only create an event for repeated failures
kl.recorder.Eventf(kl.nodeRef, v1.EventTypeWarning, events.ImageGCFailed, err.Error())
} else {
glog.Errorf("Image garbage collection failed once. Stats initialization may not have completed yet: %v", err)
}
prevImageGCFailed = true
} else {
var vLevel glog.Level = 4
if prevImageGCFailed {
vLevel = 1
prevImageGCFailed = false
}
glog.V(vLevel).Infof("Image garbage collection succeeded")
}
}, ImageGCPeriod, stopChan)
}
镜像的垃圾回收机制执行的周期是5min
,但是由于使用了抖动功能,实际执行周期是10min
一次
具体实现逻辑代码如下
prevImageGCFailed := false
go wait.Until(func() {
if err := kl.imageManager.GarbageCollect(); err != nil {
if prevImageGCFailed {
glog.Errorf("Image garbage collection failed multiple times in a row: %v", err)
// Only create an event for repeated failures
kl.recorder.Eventf(kl.nodeRef, v1.EventTypeWarning, events.ImageGCFailed, err.Error())
} else {
glog.Errorf("Image garbage collection failed once. Stats initialization may not have completed yet: %v", err)
}
prevImageGCFailed = true
} else {
var vLevel glog.Level = 4
if prevImageGCFailed {
vLevel = 1
prevImageGCFailed = false
}
glog.V(vLevel).Infof("Image garbage collection succeeded")
}
}, ImageGCPeriod, stopChan)
从源码可以看出执行的是kl.imageManager.GarbageCollect()
这个功能函数
代码执行逻辑在
k8s.io/kubernetes/pkg/kubelet/images/image_gc_manager.go文件中
从代码文件可以看出是这个realImageGCManager
结构体实现了ImageGCManager
接口
type realImageGCManager struct {
// Container runtime
runtime container.Runtime
// Records of images and their use.
imageRecords map[string]*imageRecord
imageRecordsLock sync.Mutex
// The image garbage collection policy in use.
policy ImageGCPolicy
// statsProvider provides stats used during image garbage collection.
statsProvider StatsProvider
// Recorder for Kubernetes events.
recorder record.EventRecorder
// Reference to this node.
nodeRef *v1.ObjectReference
// Track initialization
initialized bool
// imageCache is the cache of latest image list.
imageCache imageCache
// sandbox image exempted from GC
sandboxImage string
}
type ImageGCManager interface {
// Applies the garbage collection policy. Errors include being unable to free
// enough space as per the garbage collection policy.
GarbageCollect() error
// Start async garbage collection of images.
Start()
GetImageList() ([]container.Image, error)
// Delete all unused images.
DeleteUnusedImages() error
}
realImageGCManager.realImageGCManager
函数的具体实现方法如下
k8s.io/kubernetes/pkg/kubelet/images/image_gc_manager.go文件269-314
行
func (im *realImageGCManager) GarbageCollect() error {
// Get disk usage on disk holding images.
fsStats, err := im.statsProvider.ImageFsStats()
if err != nil {
return err
}
var capacity, available int64
if fsStats.CapacityBytes != nil {
capacity = int64(*fsStats.CapacityBytes)
}
if fsStats.AvailableBytes != nil {
available = int64(*fsStats.AvailableBytes)
}
if available > capacity {
glog.Warningf("available %d is larger than capacity %d", available, capacity)
available = capacity
}
// Check valid capacity.
if capacity == 0 {
err := goerrors.New("invalid capacity 0 on image filesystem")
im.recorder.Eventf(im.nodeRef, v1.EventTypeWarning, events.InvalidDiskCapacity, err.Error())
return err
}
// If over the max threshold, free enough to place us at the lower threshold.
usagePercent := 100 - int(available*100/capacity)
if usagePercent >= im.policy.HighThresholdPercent {
amountToFree := capacity*int64(100-im.policy.LowThresholdPercent)/100 - available
glog.Infof("[imageGCManager]: Disk usage on image filesystem is at %d%% which is over the high threshold (%d%%). Trying to free %d bytes", usagePercent, im.policy.HighThresholdPercent, amountToFree)
freed, err := im.freeSpace(amountToFree, time.Now())
if err != nil {
return err
}
if freed < amountToFree {
err := fmt.Errorf("failed to garbage collect required amount of images. Wanted to free %d bytes, but freed %d bytes", amountToFree, freed)
im.recorder.Eventf(im.nodeRef, v1.EventTypeWarning, events.FreeDiskSpaceFailed, err.Error())
return err
}
}
return nil
}
realImageGCManager.GarbageCollect()
执行步骤是
1.查询镜像所占的磁盘使用量
2.再根据镜像的GC策略对镜像进行清楚
具体分析获取镜像使用的磁盘使用量的实现
fsStats, err := im.statsProvider.ImageFsStats()
if err != nil {
return err
}
想要知道im.statsProvider.ImageFsStats()
如何实现,就必须知道它是如何来的,也就是NewImageGCManager
在哪里用到了,一步一步回溯源码
func NewImageGCManager(runtime container.Runtime, statsProvider StatsProvider, recorder record.EventRecorder, nodeRef *v1.ObjectReference, policy ImageGCPolicy, sandboxImage string) (ImageGCManager, error) {
// Validate policy.
if policy.HighThresholdPercent < 0 || policy.HighThresholdPercent > 100 {
return nil, fmt.Errorf("invalid HighThresholdPercent %d, must be in range [0-100]", policy.HighThresholdPercent)
}
if policy.LowThresholdPercent < 0 || policy.LowThresholdPercent > 100 {
return nil, fmt.Errorf("invalid LowThresholdPercent %d, must be in range [0-100]", policy.LowThresholdPercent)
}
if policy.LowThresholdPercent > policy.HighThresholdPercent {
return nil, fmt.Errorf("LowThresholdPercent %d can not be higher than HighThresholdPercent %d", policy.LowThresholdPercent, policy.HighThresholdPercent)
}
im := &realImageGCManager{
runtime: runtime,
policy: policy,
imageRecords: make(map[string]*imageRecord),
statsProvider: statsProvider,
recorder: recorder,
nodeRef: nodeRef,
initialized: false,
sandboxImage: sandboxImage,
}
return im, nil
}
NewImageGCManager
调用地方是在k8s.io/kubernetes/pkg/kubelet/kubelet.go文件716
行
// setup imageManager
imageManager, err := images.NewImageGCManager(klet.containerRuntime, klet.StatsProvider, kubeDeps.Recorder, nodeRef, imageGCPolicy, crOptions.PodSandboxImage)
if err != nil {
return nil, fmt.Errorf("failed to initialize image manager: %v", err)
}
klet.imageManager = imageManager
if cadvisor.UsingLegacyCadvisorStats(containerRuntime, remoteRuntimeEndpoint) {
klet.StatsProvider = stats.NewCadvisorStatsProvider(
klet.cadvisor,
klet.resourceAnalyzer,
klet.podManager,
klet.runtimeCache,
klet.containerRuntime)
} else {
klet.StatsProvider = stats.NewCRIStatsProvider(
klet.cadvisor,
klet.resourceAnalyzer,
klet.podManager,
klet.runtimeCache,
runtimeService,
imageService,
stats.NewLogMetricsService())
}
从源码可以看出klet.StatsProvider
是由两种情况创建的,
其中在linux下且运行时是docker时使用的是cadvisor.UsingLegacyCadvisorStats
,
这个函数的具体实现如下,源码在k8s.io/kubernetes/pkg/kubelet/cadvisor/util.go这个文件77-80
行
func UsingLegacyCadvisorStats(runtime, runtimeEndpoint string) bool {
return (runtime == kubetypes.DockerContainerRuntime && goruntime.GOOS == "linux") ||
runtimeEndpoint == CrioSocket
}
那么,接下来我们分析NewCadvisorStatsProvider
函数
k8s.io/kubernetes/pkg/kubelet/stats/stats_provider.go 文件49-57
行
// NewCadvisorStatsProvider returns a containerStatsProvider that provides both
// the node and the container stats from cAdvisor.
func NewCadvisorStatsProvider(
cadvisor cadvisor.Interface,
resourceAnalyzer stats.ResourceAnalyzer,
podManager kubepod.Manager,
runtimeCache kubecontainer.RuntimeCache,
imageService kubecontainer.ImageService,
) *StatsProvider {
return newStatsProvider(cadvisor, podManager, runtimeCache, newCadvisorStatsProvider(cadvisor, resourceAnalyzer, imageService))
}
从NewCadvisorStatsProvider
的入参可以看出,New出来的具有监控功能 pod管理功能 缓存机制 容器状态机制等功能
newStatsProvider
实现如下
// newStatsProvider returns a new StatsProvider that provides node stats from
// cAdvisor and the container stats using the containerStatsProvider.
func newStatsProvider(
cadvisor cadvisor.Interface,
podManager kubepod.Manager,
runtimeCache kubecontainer.RuntimeCache,
containerStatsProvider containerStatsProvider,
) *StatsProvider {
return &StatsProvider{
cadvisor: cadvisor,
podManager: podManager,
runtimeCache: runtimeCache,
containerStatsProvider: containerStatsProvider,
}
}
分析newStatsProvider
函数的入参newCadvisorStatsProvider(cadvisor, resourceAnalyzer, imageService)
具体实现源代码在k8s.io/kubernetes/pkg/kubelet/stats/cadvisor_stats_provider.go 文件54-64
行
// container stats from cAdvisor.
func newCadvisorStatsProvider(
cadvisor cadvisor.Interface,
resourceAnalyzer stats.ResourceAnalyzer,
imageService kubecontainer.ImageService,
) containerStatsProvider {
return &cadvisorStatsProvider{
cadvisor: cadvisor,
resourceAnalyzer: resourceAnalyzer,
imageService: imageService,
}
}
这个结构体newCadvisorStatsProvider
实现了接口containerStatsProvider
k8s.io/kubernetes/pkg/kubelet/stats/stats_provider.go 86-90
行
// containerStatsProvider is an interface that provides the stats of the
// containers managed by pods.
type containerStatsProvider interface {
ListPodStats() ([]statsapi.PodStats, error)
ImageFsStats() (*statsapi.FsStats, error)
ImageFsDevice() (string, error)
}
cadvisorStatsProvider.ImageFsStats
具体实现
k8s.io/kubernetes/pkg/kubelet/stats/cadvisor_stats_provider.go 196-221
行
// ImageFsStats returns the stats of the filesystem for storing images.
func (p *cadvisorStatsProvider) ImageFsStats() (*statsapi.FsStats, error) {
imageFsInfo, err := p.cadvisor.ImagesFsInfo()
if err != nil {
return nil, fmt.Errorf("failed to get imageFs info: %v", err)
}
imageStats, err := p.imageService.ImageStats()
if err != nil || imageStats == nil {
return nil, fmt.Errorf("failed to get image stats: %v", err)
}
var imageFsInodesUsed *uint64
if imageFsInfo.Inodes != nil && imageFsInfo.InodesFree != nil {
imageFsIU := *imageFsInfo.Inodes - *imageFsInfo.InodesFree
imageFsInodesUsed = &imageFsIU
}
return &statsapi.FsStats{
Time: metav1.NewTime(imageFsInfo.Timestamp),
AvailableBytes: &imageFsInfo.Available,
CapacityBytes: &imageFsInfo.Capacity,
UsedBytes: &imageStats.TotalStorageBytes,
InodesFree: imageFsInfo.InodesFree,
Inodes: imageFsInfo.Inodes,
InodesUsed: imageFsInodesUsed,
}, nil
}
从源代码可以看得出先取cadvisor.ImagesFsInfo
镜像文件信息,然后再取镜像的状态信息p.imageService.ImageStats()
那么接下来就得先分析cadvisor.ImagesFsInfo
以及p.imageService.ImageStats()
具体实现了
1cadvisor.ImagesFsInfo
的具体实现
k8s.io/kubernetes/pkg/kubelet/kubelet.go 513
行
cadvisor: kubeDeps.CAdvisorInterface,
回溯到k8s.io/kubernetes/cmd/kubelet/app/server.go 635-641
行
if kubeDeps.CAdvisorInterface == nil {
imageFsInfoProvider := cadvisor.NewImageFsInfoProvider(s.ContainerRuntime, s.RemoteRuntimeEndpoint)
kubeDeps.CAdvisorInterface, err = cadvisor.New(s.Address, uint(s.CAdvisorPort), imageFsInfoProvider, s.RootDirectory, cadvisor.UsingLegacyCadvisorStats(s.ContainerRuntime, s.RemoteRuntimeEndpoint))
if err != nil {
return err
}
}
cadvisor.New执行代码逻辑
k8s.io/kubernetes/pkg/kubelet/cadvisor/cadvisor_unsupported.go
func New(address string, port uint, imageFsInfoProvider ImageFsInfoProvider, rootPath string, usingLegacyStats bool) (Interface, error) {
return &cadvisorUnsupported{}, nil
}
还处于未支持的状态
还有一处
k8s.io/kubernetes/cmd/kubelet/app/server.go 385
行
CAdvisorInterface: nil, // cadvisor.New launches background processes (bg http.ListenAndServe, and some bg cleaners), not set here
可以看得出,cadvisor
服务是后台运行的服务
2.p.imageService.ImageStats()
的具体实现
实际上就是这个对象cadvisorStatsProvider
实现了
type cadvisorStatsProvider struct {
// cadvisor is used to get the stats of the cgroup for the containers that
// are managed by pods.
cadvisor cadvisor.Interface
// resourceAnalyzer is used to get the volume stats of the pods.
resourceAnalyzer stats.ResourceAnalyzer
// imageService is used to get the stats of the image filesystem.
imageService kubecontainer.ImageService
}
新建对象cadvisorStatsProvider
func newCadvisorStatsProvider(
cadvisor cadvisor.Interface,
resourceAnalyzer stats.ResourceAnalyzer,
imageService kubecontainer.ImageService,
) containerStatsProvider {
return &cadvisorStatsProvider{
cadvisor: cadvisor,
resourceAnalyzer: resourceAnalyzer,
imageService: imageService,
}
}
实际上是容器运行时klet.containerRuntime
或者是通过
runtimeService, imageService, err := getRuntimeAndImageServices(remoteRuntimeEndpoint, remoteImageEndpoint, kubeCfg.RuntimeRequestTimeout)
if err != nil {
return nil, err
}
中赋值imageService
这个是由以下决定的
if cadvisor.UsingLegacyCadvisorStats(containerRuntime, remoteRuntimeEndpoint) {
klet.StatsProvider = stats.NewCadvisorStatsProvider(
klet.cadvisor,
klet.resourceAnalyzer,
klet.podManager,
klet.runtimeCache,
klet.containerRuntime)
} else {
klet.StatsProvider = stats.NewCRIStatsProvider(
klet.cadvisor,
klet.resourceAnalyzer,
klet.podManager,
klet.runtimeCache,
runtimeService,
imageService,
stats.NewLogMetricsService())
}
如果是通过imageService
实现 ,那么具体的实现在k8s.io/kubernetes/pkg/kubelet/remote/remote_image.go文件中,
type RemoteImageService struct {
timeout time.Duration
imageClient runtimeapi.ImageServiceClient
}
结构RemoteImageService
实现了ImageStatus
方法
// ImageStatus returns the status of the image.
func (r *RemoteImageService) ImageStatus(image *runtimeapi.ImageSpec) (*runtimeapi.Image, error) {
ctx, cancel := getContextWithTimeout(r.timeout)
defer cancel()
resp, err := r.imageClient.ImageStatus(ctx, &runtimeapi.ImageStatusRequest{
Image: image,
})
if err != nil {
glog.Errorf("ImageStatus %q from image service failed: %v", image.Image, err)
return nil, err
}
if resp.Image != nil {
if resp.Image.Id == "" || resp.Image.Size_ == 0 {
errorMessage := fmt.Sprintf("Id or size of image %q is not set", image.Image)
glog.Errorf("ImageStatus failed: %s", errorMessage)
return nil, errors.New(errorMessage)
}
}
return resp.Image, nil
}
最终实现是通过RPC协议实现的,具体实现k8s.io/kubernetes/pkg/kubelet/apis/cri/runtime/v1alpha2/api.pb.go
type imageServiceClient struct {
cc *grpc.ClientConn
}
func (c *imageServiceClient) ImageStatus(ctx context.Context, in *ImageStatusRequest, opts ...grpc.CallOption) (*ImageStatusResponse, error) {
out := new(ImageStatusResponse)
err := grpc.Invoke(ctx, "/runtime.v1alpha2.ImageService/ImageStatus", in, out, c.cc, opts...)
if err != nil {
return nil, err
}
return out, nil
}
回到klet.containerRuntime
创建方式
runtime, err := kuberuntime.NewKubeGenericRuntimeManager(
kubecontainer.FilterEventRecorder(kubeDeps.Recorder),
klet.livenessManager,
seccompProfileRoot,
containerRefManager,
machineInfo,
klet,
kubeDeps.OSInterface,
klet,
httpClient,
imageBackOff,
kubeCfg.SerializeImagePulls,
float32(kubeCfg.RegistryPullQPS),
int(kubeCfg.RegistryBurst),
kubeCfg.CPUCFSQuota,
runtimeService,
imageService,
kubeDeps.ContainerManager.InternalContainerLifecycle(),
legacyLogProvider,
)
if err != nil {
return nil, err
}
klet.containerRuntime = runtime
klet.streamingRuntime = runtime
klet.runner = runtime
if cadvisor.UsingLegacyCadvisorStats(containerRuntime, remoteRuntimeEndpoint) {
klet.StatsProvider = stats.NewCadvisorStatsProvider(
klet.cadvisor,
klet.resourceAnalyzer,
klet.podManager,
klet.runtimeCache,
klet.containerRuntime)
} else {
klet.StatsProvider = stats.NewCRIStatsProvider(
klet.cadvisor,
klet.resourceAnalyzer,
klet.podManager,
klet.runtimeCache,
runtimeService,
imageService,
stats.NewLogMetricsService())
}
可以看出klet.containerRuntime
对象是由kuberuntime.NewKubeGenericRuntimeManager
方法创建的
而且实现了klet.containerRuntime
type ImageService interface {
// PullImage pulls an image from the network to local storage using the supplied
// secrets if necessary. It returns a reference (digest or ID) to the pulled image.
PullImage(image ImageSpec, pullSecrets []v1.Secret) (string, error)
// GetImageRef gets the reference (digest or ID) of the image which has already been in
// the local storage. It returns ("", nil) if the image isn't in the local storage.
GetImageRef(image ImageSpec) (string, error)
// Gets all images currently on the machine.
ListImages() ([]Image, error)
// Removes the specified image.
RemoveImage(image ImageSpec) error
// Returns Image statistics.
ImageStats() (*ImageStats, error)
}
其中imageService
也是由getRuntimeAndImageServices
生成的
runtimeService, imageService, err := getRuntimeAndImageServices(remoteRuntimeEndpoint, remoteImageEndpoint, kubeCfg.RuntimeRequestTimeout)
if err != nil {
return nil, err
}
也就是说不管是以哪种方式运行UsingLegacyCadvisorStats
都是通过GRPC协议进行获取镜像状态的
回到函数realImageGCManager.GarbageCollect()
func (im *realImageGCManager) GarbageCollect() error {
// Get disk usage on disk holding images.
fsStats, err := im.statsProvider.ImageFsStats()
if err != nil {
return err
}
var capacity, available int64
if fsStats.CapacityBytes != nil {
capacity = int64(*fsStats.CapacityBytes)
}
if fsStats.AvailableBytes != nil {
available = int64(*fsStats.AvailableBytes)
}
if available > capacity {
glog.Warningf("available %d is larger than capacity %d", available, capacity)
available = capacity
}
// Check valid capacity.
if capacity == 0 {
err := goerrors.New("invalid capacity 0 on image filesystem")
im.recorder.Eventf(im.nodeRef, v1.EventTypeWarning, events.InvalidDiskCapacity, err.Error())
return err
}
// If over the max threshold, free enough to place us at the lower threshold.
usagePercent := 100 - int(available*100/capacity)
if usagePercent >= im.policy.HighThresholdPercent {
amountToFree := capacity*int64(100-im.policy.LowThresholdPercent)/100 - available
glog.Infof("[imageGCManager]: Disk usage on image filesystem is at %d%% which is over the high threshold (%d%%). Trying to free %d bytes", usagePercent, im.policy.HighThresholdPercent, amountToFree)
freed, err := im.freeSpace(amountToFree, time.Now())
if err != nil {
return err
}
if freed < amountToFree {
err := fmt.Errorf("failed to garbage collect required amount of images. Wanted to free %d bytes, but freed %d bytes", amountToFree, freed)
im.recorder.Eventf(im.nodeRef, v1.EventTypeWarning, events.FreeDiskSpaceFailed, err.Error())
return err
}
}
return nil
}
获取完镜像的fsStats信息,就按照相关的镜像GC策略进行镜像清除了
具体实现
freed, err := im.freeSpace(amountToFree, time.Now())
if err != nil {
return err
}
函数im.freeSpace的具体实现方式k8s.io/kubernetes/pkg/kubelet/images/image_gc_manager.go 328-389
行
执行逻辑就是正在使用的镜像不会清除,删除不使用的镜像
该函数在10min
为周期不停地执行
本人觉得记住这俩 差不多搞定了
runtimeService: newInstrumentedRuntimeService(runtimeService),
imageService: newInstrumentedImageManagerService(imageService),
到此kubelet的镜像清除策略 已经分析完成