Kubelet v1.25.x源码——ContainerLogManager

1. 环境说明

Kubernetes源码版本:remotes/origin/release-1.25
Kubernetes编译出来的Kubelet版本:Kubernetes v1.24.0-beta.0.2463+ee7799bab469d7
Kubernetes集群实验环境:使用Kubernetes v1.25.4二进制的方式搭建了一个单节点集群

K8S单节点集群的搭建可以参考:Kubernetes v1.25 搭建单节点集群用于Debug K8S源码

Golang版本:go1.19.3 linux/amd64
IDEA版本:2022.2.3
Delve版本:1.9.1

[root@k8s-master1 kubernetes]#
[root@k8s-master1 kubernetes]# dlv version
Delve Debugger
Version: 1.9.1
Build: $Id: d81b9fd12bfa603f3cf7a4bc842398bd61c42940 $
[root@k8s-master1 kubernetes]#
[root@k8s-master1 kubernetes]# go version
go version go1.19.3 linux/amd64
[root@k8s-master1 kubernetes]#
[root@k8s-master1 kubernetes]# kubectl version
WARNING: This version information is deprecated and will be replaced with the output from kubectl version --short.  Use --output=yaml|json to get the full version.
Client Version: version.Info{Major:"1", Minor:"25", GitVersion:"v1.25.4", GitCommit:"872a965c6c6526caa949f0c6ac028ef7aff3fb78", GitTreeState:"clean", BuildDate:"2022-11-09T13:36:36Z", GoVersion:"go1.19.3", Compiler:"gc", Platform:"linux/amd64"}
Kustomize Version: v4.5.7
Server Version: version.Info{Major:"1", Minor:"25", GitVersion:"v1.25.4", GitCommit:"872a965c6c6526caa949f0c6ac028ef7aff3fb78", GitTreeState:"clean", BuildDate:"2022-11-09T13:29:58Z", GoVersion:"go1.19.3", Compiler:"gc", Platform:"linux/amd64"}
[root@k8s-master1 kubernetes]#
[root@k8s-master1 kubernetes]#
[root@k8s-master1 kubernetes]# kubectl get nodes -owide
NAME          STATUS   ROLES    AGE   VERSION   INTERNAL-IP     EXTERNAL-IP   OS-IMAGE                KERNEL-VERSION                CONTAINER-RUNTIME
k8s-master1   Ready    <none>   31h   v1.25.4   192.168.11.71   <none>        CentOS Linux 7 (Core)   3.10.0-1160.80.1.el7.x86_64   containerd://1.6.10
[root@k8s-master1 kubernetes]#
[root@k8s-master1 kubernetes]#
[root@k8s-master1 kubernetes]# kubectl get componentstatus
Warning: v1 ComponentStatus is deprecated in v1.19+
NAME                 STATUS    MESSAGE                         ERROR
etcd-0               Healthy   {"health":"true","reason":""}
controller-manager   Healthy   ok
scheduler            Healthy   ok
[root@k8s-master1 kubernetes]#

Kubelet启动参数配置如下:

[root@k8s-master1 kubernetes]# ps -ef|grep "/usr/local/bin/kubelet"
root       7972      1  6 07:06 ?        00:00:06 /usr/local/bin/kubelet --bootstrap-kubeconfig=/etc/kubernetes/bootstrap-kubelet.kubeconfig --kubeconfig=/etc/kubernetes/kubelet.kubeconfig --config=/etc/kubernetes/kubelet-conf.yml --container-runtime-endpoint=unix:///run/containerd/containerd.sock --node-labels=node.kubernetes.io/node= --v=8
root       9549   6424  0 07:07 pts/0    00:00:00 grep --color=auto /usr/local/bin/kubelet
[root@k8s-master1 kubernetes]#

Kubelet参数配置如下:

apiVersion: kubelet.config.k8s.io/v1beta1
kind: KubeletConfiguration
address: 0.0.0.0
port: 10250
readOnlyPort: 10255
authentication:
  anonymous:
    enabled: false
  webhook:
    cacheTTL: 2m0s
    enabled: true
  x509:
    clientCAFile: /etc/kubernetes/pki/ca.pem
authorization:
  mode: Webhook
  webhook:
    cacheAuthorizedTTL: 5m0s
    cacheUnauthorizedTTL: 30s
cgroupDriver: systemd
cgroupsPerQOS: true
clusterDNS:
- 10.96.0.10
clusterDomain: cluster.local
containerLogMaxFiles: 5
containerLogMaxSize: 10Mi
contentType: application/vnd.kubernetes.protobuf
cpuCFSQuota: true
cpuManagerPolicy: none
cpuManagerReconcilePeriod: 10s
enableControllerAttachDetach: true
enableDebuggingHandlers: true
enforceNodeAllocatable:
- pods
eventBurst: 10
eventRecordQPS: 5
evictionHard:
  imagefs.available: 15%
  memory.available: 100Mi
  nodefs.available: 10%
  nodefs.inodesFree: 5%
evictionPressureTransitionPeriod: 5m0s
failSwapOn: true
fileCheckFrequency: 20s
hairpinMode: promiscuous-bridge
healthzBindAddress: 127.0.0.1
healthzPort: 10248
httpCheckFrequency: 20s
imageGCHighThresholdPercent: 85
imageGCLowThresholdPercent: 80
imageMinimumGCAge: 2m0s
iptablesDropBit: 15
iptablesMasqueradeBit: 14
kubeAPIBurst: 10
kubeAPIQPS: 5
makeIPTablesUtilChains: true
maxOpenFiles: 1000000
maxPods: 110
nodeStatusUpdateFrequency: 10s
oomScoreAdj: -999
podPidsLimit: -1
registryBurst: 10
registryPullQPS: 5
resolvConf: /etc/resolv.conf
rotateCertificates: true
runtimeRequestTimeout: 2m0s
serializeImagePulls: true
staticPodPath: /etc/kubernetes/manifests
streamingConnectionIdleTimeout: 4h0m0s
syncFrequency: 1m0s
volumeStatsAggPeriod: 1m0s

2. 组件概览

ContainerLogManager自然是用于管理容器的日志,我们在分析ContainerGCManager的时候,就已经看到过ContainerLogManager的身影。ContainerLogManager主要用于清除未使用的日志,移除多余的日志,压缩日志,以及Rotate容器日志

3. 源码剖析

3.1. ContainerLogManager

ContainerLogManager

老规矩,先来看看ContainerLogManager的接口定义:

// ContainerLogManager is the interface for managing container log files.
type ContainerLogManager interface {
	// Start starts the log manager. The containerLogManager implementation
	// launches a background goroutine that periodically rotates container logs.
	Start()

	// Clean removes the log files of the container identified by containerID.
	// It returns an error if the container status cannot be obtained or a log
	// file cannot be removed.
	Clean(containerID string) error
}

一共就俩接口,Start不用说,看了那么多Kubernetes源码,这个方法肯定是在Kubelet启动过程中会通过一个协程来循环调用;Clean方法看名字应该是用于清理容器日志的,我们来看看具体逻辑

3.2. LogRotatePolicy

LogRotatePolicy

与ImageGCPolicy、ContainerGCPolicy类似,LogRotatePolicy是用于控制日志轮替的策略

// LogRotatePolicy holds the settings that control container log rotation.
type LogRotatePolicy struct {
	// MaxSize is the size threshold for a container log file. rotateLogs
	// leaves a log alone while its size is below MaxSize and rotates it
	// otherwise.
	MaxSize int64
	// MaxFiles bounds the number of log files kept per container.
	// removeExcessLogs keeps at most MaxFiles-2 already-rotated files.
	MaxFiles int
}

3.3. containerLogManager

containerLogManager
// containerLogManager is the ContainerLogManager implementation shown in this
// file; it rotates and cleans container logs through the container runtime.
type containerLogManager struct {
	// runtimeService is used to list containers, query their status (which
	// carries the log path) and ask the runtime to reopen a container log.
	runtimeService internalapi.RuntimeService
	// osInterface performs the file system operations (stat, glob, open,
	// rename, remove) on the log files.
	osInterface    kubecontainer.OSInterface
	// policy is the log rotation policy to apply.
	policy         LogRotatePolicy
	// clock supplies the current time used to name rotated log files.
	clock          clock.Clock
	// mutex is held by Clean and rotateLogs for their whole duration.
	mutex          sync.Mutex
}

3.4. Clean

Clean

清理容器日志的具体逻辑如下:

  • 1、首先调用符合CRI规范的容器运行时ContainerStatus接口获取容器状态
  • 2、然后获取所有日志文件的路径
  • 3、调用操作系统的接口,一个一个的删除
// Clean removes all log files of the container identified by containerID.
//
// It asks the runtime for the container status, globs "<logPath>*" so that the
// live log as well as every rotated, compressed or temporary sibling is
// matched, and removes each match. Files that have already disappeared are
// ignored. A container that reports no log path has nothing to clean, so Clean
// returns nil for it.
//
// It returns an error if the container status cannot be obtained or is nil,
// if globbing fails, or if a log file cannot be removed.
func (c *containerLogManager) Clean(containerID string) error {
	c.mutex.Lock()
	defer c.mutex.Unlock()
	resp, err := c.runtimeService.ContainerStatus(containerID, false)
	if err != nil {
		return fmt.Errorf("failed to get container status %q: %v", containerID, err)
	}
	if resp.GetStatus() == nil {
		return fmt.Errorf("container status is nil for %q", containerID)
	}
	logPath := resp.GetStatus().GetLogPath()
	if logPath == "" {
		// An empty log path would make the pattern below "*". With
		// filepath.Glob semantics that matches every entry relative to the
		// working directory, and those entries would then be removed.
		return nil
	}
	pattern := fmt.Sprintf("%s*", logPath)
	logs, err := c.osInterface.Glob(pattern)
	if err != nil {
		return fmt.Errorf("failed to list all log files with pattern %q: %v", pattern, err)
	}

	for _, l := range logs {
		// A log that is already gone is not an error: the goal is its absence.
		if err := c.osInterface.Remove(l); err != nil && !os.IsNotExist(err) {
			return fmt.Errorf("failed to remove container %q log %q: %v", containerID, l, err)
		}
	}

	return nil
}

3.5. Start

Start

还是一样的配方,一样的味道。启动一个协程不停的去执行rotateLogs,我们来看看该方法做了啥。

// Start launches a background goroutine that calls rotateLogs once every
// logMonitorPeriod. A rotation failure is logged and does not stop the loop.
func (c *containerLogManager) Start() {
	rotateOnce := func() {
		err := c.rotateLogs()
		if err == nil {
			return
		}
		klog.ErrorS(err, "Failed to rotate container logs")
	}
	// Start a goroutine periodically does container log rotation.
	go wait.Forever(rotateOnce, logMonitorPeriod)
}

3.6. rotateLogs

rotateLogs
  • 1、通过调用CRI ListContainers接口获取所有容器
  • 2、如果容器不处于运行状态(非RUNNING),直接跳过,不对该容器做Rotate
  • 3、通过容器ID查询容器的状态,从而获取容器的日志位置
  • 4、如果容器的日志文件不存在,那么尝试调用CRI的ReopenContainerLog接口重新打开日志;如果是其他Stat错误,则直接跳过该容器
  • 5、如果日志的大小小于RotatePolicy.MaxSize,那么放弃对于该容器日志的Rotate
  • 6、否则,执行Rotate操作
// rotateLogs lists all containers known to the runtime and rotates the log of
// every running container whose log file has reached policy.MaxSize.
//
// A problem with a single container is logged and that container is skipped,
// so one bad container does not block rotation for the others. The only error
// returned is a failure to list containers.
func (c *containerLogManager) rotateLogs() error {
	c.mutex.Lock()
	defer c.mutex.Unlock()
	containers, err := c.runtimeService.ListContainers(&runtimeapi.ContainerFilter{})
	if err != nil {
		return fmt.Errorf("failed to list containers: %v", err)
	}
	for _, container := range containers {
		// Only rotate logs of running containers.
		if container.GetState() != runtimeapi.ContainerState_CONTAINER_RUNNING {
			continue
		}
		id := container.GetId()
		// Note that we should not block log rotate for an error of a single container.
		resp, err := c.runtimeService.ContainerStatus(id, false)
		if err != nil {
			klog.ErrorS(err, "Failed to get container status", "containerID", id)
			continue
		}
		if resp.GetStatus() == nil {
			// err is always nil on this branch, so pass nil explicitly rather
			// than a variable that suggests an underlying error exists.
			klog.ErrorS(nil, "Container status is nil", "containerID", id)
			continue
		}
		path := resp.GetStatus().GetLogPath()
		info, err := c.osInterface.Stat(path)
		if err != nil {
			if !os.IsNotExist(err) {
				klog.ErrorS(err, "Failed to stat container log", "path", path)
				continue
			}
			// The log file is missing: ask the runtime to reopen the
			// container log, then stat it again.
			if err := c.runtimeService.ReopenContainerLog(id); err != nil {
				klog.ErrorS(err, "Container log doesn't exist, reopen container log failed", "containerID", id, "path", path)
				continue
			}
			info, err = c.osInterface.Stat(path)
			if err != nil {
				klog.ErrorS(err, "Failed to stat container log after reopen", "path", path)
				continue
			}
		}
		// Logs still below the size threshold are left alone.
		if info.Size() < c.policy.MaxSize {
			continue
		}
		// Perform log rotation.
		if err := c.rotateLog(id, path); err != nil {
			klog.ErrorS(err, "Failed to rotate log for container", "path", path, "containerID", id)
			continue
		}
	}
	return nil
}

3.7. rotateLog

rotateLog
  • 1、清理未使用的日志
  • 2、移除多余的日志
  • 3、压缩日志
  • 4、Rotate最新的日志
// rotateLog rotates the log file at path log for the container id.
//
// It works on the already-rotated siblings of the live log ("<log>.*") first:
// unused files are deleted, files beyond the retention limit are removed, and
// every remaining uncompressed file is compressed. Finally the live log itself
// is rotated. The first failing step aborts the rotation and is returned.
func (c *containerLogManager) rotateLog(id, log string) error {
	// Match every previously rotated file of this log.
	rotatedPattern := log + ".*"
	rotated, err := filepath.Glob(rotatedPattern)
	if err != nil {
		return fmt.Errorf("failed to list all log files with pattern %q: %v", rotatedPattern, err)
	}

	if rotated, err = c.cleanupUnusedLogs(rotated); err != nil {
		return fmt.Errorf("failed to cleanup logs: %v", err)
	}

	if rotated, err = c.removeExcessLogs(rotated); err != nil {
		return fmt.Errorf("failed to remove excess logs: %v", err)
	}

	// Compress whatever is left and not compressed yet.
	for _, name := range rotated {
		if !strings.HasSuffix(name, compressSuffix) {
			if err := c.compressLog(name); err != nil {
				return fmt.Errorf("failed to compress log %q: %v", name, err)
			}
		}
	}

	if err := c.rotateLatestLog(id, log); err != nil {
		return fmt.Errorf("failed to rotate log %q: %v", log, err)
	}

	return nil
}

3.7.1. cleanupUnusedLogs

cleanupUnusedLogs
// cleanupUnusedLogs removes every file in logs that filterUnusedLogs reports
// as unused and returns the files that are still in use. On the first removal
// failure it stops and returns a nil slice together with the error.
func (c *containerLogManager) cleanupUnusedLogs(logs []string) ([]string, error) {
	keep, stale := filterUnusedLogs(logs)
	for i := range stale {
		err := c.osInterface.Remove(stale[i])
		if err != nil {
			return nil, fmt.Errorf("failed to remove unused log %q: %v", stale[i], err)
		}
	}
	return keep, nil
}

// filterUnusedLogs partitions logs into the files that are still in use and
// the files that are not, as decided by isInUse. The relative order of the
// input is preserved in both results.
func filterUnusedLogs(logs []string) (inuse []string, unused []string) {
	for i := range logs {
		candidate := logs[i]
		if !isInUse(candidate, logs) {
			unused = append(unused, candidate)
			continue
		}
		inuse = append(inuse, candidate)
	}
	return inuse, unused
}

// isInUse reports whether the log file l, one of the files listed in logs, is
// still in use.
func isInUse(l string, logs []string) bool {
	switch {
	case strings.HasSuffix(l, tmpSuffix):
		// Temporary files are never in use.
		return false
	case strings.HasSuffix(l, compressSuffix):
		// Compressed logs are always in use.
		return true
	}
	// An uncompressed log whose compressed counterpart already exists is a
	// leftover and no longer in use.
	compressed := l + compressSuffix
	for i := range logs {
		if logs[i] == compressed {
			return false
		}
	}
	return true
}

3.7.2. removeExcessLogs

removeExcessLogs
// removeExcessLogs sorts logs in place in ascending string order and removes
// files from the front of the sorted list until at most policy.MaxFiles-2
// (never less than zero) remain. It returns the remaining files, or a nil
// slice and an error as soon as one removal fails.
func (c *containerLogManager) removeExcessLogs(logs []string) ([]string, error) {
	sort.Strings(logs)

	keep := c.policy.MaxFiles - 2
	if keep < 0 {
		keep = 0
	}

	excess := len(logs) - keep
	if excess <= 0 {
		return logs, nil
	}
	for _, old := range logs[:excess] {
		if err := c.osInterface.Remove(old); err != nil {
			return nil, fmt.Errorf("failed to remove old log %q: %v", old, err)
		}
	}
	return logs[excess:], nil
}

3.7.3. compressLog

compressLog
// compressLog gzips the log file at path log into log+compressSuffix and then
// removes the uncompressed original.
//
// The compressed data is first written to log+tmpSuffix. That temporary file
// is renamed to its final name only after both the gzip stream and the file
// have been closed without error, so a truncated archive never replaces the
// original log. The temporary file is removed on a best-effort basis on every
// return path.
func (c *containerLogManager) compressLog(log string) error {
	r, err := c.osInterface.Open(log)
	if err != nil {
		return fmt.Errorf("failed to open log %q: %v", log, err)
	}
	defer r.Close()
	tmpLog := log + tmpSuffix
	f, err := c.osInterface.OpenFile(tmpLog, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644)
	if err != nil {
		return fmt.Errorf("failed to create temporary log %q: %v", tmpLog, err)
	}
	defer func() {
		// Best effort cleanup of tmpLog.
		c.osInterface.Remove(tmpLog)
	}()
	// The deferred closes only matter on the error paths below; on success
	// both have already been closed explicitly.
	defer f.Close()
	w := gzip.NewWriter(f)
	defer w.Close()
	if _, err := io.Copy(w, r); err != nil {
		return fmt.Errorf("failed to compress %q to %q: %v", log, tmpLog, err)
	}

	// Closing the gzip writer flushes buffered data and writes the footer.
	// If that or closing the file fails, the archive is incomplete and must
	// not replace the original log.
	if err := w.Close(); err != nil {
		return fmt.Errorf("failed to finish compressing %q to %q: %v", log, tmpLog, err)
	}
	if err := f.Close(); err != nil {
		return fmt.Errorf("failed to close temporary log %q: %v", tmpLog, err)
	}
	compressedLog := log + compressSuffix
	if err := c.osInterface.Rename(tmpLog, compressedLog); err != nil {
		return fmt.Errorf("failed to rename %q to %q: %v", tmpLog, compressedLog, err)
	}
	// Remove old log file.
	r.Close()
	if err := c.osInterface.Remove(log); err != nil {
		return fmt.Errorf("failed to remove log %q after compress: %v", log, err)
	}
	return nil
}

3.7.4. rotateLatestLog

rotateLatestLog
// rotateLatestLog renames the live log file log to "<log>.<timestamp>" and
// asks the runtime to reopen the log of container id.
//
// If reopening fails, the rename is rolled back on a best-effort basis (a
// rollback failure is only logged) and the reopen error is returned.
func (c *containerLogManager) rotateLatestLog(id, log string) error {
	rotatedPath := log + "." + c.clock.Now().Format(timestampFormat)
	if err := c.osInterface.Rename(log, rotatedPath); err != nil {
		return fmt.Errorf("failed to rotate log %q to %q: %v", log, rotatedPath, err)
	}

	reopenErr := c.runtimeService.ReopenContainerLog(id)
	if reopenErr == nil {
		return nil
	}

	// Put the log back under its original name.
	if renameErr := c.osInterface.Rename(rotatedPath, log); renameErr != nil {
		klog.ErrorS(renameErr, "Failed to rename rotated log", "rotatedLog", rotatedPath, "newLog", log, "containerID", id)
	}
	return fmt.Errorf("failed to reopen container log %q: %v", id, reopenErr)
}

你可能感兴趣的:(#,Kubelet,kubelet,kubernetes,LogManager,日志,容器)