【kubernetes/k8s源码分析】kubelet pod 挂载volume源码分析

1.  NewVolumeManager 初始化volumeManager

       volumePluginMgr 使用volume 插件

       实例化 volumeManager,实例化desiredStateOfWorldPopulator

       controllerAttachDetachEnabled:决定 attach 是在 AD controller 还是 kubelet 处理

       reconciler 负责协调 attach / detach / mount / unmount 操作,是本文重点讲解的部分

// NewVolumeManager returns a new concrete instance implementing the
// VolumeManager interface.
//
// kubeClient - kubeClient is the kube API client used by DesiredStateOfWorldPopulator
//   to communicate with the API server to fetch PV and PVC objects
// volumePluginMgr - the volume plugin manager used to access volume plugins.
//   Must be pre-initialized.
func NewVolumeManager(。。。) VolumeManager {

	vm := &volumeManager{
		。。。。
	}

	vm.desiredStateOfWorldPopulator = populator.NewDesiredStateOfWorldPopulator(
		。。。。
    vm.reconciler = reconciler.NewReconciler

	return vm
}

2. Run函数

     包括 desiredStateOfWorldPopulator 与 reconciler的启动,分别讲解

// Run starts the volume manager. It launches the desired-state-of-world
// populator and the reconciler, each in its own goroutine, registers the
// volume metrics, and then blocks until stopCh delivers a value or is closed.
//
// sourcesReady is handed to the populator only; stopCh is shared by the
// populator, the reconciler and this method.
func (vm *volumeManager) Run(sourcesReady config.SourcesReady, stopCh <-chan struct{}) {
	// NOTE(review): presumably the apimachinery crash handler that logs a
	// panic escaping this function — confirm against the imported package.
	defer runtime.HandleCrash()

	// The populator runs in the background for the rest of this call.
	go vm.desiredStateOfWorldPopulator.Run(sourcesReady, stopCh)
	klog.V(2).Infof("The desired_state_of_world populator starts")

	klog.Infof("Starting Kubelet Volume Manager")
	// The reconciler also runs in the background; it only needs stopCh.
	go vm.reconciler.Run(stopCh)

	metrics.Register(vm.actualStateOfWorld, vm.desiredStateOfWorld, vm.volumePluginMgr)

	// Block here while the two goroutines above do the work.
	<-stopCh
	klog.Infof("Shutting down Kubelet Volume Manager")
}

 

3. reconcile函数

    3.1 实际已经挂载、但期望状态中已不存在的 volume,需要执行 unmount 操作

// Ensure volumes that should be unmounted are unmounted.
for _, mountedVolume := range rc.actualStateOfWorld.GetMountedVolumes() {
	if !rc.desiredStateOfWorld.PodExistsInVolume(mountedVolume.PodName, mountedVolume.VolumeName) {
		// Volume is mounted, unmount it
		klog.V(5).Infof(mountedVolume.GenerateMsgDetailed("Starting operationExecutor.UnmountVolume", ""))
		err := rc.operationExecutor.UnmountVolume(
			mountedVolume.MountedVolume, rc.actualStateOfWorld, rc.kubeletPodsDir)
		if err != nil &&
			!nestedpendingoperations.IsAlreadyExists(err) &&
			!exponentialbackoff.IsExponentialBackoff(err) {
			// Ignore nestedpendingoperations.IsAlreadyExists and exponentialbackoff.IsExponentialBackoff errors, they are expected.
			// Log all other errors.
			klog.Errorf(mountedVolume.GenerateErrorDetailed(fmt.Sprintf("operationExecutor.UnmountVolume failed (controllerAttachDetachEnabled %v)", rc.controllerAttachDetachEnabled), err).Error())
		}
		if err == nil {
			klog.Infof(mountedVolume.GenerateMsgDetailed("operationExecutor.UnmountVolume started", ""))
		}
	}
}

    3.2 确定期望状态的都已经attached / mounted

      如果 controllerAttachDetachEnabled 为 true 情况,在 AD controller 处理,这里只需要验证 volume 被 attached

【kubernetes/k8s源码分析】kubelet pod 挂载volume源码分析_第1张图片

// Ensure volumes that should be attached/mounted are attached/mounted.
for _, volumeToMount := range rc.desiredStateOfWorld.GetVolumesToMount() {
	volMounted, devicePath, err := rc.actualStateOfWorld.PodExistsInVolume(volumeToMount.PodName, volumeToMount.VolumeName)
	volumeToMount.DevicePath = devicePath
	if cache.IsVolumeNotAttachedError(err) {
		if rc.controllerAttachDetachEnabled || !volumeToMount.PluginIsAttachable {
			// Volume is not attached (or doesn't implement attacher), kubelet attach is disabled, wait
			// for controller to finish attaching volume.
			klog.V(5).Infof(volumeToMount.GenerateMsgDetailed("Starting operationExecutor.VerifyControllerAttachedVolume", ""))
			err := rc.operationExecutor.VerifyControllerAttachedVolume(
				volumeToMount.VolumeToMount,
				rc.nodeName,
				rc.actualStateOfWorld)
			if err != nil &&
				!nestedpendingoperations.IsAlreadyExists(err) &&
				!exponentialbackoff.IsExponentialBackoff(err) {
				// Ignore nestedpendingoperations.IsAlreadyExists and exponentialbackoff.IsExponentialBackoff errors, they are expected.
				// Log all other errors.
				klog.Errorf(volumeToMount.GenerateErrorDetailed(fmt.Sprintf("operationExecutor.VerifyControllerAttachedVolume failed (controllerAttachDetachEnabled %v)", rc.controllerAttachDetachEnabled), err).Error())
			}
			if err == nil {
				klog.Infof(volumeToMount.GenerateMsgDetailed("operationExecutor.VerifyControllerAttachedVolume started", ""))
			}
		}

    3.3 volume没被attach情况

      上面调用的 VerifyControllerAttachedVolume 对应 controllerAttachDetachEnabled 为 true(或插件不支持 attach)的分支:该函数检查指定的 volume 是否出现在指定节点状态的 attachedvolumes(volumesAttached)列表中,如果 volume 存在,则更新 actualStateOfWorld 中该 volume 为 attached。controllerAttachDetachEnabled 为 false 时,attach 由 kubelet 自己处理,见本节后半部分。

func (og *operationGenerator) GenerateVerifyControllerAttachedVolumeFunc(
	volumeToMount VolumeToMount,
	nodeName types.NodeName,
	actualStateOfWorld ActualStateOfWorldAttacherUpdater) (volumetypes.GeneratedOperations, error) {
	volumePlugin, err :=
		og.volumePluginMgr.FindPluginBySpec(volumeToMount.VolumeSpec)
	if err != nil || volumePlugin == nil {
		return volumetypes.GeneratedOperations{}, volumeToMount.GenerateErrorDetailed("VerifyControllerAttachedVolume.FindPluginBySpec failed", err)
	}

     重点讲解这块内容:VolumeToAttach 结构表示应该被 attach 到 node 上的 volume,这里的 attach 是在 kubelet 中实现的

     attach volume 的具体操作 AttachVolume 在第 4 章节重点讲解

// Volume is not attached to node, kubelet attach is enabled, volume implements an attacher,
// so attach it
volumeToAttach := operationexecutor.VolumeToAttach{
	VolumeName: volumeToMount.VolumeName,
	VolumeSpec: volumeToMount.VolumeSpec,
	NodeName:   rc.nodeName,
}
klog.V(5).Infof(volumeToAttach.GenerateMsgDetailed("Starting operationExecutor.AttachVolume", ""))
err := rc.operationExecutor.AttachVolume(volumeToAttach, rc.actualStateOfWorld)
if err != nil &&
	!nestedpendingoperations.IsAlreadyExists(err) &&
	!exponentialbackoff.IsExponentialBackoff(err) {
	// Ignore nestedpendingoperations.IsAlreadyExists and exponentialbackoff.IsExponentialBackoff errors, they are expected.
	// Log all other errors.
	klog.Errorf(volumeToMount.GenerateErrorDetailed(fmt.Sprintf("operationExecutor.AttachVolume failed (controllerAttachDetachEnabled %v)", rc.controllerAttachDetachEnabled), err).Error())
}
if err == nil {
	klog.Infof(volumeToMount.GenerateMsgDetailed("operationExecutor.AttachVolume started", ""))
}

 

4. AttachVolume函数

      具体干啥了呢,继续分析,主要两个函数 GenerateAttachVolumeFunc与 Run函数

// AttachVolume asks the operation generator to build the attach operation for
// volumeToAttach and submits it to pendingOperations under the volume's name.
//
// It returns the generator's error when the operation cannot be built, and
// otherwise whatever pendingOperations.Run returns.
func (oe *operationExecutor) AttachVolume(
	volumeToAttach VolumeToAttach,
	actualStateOfWorld ActualStateOfWorldAttacherUpdater) error {
	generatedOperations, err :=
		oe.operationGenerator.GenerateAttachVolumeFunc(volumeToAttach, actualStateOfWorld)
	if err != nil {
		return err
	}

	// The operation is submitted with the volume name and an empty pod name.
	return oe.pendingOperations.Run(
		volumeToAttach.VolumeName, "" /* podName */, generatedOperations)
}

 

5. GenerateAttachVolumeFunc

   产生一个函数,主要功能就是进行 attach volume 操作,其中包括插件的設置(CSI 或者非 CSI)

    5.1 調用插件實例化Attach,實現這個Attach接口

volumeAttacher, newAttacherErr := attachableVolumePlugin.NewAttacher()
if newAttacherErr != nil {
	eventRecorderFunc(&err)
	return volumetypes.GeneratedOperations{}, volumeToAttach.GenerateErrorDetailed("AttachVolume.NewAttacher failed", newAttacherErr)
}

    5.2 Attach操作並更新actualStateOfWorld

// Execute attach
devicePath, attachErr := volumeAttacher.Attach(
	volumeToAttach.VolumeSpec, volumeToAttach.NodeName)


// Update actual state of world
addVolumeNodeErr := actualStateOfWorld.MarkVolumeAsAttached(
	v1.UniqueVolumeName(""), originalSpec, volumeToAttach.NodeName, devicePath)
if addVolumeNodeErr != nil {
	// On failure, return error. Caller will log and retry.
	return volumeToAttach.GenerateError("AttachVolume.MarkVolumeAsAttached failed", addVolumeNodeErr)
}

    插件attach,比如使用ceph,可以參考文章 https://blog.csdn.net/zhonglinzhang/article/details/89841476 第6章節attach部分

 

  6. 需要進行mount volume情況

err := rc.operationExecutor.MountVolume(
	rc.waitForAttachTimeout,
	volumeToMount.VolumeToMount,
	rc.actualStateOfWorld,
	isRemount)

 

7. MountVolume函數

    CheckVolumeModeFilesystem 檢查 volume 的類型:文件系統還是 block

fsVolume, err := util.CheckVolumeModeFilesystem(volumeToMount.VolumeSpec)
if err != nil {
	return err
}
var generatedOperations volumetypes.GeneratedOperations
if fsVolume {
	// Filesystem volume case
	// Mount/remount a volume when a volume is attached
	generatedOperations = oe.operationGenerator.GenerateMountVolumeFunc(
		waitForAttachTimeout, volumeToMount, actualStateOfWorld, isRemount)

} else {
	// Block volume case
	// Creates a map to device if a volume is attached
	generatedOperations, err = oe.operationGenerator.GenerateMapVolumeFunc(
		waitForAttachTimeout, volumeToMount, actualStateOfWorld)
}

    7.1  GenerateMountVolumeFunc 函数

   volumeMounter, newMounterErr := volumePlugin.NewMounter

        如果场景为 kubernetes.io/csi,则实例化 NewAttacher 以及 NewDeviceMounter,具体实现在 pkg/volume/csi 目录下

// Get attacher, if possible
attachableVolumePlugin, _ :=
	og.volumePluginMgr.FindAttachablePluginBySpec(volumeToMount.VolumeSpec)
var volumeAttacher volume.Attacher
if attachableVolumePlugin != nil {
	volumeAttacher, _ = attachableVolumePlugin.NewAttacher()
}

// get deviceMounter, if possible
deviceMountableVolumePlugin, _ := og.volumePluginMgr.FindDeviceMountablePluginBySpec(volumeToMount.VolumeSpec)
var volumeDeviceMounter volume.DeviceMounter
if deviceMountableVolumePlugin != nil {
	volumeDeviceMounter, _ = deviceMountableVolumePlugin.NewDeviceMounter()
}

     7.1.1 WaitForAttach 函数

     实现在 pkg/volume/csi/csi_attacher.go 中,WaitForAttach 等待 volumeattachment 状态设置 true

if volumeAttacher != nil {
	// Wait for attachable volumes to finish attaching
	klog.Infof(volumeToMount.GenerateMsgDetailed("MountVolume.WaitForAttach entering", fmt.Sprintf("DevicePath %q", volumeToMount.DevicePath)))

	devicePath, err = volumeAttacher.WaitForAttach(
		volumeToMount.VolumeSpec, devicePath, volumeToMount.Pod, waitForAttachTimeout)
	if err != nil {
		// On failure, return error. Caller will log and retry.
		return volumeToMount.GenerateError("MountVolume.WaitForAttach failed", err)
	}

	klog.Infof(volumeToMount.GenerateMsgDetailed("MountVolume.WaitForAttach succeeded", fmt.Sprintf("DevicePath %q", devicePath)))
}

     7.1.2 mount global 

     GetDeviceMountPath 获取 device mount 路径,该路径由 插件目录 + pv + PV 名字 + globalmount 组成

     比如 /var/lib/kubelet/plugins/kubernetes.io/csi/pv/pvc-149bb57c-4c8f-48bf-8439-09dd505e6aa2/globalmount

     MountDevice 函数创建 global path,保存记录写入 vol_data.json

{
    "driverName":"hostpath.csi.k8s.io",
    "volumeHandle":"f113091e-9584-11ea-a16d-8e69e9f3d6c6"
}

      发送 gRPC 请求 NodeStageVolume,将 volume 挂载到全局路径(global mount path)

if volumeDeviceMounter != nil {
	deviceMountPath, err :=
		volumeDeviceMounter.GetDeviceMountPath(volumeToMount.VolumeSpec)
	if err != nil {
		// On failure, return error. Caller will log and retry.
		return volumeToMount.GenerateError("MountVolume.GetDeviceMountPath failed", err)
	}

	// Mount device to global mount path
	err = volumeDeviceMounter.MountDevice(
		volumeToMount.VolumeSpec,
		devicePath,
		deviceMountPath)
	if err != nil {
		// On failure, return error. Caller will log and retry.
		return volumeToMount.GenerateError("MountVolume.MountDevice failed", err)
	}

	klog.Infof(volumeToMount.GenerateMsgDetailed("MountVolume.MountDevice succeeded", fmt.Sprintf("device mount path %q", deviceMountPath)))

	// Update actual state of world to reflect volume is globally mounted
	markDeviceMountedErr := actualStateOfWorld.MarkDeviceAsMounted(
		volumeToMount.VolumeName, devicePath, deviceMountPath)
	if markDeviceMountedErr != nil {
		// On failure, return error. Caller will log and retry.
		return volumeToMount.GenerateError("MountVolume.MarkDeviceAsMounted failed", markDeviceMountedErr)
	}

	resizeOptions.DeviceMountPath = deviceMountPath
	resizeOptions.CSIVolumePhase = volume.CSIVolumeStaged

	// resizeFileSystem will resize the file system if user has requested a resize of
	// underlying persistent volume and is allowed to do so.
	resizeDone, resizeError = og.resizeFileSystem(volumeToMount, resizeOptions)

	if resizeError != nil {
		klog.Errorf("MountVolume.resizeFileSystem failed with %v", resizeError)
		return volumeToMount.GenerateError("MountVolume.MountDevice failed while expanding volume", resizeError)
	}
}

 

    7.2 调用插件NewMounter实现了Mount方法

volumeMounter, newMounterErr := volumePlugin.NewMounter(
	volumeToMount.VolumeSpec,
	volumeToMount.Pod,
	volume.VolumeOptions{})
if newMounterErr != nil {
	return volumeToMount.GenerateError("MountVolume.NewMounter initialization failed", newMounterErr)

}

      如果插件为flexvolume

      路径为pkg/volume/flexvolume这个目录下

// NewMounter is part of the volume.VolumePlugin interface.
//
// It delegates to newMounterInternal, passing spec and pod through together
// with the mounter the plugin host returns for this plugin's name and the
// plugin's runner. The volume.VolumeOptions argument is ignored.
func (plugin *flexVolumePlugin) NewMounter(spec *volume.Spec, pod *api.Pod, _ volume.VolumeOptions) (volume.Mounter, error) {
	return plugin.newMounterInternal(spec, pod, plugin.host.GetMounter(plugin.GetPluginName()), plugin.runner)
}

     如果使用 CSI plugin 则

  csi:
    driver: hostpath.csi.k8s.io
    fsType: ext4
    volumeAttributes:
      storage.kubernetes.io/csiProvisionerIdentity: 1589335706169-8081-hostpath.csi.k8s.io
    volumeHandle: f113091e-9584-11ea-a16d-8e69e9f3d6c6

func (p *csiPlugin) NewMounter(
	spec *volume.Spec,
	pod *api.Pod,
	_ volume.VolumeOptions) (volume.Mounter, error) {

	volSrc, pvSrc, err := getSourceFromSpec(spec)
	if err != nil {
		return nil, err
	}

     在 pods 工作目录创建 vol_data.json 文件,例如 /var/lib/kubelet/pods/747cc4e8-1e91-49e2-8456-89800c91dffa/volumes/kubernetes.io~csi/pvc-149bb57c-4c8f-48bf-8439-09dd505e6aa2

# cat vol_data.json

{
    "attachmentID":"csi-fbcecaed2a122919ca11ef1fa40d9d51f545db042b9a0acf386c0726c9100a5d",
    "driverMode":"persistent",
    "driverName":"hostpath.csi.k8s.io",
    "nodeName":"master-node",
    "specVolID":"pvc-149bb57c-4c8f-48bf-8439-09dd505e6aa2",
    "volumeHandle":"f113091e-9584-11ea-a16d-8e69e9f3d6c6"
}

// persist volume info data for teardown
node := string(p.host.GetNodeName())
volData := map[string]string{
	volDataKey.specVolID:  spec.Name(),
	volDataKey.volHandle:  volumeHandle,
	volDataKey.driverName: driverName,
	volDataKey.nodeName:   node,
	volDataKey.driverMode: string(driverMode),
}

attachID := getAttachmentName(volumeHandle, driverName, node)
volData[volDataKey.attachmentID] = attachID

if err := saveVolumeData(dataDir, volDataFileName, volData); err != nil {
	klog.Error(log("failed to save volume info data: %v", err))
	if err := os.RemoveAll(dataDir); err != nil {
		klog.Error(log("failed to remove dir after error [%s]: %v", dataDir, err))
		return nil, err
	}
	return nil, err
}

    7.3 SetUp 函数

      最后调用 CSI 实现的插件发送 GRPC NodePublishVolume 请求

func (c *csiMountMgr) SetUpAt(dir string, mounterArgs volume.MounterArgs) error {
	klog.V(4).Infof(log("Mounter.SetUpAt(%s)", dir))

	mounted, err := isDirMounted(c.plugin, dir)
	if err != nil {
		klog.Error(log("mounter.SetUpAt failed while checking mount status for dir [%s]", dir))
		return err
	}

	if mounted {
		klog.V(4).Info(log("mounter.SetUpAt skipping mount, dir already mounted [%s]", dir))
		return nil
	}

 

    更新 actualStateOfWorld,标记已经mount

// Update actual state of world
markVolMountedErr := actualStateOfWorld.MarkVolumeAsMounted(
	volumeToMount.PodName,
	volumeToMount.Pod.UID,
	volumeToMount.VolumeName,
	volumeMounter,
	nil,
	volumeToMount.OuterVolumeSpecName,
	volumeToMount.VolumeGidValue,
	originalSpec)
if markVolMountedErr != nil {
	// On failure, return error. Caller will log and retry.
	return volumeToMount.GenerateError("MountVolume.MarkVolumeAsMounted failed", markVolMountedErr)
}

 

你可能感兴趣的:(kubernetes,CSI,存储)