k8s CSI架构介绍及源码分析

k8s本身内置了很多volume plugin(in-tree),比如ceph、nfs以及第三方厂商的存储,这带来了代码臃肿、不好维护、不灵活等问题,比如想修复存储相关的bug还需要跟随k8s一起发布。为了将k8s和存储系统解耦,抽象出了CSI(Container Storage Interface)接口,其提供三种类型的gRPC接口(Identity、Controller、Node),每个CSI plugin必须实现这些接口,具体可参考CSI spec: https://github.com/container-storage-interface/spec/blob/master/spec.md


k8s CSI架构介绍及源码分析_第1张图片

k8s CSI架构介绍及源码分析_第2张图片




主要关注daemonset pod初始化及kubelet中的plugin-manager如何识别csi插件

a. csi插件容器启动后会创建并监听/csi/csi.sock
b. Node-driver-registrar容器启动时通过/csi/csi.sock连接到csi插件容器,并调用GetPluginInfo接口获取csi插件信息
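c. Node-driver-registrar容器启动后还会创建并监听用于注册的socket文件/var/lib/kubelet/plugins_registry/<driver-name>-reg.sock(容器内路径为/registration/<driver-name>-reg.sock)
d. plugin-manager会watch目录/var/lib/kubelet/plugins_registry,发现有socket文件创建,就会通过此文件连接到Node-driver-registrar容器,并调用GetInfo获取插件名字以及csi插件socket的路径(比如/var/lib/kubelet/plugins/csi-plugin/csi.sock)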
e. plugin-manager根据获取的插件信息创建csinode
apiVersion: storage.k8s.io/v1
kind: CSINode
metadata:
  annotations:
    storage.alpha.kubernetes.io/migrated-plugins: kubernetes.io/aws-ebs,kubernetes.io/azure-disk,kubernetes.io/azure-file,kubernetes.io/cinder,kubernetes.io/gce-pd
  creationTimestamp: "2022-06-16T05:25:59Z"
  name: master
  ownerReferences:
  - apiVersion: v1
    kind: Node
    name: master
    uid: 5a93ff8c-7b53-404b-a055-56ddc2cc9262
  resourceVersion: "2174002"
  uid: e7649650-1b49-4a81-b3af-ea634dd78741
spec:
  drivers:
  - name: nfs.csi.k8s.io
    nodeID: master
    topologyKeys: null


a. 用户创建pvc
b. pv-controller监听到pvc添加事件后,查找合适的pv与其绑定,对csi插件来说肯定是找不到合适的pv的,此时会给pvc添加annotation volume.kubernetes.io/storage-provisioner,其值为storageclass中指定的provisioner名字
c. external-provisioner监听到pvc的更新事件后,会判断volume.kubernetes.io/storage-provisioner指定的值是否为自己,如果是则调用csi插件的CreateVolume创建volume,成功后再创建对应的pv对象
d. pv-controller监听到pvc更新后,将pvc和pv进行绑定
e. 用户创建pod,指定了volume为前面创建的pvc
f. kube-scheduler监听到pod添加事件,将其调度到合适的node上
g. ad-controller监听到pod的volume没有attach,调用csi-volume插件创建volumeattachment。如果csi插件不支持attach操作,则不必创建volumeattachment,
比如nfs-csi插件,在其csidriver中指定了“Attach Required”为false
kubectl describe csiDriver nfs.csi.k8s.io
Name:         nfs.csi.k8s.io
API Version:  storage.k8s.io/v1
Kind:         CSIDriver
Spec:
  Attach Required:     false   //指定了nfs不需要attach操作
  Fs Group Policy:     File
  Pod Info On Mount:   false
  Requires Republish:  false
  Storage Capacity:    false
h. external-attacher监听到volumeattachment事件后,调用ControllerPublishVolume进行attach操作
i. volume-manager发现volume没有mount,则调用csi-volume插件进行mount操作



pv controller

pv controller检测到unbound pvc后,调用syncUnboundClaim

// syncUnboundClaim is the main controller method to decide what to do with an
// unbound claim.
func (ctrl *PersistentVolumeController) syncUnboundClaim(claim *v1.PersistentVolumeClaim) error
	if claim.Spec.VolumeName == "" {
		//延迟绑定模式(VolumeBindingWaitForFirstConsumer)下,只有pod使用了此pvc并完成调度后,才需要创建对应的pv;另一种模式是VolumeBindingImmediate,pv controller监听到此种pvc后就立即创建pv
		// User did not care which PV they get.
		delayBinding, err := pvutil.IsDelayBindingMode(claim, ctrl.classLister)

		volume, err := ctrl.volumes.findBestMatchForClaim(claim, delayBinding)
		if volume == nil {
			switch {
			case delayBinding && !pvutil.IsDelayBindingProvisioning(claim):
				if err = ctrl.emitEventForUnboundDelayBindingClaim(claim); err != nil {
					return err
			case storagehelpers.GetPersistentVolumeClaimClass(claim) != "":
				if err = ctrl.provisionClaim(claim); err != nil {
					return err
				return nil
				ctrl.eventRecorder.Event(claim, v1.EventTypeNormal, events.FailedBinding, "no persistent volumes available for this claim and no storage class is set")
			// Mark the claim as Pending and try to find a match in the next
			// periodic syncClaim
			if _, err = ctrl.updateClaimStatus(claim, v1.ClaimPending, nil); err != nil {
				return err
			return nil
		} else /* pv != nil */ {
			ctrl.bind(volume, claim)
	} else /* pvc.Spec.VolumeName != nil */ {
		obj, found, err := ctrl.volumes.store.GetByKey(claim.Spec.VolumeName)
		if !found {
			ctrl.updateClaimStatus(claim, v1.ClaimPending, nil)
		} else {//指定的PV存在,静态provision流程
			if volume.Spec.ClaimRef == nil {
				checkVolumeSatisfyClaim(volume, claim)
				ctrl.bind(volume, claim)
			} else if pvutil.IsVolumeBoundToClaim(volume, claim) {
				ctrl.bind(volume, claim)
			} else {
				//PV被其他pvc claim的场景
func (ctrl *PersistentVolumeController) provisionClaim(claim *v1.PersistentVolumeClaim) error
	plugin, storageClass, err := ctrl.findProvisionablePlugin(claim)
	if plugin == nil {
		//out-tree provision的处理,设置AnnBetaStorageProvisioner和AnnStorageProvisioner,external-provisioner会进行处理
		_, err = ctrl.provisionClaimOperationExternal(claim, storageClass)
	} else {
	    //in-tree provision的处理
		_, err = ctrl.provisionClaimOperation(claim, plugin, storageClass)

// findProvisionablePlugin finds a provisioner plugin for a given claim.
// It returns either the provisioning plugin or nil when an external
// provisioner is requested.
func (ctrl *PersistentVolumeController) findProvisionablePlugin(claim *v1.PersistentVolumeClaim) (vol.ProvisionableVolumePlugin, *storage.StorageClass, error) {
	// provisionClaim() which leads here is never called with claimClass=="", we
	// can save some checks.
	claimClass := storagehelpers.GetPersistentVolumeClaimClass(claim)
	class, err := ctrl.classLister.Get(claimClass)
	//根据class.Provisioner名字在in-tree volume plugin中查找,如果查到并且实现了接口ProvisionableVolumePlugin才返回此plugin,
	//否则返回err,比如对于in-tree nfs来说,它不支持接口ProvisionableVolumePlugin
	plugin, err := ctrl.volumePluginMgr.FindProvisionablePluginByName(class.Provisioner)
	if err != nil {
		if !strings.HasPrefix(class.Provisioner, "kubernetes.io/") {
			// External provisioner is requested, do not report error
			return nil, class, nil
		//没找到in-tree volume plugin,也不是外部provisioner,则返回err
		return nil, class, err
	//找到了in-tree volume plugin,则返回
	return plugin, class, nil

out-tree provision处理,只设置AnnBetaStorageProvisioner和AnnStorageProvisioner这两个annotation即可,external-provisioner监听到后会进行处理

func (ctrl *PersistentVolumeController) provisionClaimOperationExternal(
	claim *v1.PersistentVolumeClaim,
	storageClass *storage.StorageClass) (string, error) {
	provisionerName := storageClass.Provisioner
	// Add provisioner annotation so external provisioners know when to start
	newClaim, err := ctrl.setClaimProvisioner(claim, provisionerName)
		claimClone := claim.DeepCopy()
		// TODO: remove the beta storage provisioner anno after the deprecation period
		//AnnBetaStorageProvisioner = "volume.beta.kubernetes.io/storage-provisioner"
		//AnnStorageProvisioner     = "volume.kubernetes.io/storage-provisioner"
		metav1.SetMetaDataAnnotation(&claimClone.ObjectMeta, pvutil.AnnBetaStorageProvisioner, provisionerName)
		metav1.SetMetaDataAnnotation(&claimClone.ObjectMeta, pvutil.AnnStorageProvisioner, provisionerName)
		updateMigrationAnnotations(ctrl.csiMigratedPluginManager, ctrl.translator, claimClone.Annotations, true)
		newClaim, err := ctrl.kubeClient.CoreV1().PersistentVolumeClaims(claim.Namespace).Update(context.TODO(), claimClone, metav1.UpdateOptions{})

in-tree provision处理

// provisionClaimOperation provisions a volume. This method is running in
// standalone goroutine and already has all necessary locks.
func (ctrl *PersistentVolumeController) provisionClaimOperation(
	claim *v1.PersistentVolumeClaim,
	plugin vol.ProvisionableVolumePlugin,
	storageClass *storage.StorageClass) (string, error) {

	provisionerName := storageClass.Provisioner
	// Add provisioner annotation to be consistent with external provisioner workflow
	newClaim, err := ctrl.setClaimProvisioner(claim, provisionerName)
		claimClone := claim.DeepCopy()
		// TODO: remove the beta storage provisioner anno after the deprecation period
		//AnnBetaStorageProvisioner = "volume.beta.kubernetes.io/storage-provisioner"
		//AnnStorageProvisioner     = "volume.kubernetes.io/storage-provisioner"
		metav1.SetMetaDataAnnotation(&claimClone.ObjectMeta, pvutil.AnnBetaStorageProvisioner, provisionerName)
		metav1.SetMetaDataAnnotation(&claimClone.ObjectMeta, pvutil.AnnStorageProvisioner, provisionerName)
		updateMigrationAnnotations(ctrl.csiMigratedPluginManager, ctrl.translator, claimClone.Annotations, true)
		newClaim, err := ctrl.kubeClient.CoreV1().PersistentVolumeClaims(claim.Namespace).Update(context.TODO(), claimClone, metav1.UpdateOptions{})

	claim = newClaim

	options := vol.VolumeOptions{
		PersistentVolumeReclaimPolicy: *storageClass.ReclaimPolicy,
		MountOptions:                  storageClass.MountOptions,
		CloudTags:                     &tags,
		ClusterName:                   ctrl.clusterName,
		PVName:                        pvName,
		PVC:                           claim,
		Parameters:                    storageClass.Parameters,

	// Provision the volume
	provisioner, err := plugin.NewProvisioner(options)
	volume, err = provisioner.Provision(selectedNode, allowedTopologies)
	// Create Kubernetes PV object for the volume.
	if volume.Name == "" {
		volume.Name = pvName
	// Bind it to the claim
	volume.Spec.ClaimRef = claimRef
	volume.Status.Phase = v1.VolumeBound
	volume.Spec.StorageClassName = claimClass

	// Add AnnBoundByController (used in deleting the volume)
	//AnnBoundByController = "pv.kubernetes.io/bound-by-controller"
	metav1.SetMetaDataAnnotation(&volume.ObjectMeta, pvutil.AnnBoundByController, "yes")
	//AnnDynamicallyProvisioned = "pv.kubernetes.io/provisioned-by"
	metav1.SetMetaDataAnnotation(&volume.ObjectMeta, pvutil.AnnDynamicallyProvisioned, plugin.GetPluginName())
	ctrl.kubeClient.CoreV1().PersistentVolumes().Create(context.TODO(), volume, metav1.CreateOptions{})



// syncClaim checks if the claim should have a volume provisioned for it and
// provisions one if so. Returns an error if the claim is to be requeued.
func (ctrl *ProvisionController) syncClaim(ctx context.Context, obj interface{}) error {
	claim, ok := obj.(*v1.PersistentVolumeClaim)
	should, err := ctrl.shouldProvision(ctx, claim)
	if err != nil {
		ctrl.updateProvisionStats(claim, err, time.Time{})
		return err
	} else if should {
		status, err := ctrl.provisionClaimOperation(ctx, claim)

// shouldProvision returns whether a claim should have a volume provisioned for
// it, i.e. whether a Provision is "desired"
func (ctrl *ProvisionController) shouldProvision(ctx context.Context, claim *v1.PersistentVolumeClaim) (bool, error) {
	if claim.Spec.VolumeName != "" {
		return false, nil

	if qualifier, ok := ctrl.provisioner.(Qualifier); ok {
		if !qualifier.ShouldProvision(ctx, claim) {
			return false, nil

	provisioner, found := claim.Annotations[annStorageProvisioner]
	if !found {
		provisioner, found = claim.Annotations[annBetaStorageProvisioner]

	if found {
		if ctrl.knownProvisioner(provisioner) {
			claimClass := util.GetPersistentVolumeClaimClass(claim)
			class, err := ctrl.getStorageClass(claimClass)
			if err != nil {
				return false, err
			if class.VolumeBindingMode != nil && *class.VolumeBindingMode == storage.VolumeBindingWaitForFirstConsumer {
				// When claim is in delay binding mode, annSelectedNode is
				// required to provision volume.
				// Though PV controller set annStorageProvisioner only when
				// annSelectedNode is set, but provisioner may remove
				// annSelectedNode to notify scheduler to reschedule again.
				if selectedNode, ok := claim.Annotations[annSelectedNode]; ok && selectedNode != "" {
					return true, nil
				return false, nil
			return true, nil

	return false, nil

func (p *csiProvisioner) ShouldProvision(ctx context.Context, claim *v1.PersistentVolumeClaim) bool {
	provisioner, ok := claim.Annotations[annStorageProvisioner]
	if !ok {
		provisioner = claim.Annotations[annBetaStorageProvisioner]
	migratedTo := claim.Annotations[annMigratedTo]
	//pvc中指定的provisioner是否是csi driver
	if provisioner != p.driverName && migratedTo != p.driverName {
		// Non-migrated in-tree volume is requested.
		return false
	// Either CSI volume is requested or in-tree volume is migrated to CSI in PV controller
	// and therefore PVC has CSI annotation.
	// But before we start provisioning, check that we are (or can
	// become) the owner if there are multiple provisioner instances.
	// That we do this here is crucial because if we return false here,
	// the claim will be ignored without logging an event for it.
	// We don't want each provisioner instance to log events for the same
	// claim unless they really need to do some work for it.
	owned, err := p.checkNode(ctx, claim, nil, "should provision")
	if err == nil {
		if !owned {
			return false
	} else {
		// This is unexpected. Here we can only log it and let
		// a provisioning attempt start. If that still fails,
		// a proper event will be created.
		klog.V(2).Infof("trying to become an owner of PVC %s/%s in advance failed, will try again during provisioning: %s",
			claim.Namespace, claim.Name, err)

	// Start provisioning.
	return true

// knownProvisioner checks if provisioner name has been
// configured to provision volumes for
func (ctrl *ProvisionController) knownProvisioner(provisioner string) bool {
	if provisioner == ctrl.provisionerName {
		return true

	for _, p := range ctrl.additionalProvisionerNames {
		if p == provisioner {
			return true
	return false

func (ctrl *ProvisionController) provisionClaimOperation(ctx context.Context, claim *v1.PersistentVolumeClaim) (ProvisioningState, error) {
	volume, result, err := ctrl.provisioner.Provision(ctx, options)

	// Set ClaimRef and the PV controller will bind and set annBoundByController for us
	volume.Spec.ClaimRef = claimRef

	//AnnDynamicallyProvisioned = "pv.kubernetes.io/provisioned-by"
	metav1.SetMetaDataAnnotation(&volume.ObjectMeta, annDynamicallyProvisioned, class.Provisioner)
	volume.Spec.StorageClassName = claimClass

	ctrl.volumeStore.StoreVolume(claim, volume)

func (p *csiProvisioner) Provision(ctx context.Context, options controller.ProvisionOptions) (*v1.PersistentVolume, controller.ProvisioningState, error) {
	provisioner, ok := claim.Annotations[annStorageProvisioner]
	if !ok {
		provisioner = claim.Annotations[annBetaStorageProvisioner]
	if provisioner != p.driverName && claim.Annotations[annMigratedTo] != p.driverName {
		// The storage provisioner annotation may not equal driver name but the
		// PVC could have annotation "migrated-to" which is the new way to
		// signal a PVC is migrated (k8s v1.17+)
		return nil, controller.ProvisioningFinished, &controller.IgnoredError{
			Reason: fmt.Sprintf("PVC annotated with external-provisioner name %s does not match provisioner driver name %s. This could mean the PVC is not migrated",
	//调用csi driver的CreateVolume,创建volume
	rep, err := p.csiClient.CreateVolume(createCtx, req)
	pv := &v1.PersistentVolume{
		ObjectMeta: metav1.ObjectMeta{
			Name: pvName,
		Spec: v1.PersistentVolumeSpec{
			AccessModes:  options.PVC.Spec.AccessModes,
			MountOptions: options.StorageClass.MountOptions,
			Capacity: v1.ResourceList{
				v1.ResourceName(v1.ResourceStorage): bytesToQuantity(respCap),
			// TODO wait for CSI VolumeSource API
			PersistentVolumeSource: v1.PersistentVolumeSource{
				CSI: result.csiPVSource,
	return pv, controller.ProvisioningFinished, nil

func (b *backoffStore) StoreVolume(claim *v1.PersistentVolumeClaim, volume *v1.PersistentVolume) error {
	// Try to create the PV object several times
	b.client.CoreV1().PersistentVolumes().Create(context.Background(), volume, metav1.CreateOptions{})

当provisioner创建PV后,pv controller会监听到PV添加事件,调用syncVolume

func (ctrl *PersistentVolumeController) syncVolume(volume *v1.PersistentVolume)
	if volume.Spec.ClaimRef == nil {
	} else /* pv.Spec.ClaimRef != nil */ { //provisoner创建PV成功后,会设置pv.Spec.ClaimRef为pvc
		// Get the PVC by _name_
		var claim *v1.PersistentVolumeClaim
		claimName := claimrefToClaimKey(volume.Spec.ClaimRef)
		obj, found, err := ctrl.claims.GetByKey(claimName)
		claim, ok = obj.(*v1.PersistentVolumeClaim)

		if claim == nil {
		} else if claim.Spec.VolumeName == "" { //claim.Spec.VolumeName仍然为空,将claim添加到claimQueue,触发syncClaim
			return nil

func (ctrl *PersistentVolumeController) syncClaim(claim *v1.PersistentVolumeClaim) error
	if !metav1.HasAnnotation(claim.ObjectMeta, pvutil.AnnBindCompleted) {
		return ctrl.syncUnboundClaim(claim)
	} else {
		return ctrl.syncBoundClaim(claim)
func (ctrl *PersistentVolumeController) syncUnboundClaim(claim *v1.PersistentVolumeClaim) error
	if claim.Spec.VolumeName == "" {
		volume, err := ctrl.volumes.findBestMatchForClaim(claim, delayBinding)
		if volume == nil {
		} else /* pv != nil */ {
			// Found a PV for this claim
			// OBSERVATION: pvc is "Pending", pv is "Available"
			claimKey := claimToClaimKey(claim)
			klog.V(4).Infof("synchronizing unbound PersistentVolumeClaim[%s]: volume %q found: %s", claimKey, volume.Name, getVolumeStatusForLogging(volume))
			ctrl.bind(volume, claim)
	} else /* pvc.Spec.VolumeName != nil */ {

func (ctrl *PersistentVolumeController) bind(volume *v1.PersistentVolume, claim *v1.PersistentVolumeClaim) error {
	updatedVolume, err = ctrl.bindVolumeToClaim(volume, claim)
		volumeClone, dirty, err := pvutil.GetBindVolumeToClaim(volume, claim)
		// Save the volume only if something was changed
		if dirty {
			return ctrl.updateBindVolumeToClaim(volumeClone, true)
	volume = updatedVolume

	updatedVolume, err = ctrl.updateVolumePhase(volume, v1.VolumeBound, "")
		volumeClone := volume.DeepCopy()
		volumeClone.Status.Phase = phase
		volumeClone.Status.Message = message
		newVol, err := ctrl.kubeClient.CoreV1().PersistentVolumes().UpdateStatus(context.TODO(), volumeClone, metav1.UpdateOptions{})
	volume = updatedVolume
	updatedClaim, err = ctrl.bindClaimToVolume(claim, volume)
		dirty := false

		// Check if the claim was already bound (either by controller or by user)
		shouldBind := false
		if volume.Name != claim.Spec.VolumeName {
			shouldBind = true

		// The claim from method args can be pointing to watcher cache. We must not
		// modify these, therefore create a copy.
		claimClone := claim.DeepCopy()

		if shouldBind {
			dirty = true
			// Bind the claim to the volume
			claimClone.Spec.VolumeName = volume.Name

			//AnnBoundByController = "pv.kubernetes.io/bound-by-controller"
			// Set AnnBoundByController if it is not set yet
			if !metav1.HasAnnotation(claimClone.ObjectMeta, pvutil.AnnBoundByController) {
				metav1.SetMetaDataAnnotation(&claimClone.ObjectMeta, pvutil.AnnBoundByController, "yes")

		//AnnBindCompleted = "pv.kubernetes.io/bind-completed"
		// Set AnnBindCompleted if it is not set yet
		if !metav1.HasAnnotation(claimClone.ObjectMeta, pvutil.AnnBindCompleted) {
			metav1.SetMetaDataAnnotation(&claimClone.ObjectMeta, pvutil.AnnBindCompleted, "yes")
			dirty = true

		if dirty {
			klog.V(2).Infof("volume %q bound to claim %q", volume.Name, claimToClaimKey(claim))
			newClaim, err := ctrl.kubeClient.CoreV1().PersistentVolumeClaims(claim.Namespace).Update(context.TODO(), claimClone, metav1.UpdateOptions{})
	claim = updatedClaim
	updatedClaim, err = ctrl.updateClaimStatus(claim, v1.ClaimBound, volume)
	claim = updatedClaim


在kube-scheduler的volumebinding插件的PreBind函数中,等待pod的所有pvc都变成bound后才继续往下进行(pv controller负责将pvc变成bound状态)

如果在pod yaml指定了unbound的pvc,则会在volumebinding的检查中失败,但是如果同时也指定了nodename,则此pod不会经过kube-scheduler的调度,也就不会在volumebinding中失败,而是在kubelet的getPVCExtractPV中检测到pvc是unbound后失败。

ad controller

ad controller的作用是对调度成功的pod所需要的volume执行attach操作,所以 ProbeAttachableVolumePlugins 只包括提供attach操作的插件。对于csi插件来说,ad controller中的attach操作只是创建volumeattachment对象,真正的attach操作由external-attacher监听到volumeattachment后调用csi driver的ControllerPublishVolume来实现

// ProcessPodVolumes processes the volumes in the given pod and adds them to the
// desired state of the world if addVolumes is true, otherwise it removes them.
func ProcessPodVolumes(pod *v1.Pod, addVolumes bool, desiredStateOfWorld cache.DesiredStateOfWorld, volumePluginMgr *volume.VolumePluginMgr, pvcLister corelisters.PersistentVolumeClaimLister, pvLister corelisters.PersistentVolumeLister, csiMigratedPluginManager csimigration.PluginManager, csiTranslator csimigration.InTreeToCSITranslator) {
	// Process volume spec for each volume defined in pod
	//遍历pod的volume,如果volume指定了pvc,则从pvc中获取pv,再获取pv spec,其中指定了使用的哪种plugin
	for _, podVolume := range pod.Spec.Volumes {
		volumeSpec, err := CreateVolumeSpec(podVolume, pod, nodeName, volumePluginMgr, pvcLister, pvLister, csiMigratedPluginManager, csiTranslator)

		attachableVolumePlugin, err :=
				volumePlugin, err := pm.FindPluginBySpec(spec)
					matches := []VolumePlugin{}
					for _, v := range pm.plugins {
						//比如nfs plugin的CanSupport,判断spec.PersistentVolume.Spec.NFS或者spec.Volume.NFS是否为空,不为空说明支持此spec
						if v.CanSupport(spec) {
							matches = append(matches, v)
					if len(matches) == 0 {
						return nil, fmt.Errorf("no volume plugin matched")
				if err != nil {
					return nil, err
				//对于csiPlugin,是支持AttachableVolumePlugin的,但是是否支持CanAttach还得看具体的csi driver,看下面对CanAttach的注释
				//而in-tree的nfs plugin没有实现AttachableVolumePlugin接口,所以不支持attach
				if attachableVolumePlugin, ok := volumePlugin.(AttachableVolumePlugin); ok {
					if canAttach, err := attachableVolumePlugin.CanAttach(spec); err != nil {
						return nil, err
					} else if canAttach {
						return attachableVolumePlugin, nil
				return nil, nil
		if err != nil || attachableVolumePlugin == nil {
				"Skipping volume %q for pod %q/%q: it does not implement attacher interface. err=%v",

		uniquePodName := util.GetUniquePodName(pod)
		if addVolumes {
			// Add volume to desired state of world
			_, err := desiredStateOfWorld.AddPod(
				uniquePodName, pod, volumeSpec, nodeName)
		else {
			// Remove volume from desired state of world
			uniqueVolumeName, err := util.GetUniqueVolumeNameFromSpec(
				attachableVolumePlugin, volumeSpec)
				uniquePodName, uniqueVolumeName, nodeName)

func (p *csiPlugin) CanSupport(spec *volume.Spec) bool {
	// TODO (vladimirvivien) CanSupport should also take into account
	// the availability/registration of specified Driver in the volume source
	if spec == nil {
		return false
	if utilfeature.DefaultFeatureGate.Enabled(features.CSIInlineVolume) {
		return (spec.PersistentVolume != nil && spec.PersistentVolume.Spec.CSI != nil) ||
			(spec.Volume != nil && spec.Volume.CSI != nil)

	return spec.PersistentVolume != nil && spec.PersistentVolume.Spec.CSI != nil

//csiPlugin的CanAttach,通过pv指定的Driver获取csidriver,csidriver只指定了是否需要attach,对于nfs csidriver来说是不支持的
func (p *csiPlugin) CanAttach(spec *volume.Spec) (bool, error) {
	pvSrc, err := getCSISourceFromSpec(spec)
	if err != nil {
		return false, err

	driverName := pvSrc.Driver

	skipAttach, err := p.skipAttach(driverName)

	return !skipAttach, nil

// skipAttach looks up CSIDriver object associated with driver name
// to determine if driver requires attachment volume operation
func (p *csiPlugin) skipAttach(driver string) (bool, error) {
	csiDriver, err := p.csiDriverLister.Get(driver)
	if csiDriver.Spec.AttachRequired != nil && *csiDriver.Spec.AttachRequired == false {
		return true, nil
	return false, nil

root@master:~/nfs/dy_out# kubectl describe csiDriver nfs.csi.k8s.io
Name:         nfs.csi.k8s.io
Labels:       <none>
Annotations:  <none>
API Version:  storage.k8s.io/v1
Kind:         CSIDriver
Metadata:
  Creation Timestamp:  2022-11-16T13:59:54Z
  Managed Fields:
    API Version:  storage.k8s.io/v1
    Fields Type:  FieldsV1
    Manager:         kubectl-client-side-apply
    Operation:       Update
    Time:            2022-11-16T13:59:54Z
  Resource Version:  1185243
  UID:               d756aba2-0fb5-429f-8e6e-eb2517b44fd7
Spec:
  Attach Required:     false   //指定了nfs不需要attach操作
  Fs Group Policy:     File
  Pod Info On Mount:   false
  Requires Republish:  false
  Storage Capacity:    false
  Volume Lifecycle Modes:
Events:  <none>

在ad controller reconcile过程中,调用插件的attach接口

reconcile -> attachDesiredVolumes -> AttachVolume
	attachableVolumePlugin, err := og.volumePluginMgr.FindAttachablePluginBySpec(volumeToAttach.VolumeSpec)
	volumeAttacher, newAttacherErr := attachableVolumePlugin.NewAttacher()
	// Execute attach
	devicePath, attachErr := volumeAttacher.Attach(volumeToAttach.VolumeSpec, volumeToAttach.NodeName)

//对于csi plugin来说
func (p *csiPlugin) NewAttacher() (volume.Attacher, error) {
	return p.newAttacherDetacher()

func (c *csiAttacher) Attach(spec *volume.Spec, nodeName types.NodeName) (string, error) {
	if spec == nil {
		klog.Error(log("attacher.Attach missing volume.Spec"))
		return "", errors.New("missing spec")

	pvSrc, err := getPVSourceFromSpec(spec)
	if err != nil {
		return "", errors.New(log("attacher.Attach failed to get CSIPersistentVolumeSource: %v", err))

	node := string(nodeName)
	attachID := getAttachmentName(pvSrc.VolumeHandle, pvSrc.Driver, node)

	attachment, err := c.plugin.volumeAttachmentLister.Get(attachID)
	if err != nil && !apierrors.IsNotFound(err) {
		return "", errors.New(log("failed to get volume attachment from lister: %v", err))

	if attachment == nil {
		var vaSrc storage.VolumeAttachmentSource
		if spec.InlineVolumeSpecForCSIMigration {
			// inline PV scenario - use PV spec to populate VA source.
			// The volume spec will be populated by CSI translation API
			// for inline volumes. This allows fields required by the CSI
			// attacher such as AccessMode and MountOptions (in addition to
			// fields in the CSI persistent volume source) to be populated
			// as part of CSI translation for inline volumes.
			vaSrc = storage.VolumeAttachmentSource{
				InlineVolumeSpec: &spec.PersistentVolume.Spec,
		} else {
			// regular PV scenario - use PV name to populate VA source
			pvName := spec.PersistentVolume.GetName()
			vaSrc = storage.VolumeAttachmentSource{
				PersistentVolumeName: &pvName,

		attachment := &storage.VolumeAttachment{
			ObjectMeta: meta.ObjectMeta{
				Name: attachID,
			Spec: storage.VolumeAttachmentSpec{
				NodeName: node,
				Attacher: pvSrc.Driver,
				Source:   vaSrc,

		_, err = c.k8s.StorageV1().VolumeAttachments().Create(context.TODO(), attachment, metav1.CreateOptions{})

	// Attach and detach functionality is exclusive to the CSI plugin that runs in the AttachDetachController,
	// and has access to a VolumeAttachment lister that can be polled for the current status.
	if err := c.waitForVolumeAttachmentWithLister(pvSrc.VolumeHandle, attachID, c.watchTimeout); err != nil {
		return "", err

	klog.V(4).Info(log("attacher.Attach finished OK with VolumeAttachment object [%s]", attachID))

	// Don't return attachID as a devicePath. We can reconstruct the attachID using getAttachmentName()
	return "", nil



// syncVA deals with one key off the queue.  It returns false when it's time to quit.
func (ctrl *CSIAttachController) syncVA()
	key, quit := ctrl.vaQueue.Get()
func (h *csiHandler) SyncNewOrUpdatedVolumeAttachment(va *storage.VolumeAttachment)
		va, metadata, err := h.csiAttach(va)
			// We're not interested in `detached` return value, the controller will
			// issue Detach to be sure the volume is really detached.
			publishInfo, _, err := h.attacher.Attach(ctx, volumeHandle, readOnly, nodeID, volumeCapabilities, attributes, secrets)

func (a *attacher) Attach(ctx context.Context, volumeID string, readOnly bool, nodeID string, caps *csi.VolumeCapability, context, secrets map[string]string) (metadata map[string]string, detached bool, err error) {
	req := csi.ControllerPublishVolumeRequest{
		VolumeId:         volumeID,
		NodeId:           nodeID,
		VolumeCapability: caps,
		Readonly:         readOnly,
		VolumeContext:    context,
		Secrets:          secrets,

	rsp, err := a.client.ControllerPublishVolume(ctx, &req)
	if err != nil {
		return nil, isFinalError(err), err
	return rsp.PublishContext, false, nil

volumeManager mount


func (dswp *desiredStateOfWorldPopulator) Run(sourcesReady config.SourcesReady, stopCh <-chan struct{}) {
	// Wait for the completion of a loop that started after sources are all ready, then set hasAddedPods accordingly
	klog.InfoS("Desired state populator starts to run")
	wait.PollUntil(dswp.loopSleepDuration, func() (bool, error) {
		done := sourcesReady.AllReady()
		return done, nil
	}, stopCh)
	dswp.hasAddedPods = true
	wait.Until(dswp.populatorLoop, dswp.loopSleepDuration, stopCh)

func (dswp *desiredStateOfWorldPopulator) populatorLoop() {

// Iterate through all pods and add to desired state of world if they don't
// exist but should
func (dswp *desiredStateOfWorldPopulator) findAndAddNewPods() {
	processedVolumesForFSResize := sets.NewString()
	for _, pod := range dswp.podManager.GetPods() {
		if dswp.podStateProvider.ShouldPodContainersBeTerminating(pod.UID) {
			// Do not (re)add volumes for pods that can't also be starting containers
		dswp.processPodVolumes(pod, mountedVolumesForPod, processedVolumesForFSResize)

// processPodVolumes processes the volumes in the given pod and adds them to the
// desired state of the world.
func (dswp *desiredStateOfWorldPopulator) processPodVolumes(
	pod *v1.Pod,
	mountedVolumesForPod map[volumetypes.UniquePodName]map[string]cache.MountedVolume,
	processedVolumesForFSResize sets.String) {
	// Process volume spec for each volume defined in pod
	for _, podVolume := range pod.Spec.Volumes {
		// Add volume to desired state of world
		uniqueVolumeName, err := dswp.desiredStateOfWorld.AddPodToVolume(
			uniquePodName, pod, volumeSpec, podVolume.Name, volumeGidValue)

func (rc *reconciler) reconcile() {
	// Unmounts are triggered before mounts so that a volume that was
	// referenced by a pod that was deleted and is now referenced by another
	// pod is unmounted from the first pod before being mounted to the new
	// pod.

	// Next we mount required volumes. This function could also trigger
	// attach if kubelet is responsible for attaching volumes.
	// If underlying PVC was resized while in-use then this function also handles volume
	// resizing.

	// Ensure devices that should be detached/unmounted are detached/unmounted.

func (rc *reconciler) mountAttachVolumes() {
	// Ensure volumes that should be attached/mounted are attached/mounted.
	for _, volumeToMount := range rc.desiredStateOfWorld.GetVolumesToMount() {
			err := rc.operationExecutor.MountVolume(

MountVolume -> GenerateMountVolumeFunc

		volumeMounter, newMounterErr := volumePlugin.NewMounter(

		// get deviceMounter, if possible
		deviceMountableVolumePlugin, _ := og.volumePluginMgr.FindDeviceMountablePluginBySpec(volumeToMount.VolumeSpec)
		var volumeDeviceMounter volume.DeviceMounter
		if deviceMountableVolumePlugin != nil {
			volumeDeviceMounter, _ = deviceMountableVolumePlugin.NewDeviceMounter()
		// Mount device to global mount path
		err = volumeDeviceMounter.MountDevice(
			volume.DeviceMounterArgs{FsGroup: fsGroup},
		// Execute mount
		mountErr := volumeMounter.SetUp(volume.MounterArgs{
			FsUser:              util.FsUserFrom(volumeToMount.Pod),
			FsGroup:             fsGroup,
			DesiredSize:         volumeToMount.DesiredSizeLimit,
			FSGroupChangePolicy: fsGroupChangePolicy,

func (c *csiAttacher) MountDevice(spec *volume.Spec, devicePath string, deviceMountPath string, deviceMounterArgs volume.DeviceMounterArgs) error {
	err = csi.NodeStageVolume(ctx,


// SetUp attaches the disk and bind mounts to the volume path.
func (nfsMounter *nfsMounter) SetUp(mounterArgs volume.MounterArgs) error {
	return nfsMounter.SetUpAt(nfsMounter.GetPath(), mounterArgs)


func (c *csiMountMgr) SetUp(mounterArgs volume.MounterArgs) error {
	return c.SetUpAt(c.GetPath(), mounterArgs)

func (c *csiMountMgr) SetUpAt(dir string, mounterArgs volume.MounterArgs) error {

	err = csi.NodePublishVolume(


	klog.V(4).Infof(log("mounter.SetUp successfully requested NodePublish [%s]", dir))
	return nil

//调用csi plugin的NodePublishVolume进行mount操作
func (c *csiDriverClient) NodePublishVolume(...)
	nodeClient.NodePublishVolume(ctx, req)

// NodePublishVolume mount the volume
func (ns *NodeServer) NodePublishVolume(ctx context.Context, req *csi.NodePublishVolumeRequest) (*csi.NodePublishVolumeResponse, error) {
	ns.mounter.Mount(source, targetPath, "nfs", mountOptions)

func (kl *Kubelet) syncPod(ctx context.Context, updateType kubetypes.SyncPodType, pod, mirrorPod *v1.Pod, podStatus *kubecontainer.PodStatus)
	//等待volume attach/mount成功
	// Wait for volumes to attach/mount
	// Fetch the pull secrets for the pod
	pullSecrets := kl.getPullSecretsForPod(pod)

	// Call the container runtime's SyncPod callback
	result := kl.containerRuntime.SyncPod(pod, podStatus, pullSecrets, kl.backOff)

func (vm *volumeManager) WaitForAttachAndMount(pod *v1.Pod) error {
	if pod == nil {
		return nil

	expectedVolumes := getExpectedVolumes(pod)
	if len(expectedVolumes) == 0 {
		// No volumes to verify
		return nil

	klog.V(3).InfoS("Waiting for volumes to attach and mount for pod", "pod", klog.KObj(pod))
	uniquePodName := util.GetUniquePodName(pod)

	// Some pods expect to have Setup called over and over again to update.
	// Remount plugins for which this is true. (Atomically updating volumes,
	// like Downward API, depend on this to update the contents of the volume).

	err := wait.PollImmediate(
		vm.verifyVolumesMountedFunc(uniquePodName, expectedVolumes))

	if err != nil {
		unmountedVolumes :=
			vm.getUnmountedVolumes(uniquePodName, expectedVolumes)
		// Also get unattached volumes for error message
		unattachedVolumes :=

		if len(unmountedVolumes) == 0 {
			return nil

		return fmt.Errorf(
			"unmounted volumes=%v, unattached volumes=%v: %s",

	klog.V(3).InfoS("All volumes are attached and mounted for pod", "pod", klog.KObj(pod))
	return nil

// verifyVolumesMountedFunc returns a method that returns true when all expected
// volumes are mounted.
func (vm *volumeManager) verifyVolumesMountedFunc(podName types.UniquePodName, expectedVolumes []string) wait.ConditionFunc {
	return func() (done bool, err error) {
		if errs := vm.desiredStateOfWorld.PopPodErrors(podName); len(errs) > 0 {
			return true, errors.New(strings.Join(errs, "; "))
		return len(vm.getUnmountedVolumes(podName, expectedVolumes)) == 0, nil
