【kubernetes/k8s源码分析】 rook operator启动源码分析

https://github.com/rook/rook

 

     operator:作用是启动并监控存储集群,包括mon,osd,mgr等服务,可以支持 block 块存储,file system 文件存储,object 对象存储 (S3/Swift)。 启动包括ceph agent 与 discover服务,agent 包含 Flexvolume plugin,包括attaching network storage devices, mounting volumes, and formatting the filesystem 三种操作。

 

启动命令

Usage:
  rook ceph operator [flags]

启动参数

     使用的ceph CSI镜像包括attacher,provisioner,csi-node-driver-registrar,cephfsplugin,rbdplugin

     包括的配置存入/etc/ceph-csi目录,

  • --mon-healthcheck-interval:mon健康监测间隔,默认45s
  • --mon-out-timeout:mon超时时间,默认为10m

   --alsologtostderr=false, --csi-attacher-image=quay.io/k8scsi/csi-attacher:v1.0.1,

--csi-cephfs-image=quay.io/cephcsi/cephfsplugin:v1.0.0,

--csi-cephfs-plugin-template-path=/etc/ceph-csi/cephfs/csi-cephfsplugin.yaml,

--csi-cephfs-provisioner-template-path=/etc/ceph-csi/cephfs/csi-cephfsplugin-provisioner.yaml,

--csi-enable-cephfs=false, --csi-enable-rbd=false,

--csi-provisioner-image=quay.io/k8scsi/csi-provisioner:v1.0.1,

--csi-rbd-image=quay.io/cephcsi/rbdplugin:v1.0.0,

--csi-rbd-plugin-template-path=/etc/ceph-csi/rbd/csi-rbdplugin.yaml,

--csi-rbd-provisioner-template-path=/etc/ceph-csi/rbd/csi-rbdplugin-provisioner.yaml,

--csi-registrar-image=quay.io/k8scsi/csi-node-driver-registrar:v1.0.2,

--csi-snapshotter-image=quay.io/k8scsi/csi-snapshotter:v1.0.1,

--help=false, --log-flush-frequency=5s, --log-level=INFO,

--log_backtrace_at=:0, --log_dir=, --log_file=, --logtostderr=true,

--mon-healthcheck-interval=45s, --mon-out-timeout=10m0s,

--skip_headers=false, --stderrthreshold=2, --v=0, --vmodule=

 

1. startOperator函数

  1.1 建立kube apiserver的客户端,比较容易理解

clientset, apiExtClientset, rookClientset, err := rook.GetClientset()
if err != nil {
   rook.TerminateFatal(fmt.Errorf("failed to get k8s client. %+v\n", err))
}

  1.2 创建上下文,比如网络信息,路径设置为/var/lib/rook,客户端

context := createContext()
context.NetworkInfo = clusterd.NetworkInfo{}
context.ConfigDir = k8sutil.DataDir
context.Clientset = clientset
context.APIExtensionClientset = apiExtClientset
context.RookClientset = rookClientset

  1.3 获得运行operator进程的pod

       namespace为rook-ceph,podname以及镜像rook/ceph:master

// Using the current image version to deploy other rook pods
pod, err := k8sutil.GetRunningPod(clientset)
if err != nil {
	rook.TerminateFatal(fmt.Errorf("failed to get pod. %+v\n", err))
}

rookImage, err := k8sutil.GetContainerImage(pod, containerName)
if err != nil {
	rook.TerminateFatal(fmt.Errorf("failed to get container image. %+v\n", err))
}

  1.4 创建operator实例

// New constructs an Operator that deploys rook pods from rookImage and
// watches the rook custom resources (cluster, pool, object store, object
// store user, filesystem and volume attachment).
func New(context *clusterd.Context, volumeAttachmentWrapper attachment.Attachment, rookImage, securityAccount string) *Operator {
	o := &Operator{
		context:         context,
		rookImage:       rookImage,
		securityAccount: securityAccount,
	}
	// Controller that reconciles CephCluster objects with the same image
	// version as the running operator pod.
	o.clusterController = cluster.NewClusterController(context, rookImage, volumeAttachmentWrapper)
	// Custom resources the operator registers and watches.
	o.resources = []opkit.CustomResource{
		cluster.ClusterResource,
		pool.PoolResource,
		object.ObjectStoreResource,
		objectuser.ObjectStoreUserResource,
		file.FilesystemResource,
		attachment.VolumeResource,
	}
	return o
}

 

2. agent Start函数

  主要工作是创建daemonset的pod

rookAgent := agent.New(o.context.Clientset)

if err := rookAgent.Start(namespace, o.rookImage, o.securityAccount); err != nil {
	return fmt.Errorf("Error starting agent daemonset: %v", err)
}

  2.1 createAgentDaemonSet函数

     2.1.1 FLEXVOLUME_DIR_PATH变量默认为/usr/libexec/kubernetes/kubelet-plugins/volume/exec/

     curl localhost:8080/api/v1/nodes/master-node/proxy/configz

{
    "kubeletconfig":{
        "syncFrequency":"1m0s",
        "fileCheckFrequency":"20s",
        "httpCheckFrequency":"20s",
        "address":"192.168.74.57",
        "port":10250,
        "readOnlyPort":10255,
        "tlsCertFile":"/etc/kubernetes/ssl/kubelet.crt",
        "tlsPrivateKeyFile":"/etc/kubernetes/ssl/kubelet.key",
        "authentication":{
            "x509":{

            },
            "webhook":{
                "enabled":false,
                "cacheTTL":"2m0s"
            },
            "anonymous":{
                "enabled":true
            }
        },
        "authorization":{
            "mode":"AlwaysAllow",
            "webhook":{
                "cacheAuthorizedTTL":"5m0s",
                "cacheUnauthorizedTTL":"30s"
            }
        },
        "registryPullQPS":5,
        "registryBurst":10,
        "eventRecordQPS":5,
        "eventBurst":10,
        "enableDebuggingHandlers":true,
        "healthzPort":10248,
        "healthzBindAddress":"127.0.0.1",
        "oomScoreAdj":-999,
        "clusterDomain":"cluster.local.",
        "clusterDNS":[
            "10.200.254.254"
        ],
        "streamingConnectionIdleTimeout":"4h0m0s",
        "nodeStatusUpdateFrequency":"10s",
        "nodeStatusReportFrequency":"1m0s",
        "nodeLeaseDurationSeconds":40,
        "imageMinimumGCAge":"2m0s",
        "imageGCHighThresholdPercent":85,
        "imageGCLowThresholdPercent":80,
        "volumeStatsAggPeriod":"1m0s",
        "cgroupsPerQOS":true,
        "cgroupDriver":"cgroupfs",
        "cpuManagerPolicy":"none",
        "cpuManagerReconcilePeriod":"10s",
        "runtimeRequestTimeout":"2m0s",
        "hairpinMode":"hairpin-veth",
        "maxPods":110,
        "podPidsLimit":-1,
        "resolvConf":"/etc/resolv.conf",
        "cpuCFSQuota":true,
        "cpuCFSQuotaPeriod":"100ms",
        "maxOpenFiles":1000000,
        "contentType":"application/vnd.kubernetes.protobuf",
        "kubeAPIQPS":5,
        "kubeAPIBurst":10,
        "serializeImagePulls":true,
        "evictionHard":{
            "imagefs.available":"15%",
            "memory.available":"100Mi",
            "nodefs.available":"10%",
            "nodefs.inodesFree":"5%"
        },

        "evictionPressureTransitionPeriod":"5m0s",
        "enableControllerAttachDetach":true,
        "makeIPTablesUtilChains":true,
        "iptablesMasqueradeBit":14,
        "iptablesDropBit":15,
        "failSwapOn":false,
        "containerLogMaxSize":"10Mi",
        "containerLogMaxFiles":5,
        "configMapAndSecretChangeDetectionStrategy":"Watch",
        "enforceNodeAllocatable":[
            "pods"
        ]
    }
}

flexvolumeDirPath, source := a.discoverFlexvolumeDir()
logger.Infof("discovered flexvolume dir path from source %s. value: %s", source, flexvolumeDirPath)

    2.1.2 agent mount security mode设置为Any

agentMountSecurityMode := os.Getenv(AgentMountSecurityModeEnv)
if agentMountSecurityMode == "" {
	logger.Infof("no agent mount security mode given, defaulting to '%s' mode", MountSecurityModeAny)
	agentMountSecurityMode = MountSecurityModeAny
}
if agentMountSecurityMode != MountSecurityModeAny && agentMountSecurityMode != MountSecurityModeRestricted {
	return fmt.Errorf("invalid agent mount security mode specified (given: %s)", agentMountSecurityMode)
}

    2.1.3 创建daemonset名为rook-ceph-agent,启动参数为ceph agent,特权模式,挂盘设置,/flexmnt /dev /sys /lib/modules

      简单粗暴直接构建daemonset结构

    2.1.4 调用client-go API创建daemonset,比较简单

 

3. discover Start函数

  3.1 createDiscoverDaemonSet

    这个比较简单,调用client-go API直接创建名为rook-discover的daemonset

 

4.  开启CSI driver

    如果operator开启了ROOK_CSI_ENABLE_CEPHFS或者ROOK_CSI_ENABLE_RBD则执行CSI流程

# CSI enablement
- name: ROOK_CSI_ENABLE_CEPHFS
  value: "true"
- name: ROOK_CSI_CEPHFS_IMAGE
  value: "quay.io/cephcsi/cephfsplugin:v1.0.0"
- name: ROOK_CSI_ENABLE_RBD
  value: "true"
- name: ROOK_CSI_RBD_IMAGE
  value: "quay.io/cephcsi/rbdplugin:v1.0.0"
- name: ROOK_CSI_REGISTRAR_IMAGE
  value: "quay.io/k8scsi/csi-node-driver-registrar:v1.0.2"
- name: ROOK_CSI_PROVISIONER_IMAGE
  value: "quay.io/k8scsi/csi-provisioner:v1.0.1"
- name: ROOK_CSI_SNAPSHOTTER_IMAGE
  value: "quay.io/k8scsi/csi-snapshotter:v1.0.1"
- name: ROOK_CSI_ATTACHER_IMAGE
  value: "quay.io/k8scsi/csi-attacher:v1.0.1"

      调用StartCSIDrivers执行主要逻辑

if serverVersion.Major >= csi.KubeMinMajor && serverVersion.Minor >= csi.KubeMinMinor && csi.CSIEnabled() {
	logger.Infof("Ceph CSI driver is enabled, validate csi param")
	if err = csi.ValidateCSIParam(); err != nil {
		logger.Warningf("invalid csi params: %v", err)
		if csi.ExitOnError {
			return err
		}
	} else {
		csi.SetCSINamespace(namespace)
		if err = csi.StartCSIDrivers(namespace, o.context.Clientset); err != nil {
			logger.Warningf("failed to start Ceph csi drivers: %v", err)
			if csi.ExitOnError {
				return err
			}
		} else {
			logger.Infof("successfully started Ceph csi drivers")
		}
	}
}

 

5. StartCSIDrivers函数

    5.1 rbdplugin模板生成daemonset配置

      默认路径 /etc/ceph-csi/rbd/csi-rbdplugin.yaml,将模板中的变量替换为CSIParam全局变量中的数据

      启动包括driver-registrar,csi-rbdplugin两个容器共用一个pod,使用unix:///csi/csi.sock通信

kind: DaemonSet
apiVersion: apps/v1
metadata:
  name: csi-rbdplugin
  namespace: {{ .Namespace }}  
spec:
  selector:
    matchLabels:
      app: csi-rbdplugin
  template:
    metadata:
      labels:
        app: csi-rbdplugin
    spec:
      serviceAccount: rook-csi-rbd-plugin-sa
      hostNetwork: true
      hostPID: true      
      # to use e.g. Rook orchestrated cluster, and mons' FQDN is
      # resolved through k8s service, set dns policy to cluster first
      dnsPolicy: ClusterFirstWithHostNet      
      containers:
        - name: driver-registrar
          image: {{ .RegistrarImage }}
          args:
            - "--v=5"
            - "--csi-address=/csi/csi.sock"
            - "--kubelet-registration-path=/var/lib/kubelet/plugins/rbd.csi.ceph.com/csi.sock"
          lifecycle:
            preStop:
              exec:
                  command: ["/bin/sh", "-c", "rm -rf /registration/csi-rbdplugin /registration/csi-rbdplugin-reg.sock"]          
          env:
            - name: KUBE_NODE_NAME
              valueFrom:
                fieldRef:
                  fieldPath: spec.nodeName
          volumeMounts:
            - name: plugin-dir
              mountPath: /csi
            - name: registration-dir
              mountPath: /registration
        - name: csi-rbdplugin
          securityContext:
            privileged: true
            capabilities:
              add: ["SYS_ADMIN"]
            allowPrivilegeEscalation: true
          image: {{ .RBDPluginImage }}
          args :
            - "--nodeid=$(NODE_ID)"
            - "--endpoint=$(CSI_ENDPOINT)"
            - "--v=5"
            - "--drivername=rbd.csi.ceph.com"
            - "--containerized=true"
            - "--metadatastorage=k8s_configmap"
          env:
            - name: HOST_ROOTFS
              value: "/rootfs" 
            - name: NODE_ID
              valueFrom:
                fieldRef:
                  fieldPath: spec.nodeName
            - name: POD_NAMESPACE
              valueFrom:
                fieldRef:
                  fieldPath: metadata.namespace
            - name: CSI_ENDPOINT
              value: unix:///csi/csi.sock
          imagePullPolicy: "IfNotPresent"
// templateToDaemonSet renders the template at templatePath with the global
// CSI parameters and decodes the resulting YAML into an apps/v1 DaemonSet.
func templateToDaemonSet(name, templatePath string) (*apps.DaemonSet, error) {
	rendered, err := loadTemplate(name, templatePath)
	if err != nil {
		return nil, err
	}

	ds := &apps.DaemonSet{}
	if err := yaml.Unmarshal([]byte(rendered), ds); err != nil {
		return nil, err
	}
	return ds, nil
}

     5.2 rbd-provisioner模板生成statefulset配置

        默认路径 /etc/ceph-csi/rbd/csi-rbdplugin-provisioner.yaml

        启动了statefulset的pod,包括容器provisioner,attacher,snapshotter,rbdplugin四个容器,使用unix:///csi/csi-provisioner.sock通信

kind: StatefulSet
apiVersion: apps/v1
metadata:
  name: csi-rbdplugin-provisioner
  namespace: {{ .Namespace }}
spec:
  serviceName: "csi-rbdplugin-provisioner"
  replicas: 1
  selector:
    matchLabels:
     app: csi-rbdplugin-provisioner
  template:
    metadata:
      labels:
        app: csi-rbdplugin-provisioner
    spec:
      serviceAccount: rook-csi-rbd-provisioner-sa
      containers:
        - name: csi-provisioner
          image: {{ .ProvisionerImage }}
          args:
            - "--csi-address=$(ADDRESS)"
            - "--v=5"
          env:
            - name: ADDRESS
              value: unix:///csi/csi-provisioner.sock
          imagePullPolicy: "IfNotPresent"
          volumeMounts:
            - name: socket-dir
              mountPath: /csi
        - name: csi-rbdplugin-attacher
          image: {{ .AttacherImage }}
          args:
            - "--v=5"
            - "--csi-address=$(ADDRESS)"
          env:
            - name: ADDRESS
              value: /csi/csi-provisioner.sock
          imagePullPolicy: "IfNotPresent"
          volumeMounts:
            - name: socket-dir
              mountPath: /csi
        - name: csi-snapshotter
          image:  {{ .SnapshotterImage }}
          args:
            - "--csi-address=$(ADDRESS)"
            - "--connection-timeout=15s"
            - "--v=5"
          env:
            - name: ADDRESS
              value: unix:///csi/csi-provisioner.sock
          imagePullPolicy: Always
          securityContext:
            privileged: true
          volumeMounts:
            - name: socket-dir
              mountPath: /csi
        - name: csi-rbdplugin
          securityContext:
            privileged: true
            capabilities:
              add: ["SYS_ADMIN"]
          image: {{ .RBDPluginImage }}
          args :
            - "--nodeid=$(NODE_ID)"
            - "--endpoint=$(CSI_ENDPOINT)"
            - "--v=5"
            - "--drivername=rbd.csi.ceph.com"
            - "--containerized=true"
            - "--metadatastorage=k8s_configmap"
          env:
            - name: HOST_ROOTFS
              value: "/rootfs"
            - name: NODE_ID
              valueFrom:
                fieldRef:
                  fieldPath: spec.nodeName
            - name: POD_NAMESPACE
              valueFrom:
                fieldRef:
                  fieldPath: metadata.namespace
            - name: CSI_ENDPOINT
              value: unix:///csi/csi-provisioner.sock
          imagePullPolicy: "IfNotPresent"
// templateToStatefulSet renders the template at templatePath with the global
// CSI parameters and decodes the resulting YAML into an apps/v1 StatefulSet.
func templateToStatefulSet(name, templatePath string) (*apps.StatefulSet, error) {
	rendered, err := loadTemplate(name, templatePath)
	if err != nil {
		return nil, err
	}

	sts := &apps.StatefulSet{}
	if err := yaml.Unmarshal([]byte(rendered), sts); err != nil {
		return nil, err
	}
	return sts, nil
}

    如果开启cephfs也同样处理

 

    5.3 创建rbdPlugin的daemonset,创建rbdProvisioner的statefulSet

if rbdPlugin != nil {
	err = k8sutil.CreateDaemonSet("csi rbd plugin", namespace, clientset, rbdPlugin)
	if err != nil {
		return fmt.Errorf("failed to start rbdplugin daemonset: %v\n%v", err, rbdPlugin)
	}
}
if rbdProvisioner != nil {
	_, err = k8sutil.CreateStatefulSet("csi rbd provisioner", namespace, "csi-rbdplugin-provisioner", clientset, rbdProvisioner)
	if err != nil {
		return fmt.Errorf("failed to start rbd provisioner statefulset: %v\n%v", err, rbdProvisioner)
	}

}

    5.4 同样创建cephfs的daemon与provisioner

if cephfsPlugin != nil {
	err = k8sutil.CreateDaemonSet("csi cephfs plugin", namespace, clientset, cephfsPlugin)
	if err != nil {
		return fmt.Errorf("failed to start cephfs plugin daemonset: %v\n%v", err, cephfsPlugin)
	}
}
if cephfsProvisioner != nil {
	_, err = k8sutil.CreateStatefulSet("csi cephfs provisioner", namespace, "csi-cephfsplugin-provisioner", clientset, cephfsProvisioner)
	if err != nil {
		return fmt.Errorf("failed to start cephfs provisioner statefulset: %v\n%v", err, cephfsProvisioner)
	}

}

 

provisionerName       = "ceph.rook.io/block"
provisionerNameLegacy = "rook.io/block"

6. NewProvisionController

    RookVolumeProvisioner实现了Provisioner接口,包括方法Provision与Delete

    与controller manager形式一样,创建controller

// NewProvisionController creates a new provision controller using
// the given configuration parameters and with private (non-shared) informers.
func NewProvisionController(
	client kubernetes.Interface,
	provisionerName string,
	provisioner Provisioner,
	kubeVersion string,
	options ...func(*ProvisionController) error,
) *ProvisionController {
	id, err := os.Hostname()
	if err != nil {
		glog.Fatalf("Error getting hostname: %v", err)
	}

    两个队列,claims与volumes,也就是pv pvc资源队列

	controller.claimQueue = workqueue.NewNamedRateLimitingQueue(rateLimiter, "claims")
	controller.volumeQueue = workqueue.NewNamedRateLimitingQueue(rateLimiter, "volumes")

    PVC回调函数Add Update Delete

	// ----------------------
	// PersistentVolumeClaims

	claimHandler := cache.ResourceEventHandlerFuncs{
		AddFunc:    func(obj interface{}) { controller.enqueueClaim(obj) },
		UpdateFunc: func(oldObj, newObj interface{}) { controller.enqueueClaim(newObj) },
		DeleteFunc: func(obj interface{}) {
			// NOOP. The claim is either in claimsInProgress and in the queue, so it will be processed as usual
			// or it's not in claimsInProgress and then we don't care
		},
	}

    PV回调函数

	// -----------------
	// PersistentVolumes

	volumeHandler := cache.ResourceEventHandlerFuncs{
		AddFunc:    func(obj interface{}) { controller.enqueueVolume(obj) },
		UpdateFunc: func(oldObj, newObj interface{}) { controller.enqueueVolume(newObj) },
		DeleteFunc: func(obj interface{}) { controller.forgetVolume(obj) },
	}

       storageclass资源

	// --------------
	// StorageClasses

	// no resource event handler needed for StorageClasses
	if controller.classInformer == nil {
		if controller.kubeVersion.AtLeast(utilversion.MustParseSemantic("v1.6.0")) {
			controller.classInformer = informer.Storage().V1().StorageClasses().Informer()
		} else {
			controller.classInformer = informer.Storage().V1beta1().StorageClasses().Informer()
		}
	}
	controller.classes = controller.classInformer.GetStore()

      剩下的流程与其他controller manager中处理一样,可以参考https://blog.csdn.net/zhonglinzhang/article/details/89915182文章

 

7. 开启watch rook cluster功能

    也就是CephCluster资源,这篇文章分析

    https://blog.csdn.net/zhonglinzhang/article/details/89845140

// watch for changes to the rook clusters
o.clusterController.StartWatch(namespaceToWatch, stopChan)

 

总结

    operator启动创建agent与discover的daemonset的服务

    如果开启ceph csi则创建CSI插件,statefulset与daemonset

    创建controller,使用informer机制关注pv pvc storageclass

    开启watch CephCluster资源进行处理

   

你可能感兴趣的:(kubernetes,CSI,存储)