https://github.com/rook/rook
operator:作用是启动并监控存储集群,包括mon,osd,mgr等服务,可以支持 block 块存储,file system 文件存储,object 对象存储 (S3/Swift)。启动包括ceph agent 与 discover服务,agent 包含 Flexvolume plugin,包括attaching network storage devices, mounting volumes, and formatting the filesystem 三种操作。
Usage:
rook ceph operator [flags]
包括的ceph镜像包括attacher,provisioner,csi-node-driver-registrar,cephfsplugin,rbdplugin
包括的配置存入/etc/ceph-csi目录,
--alsologtostderr=false, --csi-attacher-image=quay.io/k8scsi/csi-attacher:v1.0.1,
--csi-cephfs-image=quay.io/cephcsi/cephfsplugin:v1.0.0,
--csi-cephfs-plugin-template-path=/etc/ceph-csi/cephfs/csi-cephfsplugin.yaml,
--csi-cephfs-provisioner-template-path=/etc/ceph-csi/cephfs/csi-cephfsplugin-provisioner.yaml,
--csi-enable-cephfs=false, --csi-enable-rbd=false,
--csi-provisioner-image=quay.io/k8scsi/csi-provisioner:v1.0.1,
--csi-rbd-image=quay.io/cephcsi/rbdplugin:v1.0.0,
--csi-rbd-plugin-template-path=/etc/ceph-csi/rbd/csi-rbdplugin.yaml,
--csi-rbd-provisioner-template-path=/etc/ceph-csi/rbd/csi-rbdplugin-provisioner.yaml,
--csi-registrar-image=quay.io/k8scsi/csi-node-driver-registrar:v1.0.2,
--csi-snapshotter-image=quay.io/k8scsi/csi-snapshotter:v1.0.1,
--help=false, --log-flush-frequency=5s, --log-level=INFO,
--log_backtrace_at=:0, --log_dir=, --log_file=, --logtostderr=true,
--mon-healthcheck-interval=45s, --mon-out-timeout=10m0s,
--skip_headers=false, --stderrthreshold=2, --v=0, --vmodule=
clientset, apiExtClientset, rookClientset, err := rook.GetClientset()
if err != nil {
rook.TerminateFatal(fmt.Errorf("failed to get k8s client. %+v\n", err))
}
context := createContext()
context.NetworkInfo = clusterd.NetworkInfo{}
context.ConfigDir = k8sutil.DataDir
context.Clientset = clientset
context.APIExtensionClientset = apiExtClientset
context.RookClientset = rookClientset
namespace为rook-ceph,podname以及镜像rook/ceph:master
// Using the current image version to deploy other rook pods
pod, err := k8sutil.GetRunningPod(clientset)
if err != nil {
rook.TerminateFatal(fmt.Errorf("failed to get pod. %+v\n", err))
}
rookImage, err := k8sutil.GetContainerImage(pod, containerName)
if err != nil {
rook.TerminateFatal(fmt.Errorf("failed to get container image. %+v\n", err))
}
// New creates an operator instance. It wires up the cluster controller and
// the list of custom resources the operator will watch, carrying the rook
// image and service account through to the pods it later deploys.
func New(context *clusterd.Context, volumeAttachmentWrapper attachment.Attachment, rookImage, securityAccount string) *Operator {
	// Custom resources the operator registers watches for.
	watchedResources := []opkit.CustomResource{
		cluster.ClusterResource,
		pool.PoolResource,
		object.ObjectStoreResource,
		objectuser.ObjectStoreUserResource,
		file.FilesystemResource,
		attachment.VolumeResource,
	}
	op := &Operator{
		context:           context,
		clusterController: cluster.NewClusterController(context, rookImage, volumeAttachmentWrapper),
		resources:         watchedResources,
		rookImage:         rookImage,
		securityAccount:   securityAccount,
	}
	return op
}
主要工作是创建daemonset的pod
rookAgent := agent.New(o.context.Clientset)
if err := rookAgent.Start(namespace, o.rookImage, o.securityAccount); err != nil {
return fmt.Errorf("Error starting agent daemonset: %v", err)
}
2.1.1 FLEXVOLUME_DIR_PATH变量默认为/usr/libexec/kubernetes/kubelet-plugins/volume/exec/
curl localhost:8080/api/v1/nodes/master-node/proxy/configz
{
"kubeletconfig":{
"syncFrequency":"1m0s",
"fileCheckFrequency":"20s",
"httpCheckFrequency":"20s",
"address":"192.168.74.57",
"port":10250,
"readOnlyPort":10255,
"tlsCertFile":"/etc/kubernetes/ssl/kubelet.crt",
"tlsPrivateKeyFile":"/etc/kubernetes/ssl/kubelet.key",
"authentication":{
"x509":{
},
"webhook":{
"enabled":false,
"cacheTTL":"2m0s"
},
"anonymous":{
"enabled":true
}
},
"authorization":{
"mode":"AlwaysAllow",
"webhook":{
"cacheAuthorizedTTL":"5m0s",
"cacheUnauthorizedTTL":"30s"
}
},
"registryPullQPS":5,
"registryBurst":10,
"eventRecordQPS":5,
"eventBurst":10,
"enableDebuggingHandlers":true,
"healthzPort":10248,
"healthzBindAddress":"127.0.0.1",
"oomScoreAdj":-999,
"clusterDomain":"cluster.local.",
"clusterDNS":[
"10.200.254.254"
],
"streamingConnectionIdleTimeout":"4h0m0s",
"nodeStatusUpdateFrequency":"10s",
"nodeStatusReportFrequency":"1m0s",
"nodeLeaseDurationSeconds":40,
"imageMinimumGCAge":"2m0s",
"imageGCHighThresholdPercent":85,
"imageGCLowThresholdPercent":80,
"volumeStatsAggPeriod":"1m0s",
"cgroupsPerQOS":true,
"cgroupDriver":"cgroupfs",
"cpuManagerPolicy":"none",
"cpuManagerReconcilePeriod":"10s",
"runtimeRequestTimeout":"2m0s",
"hairpinMode":"hairpin-veth",
"maxPods":110,
"podPidsLimit":-1,
"resolvConf":"/etc/resolv.conf",
"cpuCFSQuota":true,
"cpuCFSQuotaPeriod":"100ms",
"maxOpenFiles":1000000,
"contentType":"application/vnd.kubernetes.protobuf",
"kubeAPIQPS":5,
"kubeAPIBurst":10,
"serializeImagePulls":true,
"evictionHard":{
"imagefs.available":"15%",
"memory.available":"100Mi",
"nodefs.available":"10%",
"nodefs.inodesFree":"5%"
},
"evictionPressureTransitionPeriod":"5m0s",
"enableControllerAttachDetach":true,
"makeIPTablesUtilChains":true,
"iptablesMasqueradeBit":14,
"iptablesDropBit":15,
"failSwapOn":false,
"containerLogMaxSize":"10Mi",
"containerLogMaxFiles":5,
"configMapAndSecretChangeDetectionStrategy":"Watch",
"enforceNodeAllocatable":[
"pods"
]
}
}
flexvolumeDirPath, source := a.discoverFlexvolumeDir()
logger.Infof("discovered flexvolume dir path from source %s. value: %s", source, flexvolumeDirPath)
2.1.2 agent mount security mode设置为Any
agentMountSecurityMode := os.Getenv(AgentMountSecurityModeEnv)
if agentMountSecurityMode == "" {
logger.Infof("no agent mount security mode given, defaulting to '%s' mode", MountSecurityModeAny)
agentMountSecurityMode = MountSecurityModeAny
}
if agentMountSecurityMode != MountSecurityModeAny && agentMountSecurityMode != MountSecurityModeRestricted {
return fmt.Errorf("invalid agent mount security mode specified (given: %s)", agentMountSecurityMode)
}
2.1.3 创建daemonset名为rook-ceph-agent,启动参数为ceph agent,特权模式,挂盘设置,/flexmnt /dev /sys /lib/modules
简单粗暴直接构建daemon结构
2.1.4 调用client-go API创建daemonset,比较简单
这个比较简单,调用client-go API直接创建名为discover的daemonset
如果operator开启了ROOK_CSI_ENABLE_CEPHFS或者ROOK_CSI_ENABLE_RBD则执行CSI流程
# CSI enablement - name: ROOK_CSI_ENABLE_CEPHFS value: "true" - name: ROOK_CSI_CEPHFS_IMAGE value: "quay.io/cephcsi/cephfsplugin:v1.0.0" - name: ROOK_CSI_ENABLE_RBD value: "true" - name: ROOK_CSI_RBD_IMAGE value: "quay.io/cephcsi/rbdplugin:v1.0.0" - name: ROOK_CSI_REGISTRAR_IMAGE value: "quay.io/k8scsi/csi-node-driver-registrar:v1.0.2" - name: ROOK_CSI_PROVISIONER_IMAGE value: "quay.io/k8scsi/csi-provisioner:v1.0.1" - name: ROOK_CSI_SNAPSHOTTER_IMAGE value: "quay.io/k8scsi/csi-snapshotter:v1.0.1" - name: ROOK_CSI_ATTACHER_IMAGE value: "quay.io/k8scsi/csi-attacher:v1.0.1"
调用StartCSIDrivers执行主要逻辑
if serverVersion.Major >= csi.KubeMinMajor && serverVersion.Minor >= csi.KubeMinMinor && csi.CSIEnabled() {
logger.Infof("Ceph CSI driver is enabled, validate csi param")
if err = csi.ValidateCSIParam(); err != nil {
logger.Warningf("invalid csi params: %v", err)
if csi.ExitOnError {
return err
}
} else {
csi.SetCSINamespace(namespace)
if err = csi.StartCSIDrivers(namespace, o.context.Clientset); err != nil {
logger.Warningf("failed to start Ceph csi drivers: %v", err)
if csi.ExitOnError {
return err
}
} else {
logger.Infof("successfully started Ceph csi drivers")
}
}
}
默认路径 /etc/ceph-csi/rbd/csi-rbdplugin.yaml,将模板中的变量替换为CSIParam全局变量中的数据
启动包括driver-registrar,csi-rbdplugin两个容器共用一个pod,使用unix:///csi/csi.sock通信
kind: DaemonSet
apiVersion: apps/v1
metadata:
  name: csi-rbdplugin
  namespace: {{ .Namespace }}
spec:
  selector:
    matchLabels:
      app: csi-rbdplugin
  template:
    metadata:
      labels:
        app: csi-rbdplugin
    spec:
      serviceAccount: rook-csi-rbd-plugin-sa
      hostNetwork: true
      hostPID: true
      # to use e.g. Rook orchestrated cluster, and mons' FQDN is
      # resolved through k8s service, set dns policy to cluster first
      dnsPolicy: ClusterFirstWithHostNet
      containers:
        - name: driver-registrar
          image: {{ .RegistrarImage }}
          args:
            - "--v=5"
            - "--csi-address=/csi/csi.sock"
            - "--kubelet-registration-path=/var/lib/kubelet/plugins/rbd.csi.ceph.com/csi.sock"
          lifecycle:
            preStop:
              exec:
                command: ["/bin/sh", "-c", "rm -rf /registration/csi-rbdplugin /registration/csi-rbdplugin-reg.sock"]
          env:
            - name: KUBE_NODE_NAME
              valueFrom:
                fieldRef:
                  fieldPath: spec.nodeName
          volumeMounts:
            - name: plugin-dir
              mountPath: /csi
            - name: registration-dir
              mountPath: /registration
        - name: csi-rbdplugin
          securityContext:
            privileged: true
            capabilities:
              add: ["SYS_ADMIN"]
            allowPrivilegeEscalation: true
          image: {{ .RBDPluginImage }}
          args:
            - "--nodeid=$(NODE_ID)"
            - "--endpoint=$(CSI_ENDPOINT)"
            - "--v=5"
            - "--drivername=rbd.csi.ceph.com"
            - "--containerized=true"
            - "--metadatastorage=k8s_configmap"
          env:
            - name: HOST_ROOTFS
              value: "/rootfs"
            - name: NODE_ID
              valueFrom:
                fieldRef:
                  fieldPath: spec.nodeName
            - name: POD_NAMESPACE
              valueFrom:
                fieldRef:
                  fieldPath: metadata.namespace
            - name: CSI_ENDPOINT
              value: unix:///csi/csi.sock
          imagePullPolicy: "IfNotPresent"
// templateToDaemonSet renders the template file at templatePath (substituting
// the CSI parameters) and unmarshals the resulting YAML into a DaemonSet.
func templateToDaemonSet(name, templatePath string) (*apps.DaemonSet, error) {
	rendered, err := loadTemplate(name, templatePath)
	if err != nil {
		return nil, err
	}
	ds := &apps.DaemonSet{}
	if err := yaml.Unmarshal([]byte(rendered), ds); err != nil {
		return nil, err
	}
	return ds, nil
}
默认路径 /etc/ceph-csi/rbd/csi-rbdplugin-provisioner.yaml
启动了statefulset的pod,包括容器provisioner,attacher,snapshotter,rbdplugin四个容器,使用unix:///csi/csi-provisioner.sock通信
kind: StatefulSet
apiVersion: apps/v1
metadata:
  name: csi-rbdplugin-provisioner
  namespace: {{ .Namespace }}
spec:
  serviceName: "csi-rbdplugin-provisioner"
  replicas: 1
  selector:
    matchLabels:
      app: csi-rbdplugin-provisioner
  template:
    metadata:
      labels:
        app: csi-rbdplugin-provisioner
    spec:
      serviceAccount: rook-csi-rbd-provisioner-sa
      containers:
        - name: csi-provisioner
          image: {{ .ProvisionerImage }}
          args:
            - "--csi-address=$(ADDRESS)"
            - "--v=5"
          env:
            - name: ADDRESS
              value: unix:///csi/csi-provisioner.sock
          imagePullPolicy: "IfNotPresent"
          volumeMounts:
            - name: socket-dir
              mountPath: /csi
        - name: csi-rbdplugin-attacher
          image: {{ .AttacherImage }}
          args:
            - "--v=5"
            - "--csi-address=$(ADDRESS)"
          env:
            - name: ADDRESS
              value: /csi/csi-provisioner.sock
          imagePullPolicy: "IfNotPresent"
          volumeMounts:
            - name: socket-dir
              mountPath: /csi
        - name: csi-snapshotter
          image: {{ .SnapshotterImage }}
          args:
            - "--csi-address=$(ADDRESS)"
            - "--connection-timeout=15s"
            - "--v=5"
          env:
            - name: ADDRESS
              value: unix:///csi/csi-provisioner.sock
          imagePullPolicy: Always
          securityContext:
            privileged: true
          volumeMounts:
            - name: socket-dir
              mountPath: /csi
        - name: csi-rbdplugin
          securityContext:
            privileged: true
            capabilities:
              add: ["SYS_ADMIN"]
          image: {{ .RBDPluginImage }}
          args:
            - "--nodeid=$(NODE_ID)"
            - "--endpoint=$(CSI_ENDPOINT)"
            - "--v=5"
            - "--drivername=rbd.csi.ceph.com"
            - "--containerized=true"
            - "--metadatastorage=k8s_configmap"
          env:
            - name: HOST_ROOTFS
              value: "/rootfs"
            - name: NODE_ID
              valueFrom:
                fieldRef:
                  fieldPath: spec.nodeName
            - name: POD_NAMESPACE
              valueFrom:
                fieldRef:
                  fieldPath: metadata.namespace
            - name: CSI_ENDPOINT
              value: unix:///csi/csi-provisioner.sock
          imagePullPolicy: "IfNotPresent"
// templateToStatefulSet renders the template file at templatePath (substituting
// the CSI parameters) and unmarshals the resulting YAML into a StatefulSet.
func templateToStatefulSet(name, templatePath string) (*apps.StatefulSet, error) {
	rendered, err := loadTemplate(name, templatePath)
	if err != nil {
		return nil, err
	}
	ss := &apps.StatefulSet{}
	if err := yaml.Unmarshal([]byte(rendered), ss); err != nil {
		return nil, err
	}
	return ss, nil
}
如果开启cephfs也同样处理
if rbdPlugin != nil {
err = k8sutil.CreateDaemonSet("csi rbd plugin", namespace, clientset, rbdPlugin)
if err != nil {
return fmt.Errorf("failed to start rbdplugin daemonset: %v\n%v", err, rbdPlugin)
}
}
if rbdProvisioner != nil {
_, err = k8sutil.CreateStatefulSet("csi rbd provisioner", namespace, "csi-rbdplugin-provisioner", clientset, rbdProvisioner)
if err != nil {
return fmt.Errorf("failed to start rbd provisioner statefulset: %v\n%v", err, rbdProvisioner)
}
}
if cephfsPlugin != nil {
err = k8sutil.CreateDaemonSet("csi cephfs plugin", namespace, clientset, cephfsPlugin)
if err != nil {
return fmt.Errorf("failed to start cephfs plugin daemonset: %v\n%v", err, cephfsPlugin)
}
}
if cephfsProvisioner != nil {
_, err = k8sutil.CreateStatefulSet("csi cephfs provisioner", namespace, "csi-cephfsplugin-provisioner", clientset, cephfsProvisioner)
if err != nil {
return fmt.Errorf("failed to start cephfs provisioner statefulset: %v\n%v", err, cephfsProvisioner)
}
}
provisionerName = "ceph.rook.io/block"
provisionerNameLegacy = "rook.io/block"
RookVolumeProvisioner实现了Provisioner接口,包括方法Provision与Delete
与controller manager形式一样,创建controller
// NewProvisionController creates a new provision controller using
// the given configuration parameters and with private (non-shared) informers.
func NewProvisionController(
client kubernetes.Interface,
provisionerName string,
provisioner Provisioner,
kubeVersion string,
options ...func(*ProvisionController) error,
) *ProvisionController {
id, err := os.Hostname()
if err != nil {
glog.Fatalf("Error getting hostname: %v", err)
}
两个队列,claims与volumes,也就是pv pvc资源队列
controller.claimQueue = workqueue.NewNamedRateLimitingQueue(rateLimiter, "claims")
controller.volumeQueue = workqueue.NewNamedRateLimitingQueue(rateLimiter, "volumes")
PVC回调函数Add Update Delete
// ----------------------
// PersistentVolumeClaims
claimHandler := cache.ResourceEventHandlerFuncs{
AddFunc: func(obj interface{}) { controller.enqueueClaim(obj) },
UpdateFunc: func(oldObj, newObj interface{}) { controller.enqueueClaim(newObj) },
DeleteFunc: func(obj interface{}) {
// NOOP. The claim is either in claimsInProgress and in the queue, so it will be processed as usual
// or it's not in claimsInProgress and then we don't care
},
}
PV回调函数
// -----------------
// PersistentVolumes
volumeHandler := cache.ResourceEventHandlerFuncs{
AddFunc: func(obj interface{}) { controller.enqueueVolume(obj) },
UpdateFunc: func(oldObj, newObj interface{}) { controller.enqueueVolume(newObj) },
DeleteFunc: func(obj interface{}) { controller.forgetVolume(obj) },
}
storageclass资源
// --------------
// StorageClasses
// no resource event handler needed for StorageClasses
if controller.classInformer == nil {
if controller.kubeVersion.AtLeast(utilversion.MustParseSemantic("v1.6.0")) {
controller.classInformer = informer.Storage().V1().StorageClasses().Informer()
} else {
controller.classInformer = informer.Storage().V1beta1().StorageClasses().Informer()
}
}
controller.classes = controller.classInformer.GetStore()
剩下的流程与其他controller manager中处理一样,可以参考https://blog.csdn.net/zhonglinzhang/article/details/89915182文章
也就是CephCluster资源,这篇文章分析
https://blog.csdn.net/zhonglinzhang/article/details/89845140
// watch for changes to the rook clusters
o.clusterController.StartWatch(namespaceToWatch, stopChan)
operator启动创建agent与discover的daemonset的服务
如果开启ceph csi则创建CSI插件,statefulset与daemon
创建controller,使用informer机制关注pv pvc storageclass
开启watch CephCluster资源进行处理