本节所有的代码基于1.13.4版本。
启动分析
Controller-manager的启动主要包括各种controller
的配置与初始化。初始化方法在NewControllerManagerCommand
下,该方法通过创建一个cobra.Command
对象,完成初始化的配置工作。
进入NewControllerManagerCommand
方法,该方法主要对controller-manager管理的所有controller进行初始化参数的配置,代码如下
// NewKubeControllerManagerOptions creates a new KubeControllerManagerOptions with a default config.
func NewKubeControllerManagerOptions() (*KubeControllerManagerOptions, error) {
componentConfig, err := NewDefaultComponentConfig(ports.InsecureKubeControllerManagerPort)
if err != nil {
return nil, err
}
// 包含各种Controller的初始化配置
s := KubeControllerManagerOptions{
Generic: cmoptions.NewGenericControllerManagerConfigurationOptions(componentConfig.Generic),
KubeCloudShared: cmoptions.NewKubeCloudSharedOptions(componentConfig.KubeCloudShared),
AttachDetachController: &AttachDetachControllerOptions{
ReconcilerSyncLoopPeriod: componentConfig.AttachDetachController.ReconcilerSyncLoopPeriod,
},
CSRSigningController: &CSRSigningControllerOptions{
ClusterSigningCertFile: componentConfig.CSRSigningController.ClusterSigningCertFile,
ClusterSigningKeyFile: componentConfig.CSRSigningController.ClusterSigningKeyFile,
ClusterSigningDuration: componentConfig.CSRSigningController.ClusterSigningDuration,
},
DaemonSetController: &DaemonSetControllerOptions{
ConcurrentDaemonSetSyncs: componentConfig.DaemonSetController.ConcurrentDaemonSetSyncs,
},
DeploymentController: &DeploymentControllerOptions{
ConcurrentDeploymentSyncs: componentConfig.DeploymentController.ConcurrentDeploymentSyncs,
DeploymentControllerSyncPeriod: componentConfig.DeploymentController.DeploymentControllerSyncPeriod,
},
DeprecatedFlags: &DeprecatedControllerOptions{
RegisterRetryCount: componentConfig.DeprecatedController.RegisterRetryCount,
},
EndpointController: &EndpointControllerOptions{
ConcurrentEndpointSyncs: componentConfig.EndpointController.ConcurrentEndpointSyncs,
},
GarbageCollectorController: &GarbageCollectorControllerOptions{
ConcurrentGCSyncs: componentConfig.GarbageCollectorController.ConcurrentGCSyncs,
EnableGarbageCollector: componentConfig.GarbageCollectorController.EnableGarbageCollector,
},
HPAController: &HPAControllerOptions{
HorizontalPodAutoscalerSyncPeriod: componentConfig.HPAController.HorizontalPodAutoscalerSyncPeriod,
HorizontalPodAutoscalerUpscaleForbiddenWindow: componentConfig.HPAController.HorizontalPodAutoscalerUpscaleForbiddenWindow,
HorizontalPodAutoscalerDownscaleForbiddenWindow: componentConfig.HPAController.HorizontalPodAutoscalerDownscaleForbiddenWindow,
HorizontalPodAutoscalerDownscaleStabilizationWindow: componentConfig.HPAController.HorizontalPodAutoscalerDownscaleStabilizationWindow,
HorizontalPodAutoscalerCPUInitializationPeriod: componentConfig.HPAController.HorizontalPodAutoscalerCPUInitializationPeriod,
HorizontalPodAutoscalerInitialReadinessDelay: componentConfig.HPAController.HorizontalPodAutoscalerInitialReadinessDelay,
HorizontalPodAutoscalerTolerance: componentConfig.HPAController.HorizontalPodAutoscalerTolerance,
HorizontalPodAutoscalerUseRESTClients: componentConfig.HPAController.HorizontalPodAutoscalerUseRESTClients,
},
JobController: &JobControllerOptions{
ConcurrentJobSyncs: componentConfig.JobController.ConcurrentJobSyncs,
},
NamespaceController: &NamespaceControllerOptions{
NamespaceSyncPeriod: componentConfig.NamespaceController.NamespaceSyncPeriod,
ConcurrentNamespaceSyncs: componentConfig.NamespaceController.ConcurrentNamespaceSyncs,
},
NodeIPAMController: &NodeIPAMControllerOptions{
NodeCIDRMaskSize: componentConfig.NodeIPAMController.NodeCIDRMaskSize,
},
NodeLifecycleController: &NodeLifecycleControllerOptions{
EnableTaintManager: componentConfig.NodeLifecycleController.EnableTaintManager,
NodeMonitorGracePeriod: componentConfig.NodeLifecycleController.NodeMonitorGracePeriod,
NodeStartupGracePeriod: componentConfig.NodeLifecycleController.NodeStartupGracePeriod,
PodEvictionTimeout: componentConfig.NodeLifecycleController.PodEvictionTimeout,
},
PersistentVolumeBinderController: &PersistentVolumeBinderControllerOptions{
PVClaimBinderSyncPeriod: componentConfig.PersistentVolumeBinderController.PVClaimBinderSyncPeriod,
VolumeConfiguration: componentConfig.PersistentVolumeBinderController.VolumeConfiguration,
},
PodGCController: &PodGCControllerOptions{
TerminatedPodGCThreshold: componentConfig.PodGCController.TerminatedPodGCThreshold,
},
ReplicaSetController: &ReplicaSetControllerOptions{
ConcurrentRSSyncs: componentConfig.ReplicaSetController.ConcurrentRSSyncs,
},
ReplicationController: &ReplicationControllerOptions{
ConcurrentRCSyncs: componentConfig.ReplicationController.ConcurrentRCSyncs,
},
ResourceQuotaController: &ResourceQuotaControllerOptions{
ResourceQuotaSyncPeriod: componentConfig.ResourceQuotaController.ResourceQuotaSyncPeriod,
ConcurrentResourceQuotaSyncs: componentConfig.ResourceQuotaController.ConcurrentResourceQuotaSyncs,
},
SAController: &SAControllerOptions{
ConcurrentSATokenSyncs: componentConfig.SAController.ConcurrentSATokenSyncs,
},
ServiceController: &cmoptions.ServiceControllerOptions{
ConcurrentServiceSyncs: componentConfig.ServiceController.ConcurrentServiceSyncs,
},
TTLAfterFinishedController: &TTLAfterFinishedControllerOptions{
ConcurrentTTLSyncs: componentConfig.TTLAfterFinishedController.ConcurrentTTLSyncs,
},
SecureServing: apiserveroptions.NewSecureServingOptions().WithLoopback(),
InsecureServing: (&apiserveroptions.DeprecatedInsecureServingOptions{
BindAddress: net.ParseIP(componentConfig.Generic.Address),
BindPort: int(componentConfig.Generic.Port),
BindNetwork: "tcp",
}).WithLoopback(),
Authentication: apiserveroptions.NewDelegatingAuthenticationOptions(),
Authorization: apiserveroptions.NewDelegatingAuthorizationOptions(),
}
s.Authentication.RemoteKubeConfigFileOptional = true
s.Authorization.RemoteKubeConfigFileOptional = true
s.Authorization.AlwaysAllowPaths = []string{"/healthz"}
// Set the PairName but leave certificate directory blank to generate in-memory by default
s.SecureServing.ServerCert.CertDirectory = ""
s.SecureServing.ServerCert.PairName = "kube-controller-manager"
s.SecureServing.BindPort = ports.KubeControllerManagerPort
gcIgnoredResources := make([]kubectrlmgrconfig.GroupResource, 0, len(garbagecollector.DefaultIgnoredResources()))
for r := range garbagecollector.DefaultIgnoredResources() {
gcIgnoredResources = append(gcIgnoredResources, kubectrlmgrconfig.GroupResource{Group: r.Group, Resource: r.Resource})
}
s.GarbageCollectorController.GCIgnoredResources = gcIgnoredResources
return &s, nil
}
复制代码
可以看到,我们熟悉的相应的controller都在配置内。
返回到NewControllerManagerCommand
方法,在执行最终的Run
方法之前,需要对初始化参数进行校验,并且需要对每个controller配置启动方法,相应的代码如下
KnownControllers
方法,该方法就是用来配置各种controller的启动方法,以map类型保存,最终的代码如下
func NewControllerInitializers(loopMode ControllerLoopMode) map[string]InitFunc {
controllers := map[string]InitFunc{}
controllers["endpoint"] = startEndpointController
controllers["replicationcontroller"] = startReplicationController
controllers["podgc"] = startPodGCController
controllers["resourcequota"] = startResourceQuotaController
controllers["namespace"] = startNamespaceController
controllers["serviceaccount"] = startServiceAccountController
controllers["garbagecollector"] = startGarbageCollectorController
controllers["daemonset"] = startDaemonSetController
controllers["job"] = startJobController
controllers["deployment"] = startDeploymentController
controllers["replicaset"] = startReplicaSetController
controllers["horizontalpodautoscaling"] = startHPAController
controllers["disruption"] = startDisruptionController
controllers["statefulset"] = startStatefulSetController
controllers["cronjob"] = startCronJobController
controllers["csrsigning"] = startCSRSigningController
controllers["csrapproving"] = startCSRApprovingController
controllers["csrcleaner"] = startCSRCleanerController
controllers["ttl"] = startTTLController
controllers["bootstrapsigner"] = startBootstrapSignerController
controllers["tokencleaner"] = startTokenCleanerController
controllers["nodeipam"] = startNodeIpamController
if loopMode == IncludeCloudLoops {
controllers["service"] = startServiceController
controllers["route"] = startRouteController
// TODO: volume controller into the IncludeCloudLoops only set.
// TODO: Separate cluster in cloud check from node lifecycle controller.
}
controllers["nodelifecycle"] = startNodeLifecycleController
controllers["persistentvolume-binder"] = startPersistentVolumeBinderController
controllers["attachdetach"] = startAttachDetachController
controllers["persistentvolume-expander"] = startVolumeExpandController
controllers["clusterrole-aggregation"] = startClusterRoleAggregrationController
controllers["pvc-protection"] = startPVCProtectionController
controllers["pv-protection"] = startPVProtectionController
controllers["ttl-after-finished"] = startTTLAfterFinishedController
controllers["root-ca-cert-publisher"] = startRootCACertPublisher
return controllers
}
复制代码
其中各种以start
前缀开头的均为相对应的controller的启动方法,后续分析。
进入最核心的Run
方法,主要过程包括一些server的配置、各种健康检查以及是否需要配置Leader的选举(controller-manager的选举同scheduler一致),最终依次启动所有的start
开头的controller,完成启动过程。
启动过程比较简洁,主要就是参数的配置、参数的校验以及最终的启动工作,和之前介绍的组件原理基本差不多,对controller-manager的分析主要还是要对应到每个具体的controller上。这里以最常用的Deployment为例。
Deployment的启动流程
熟悉CRD的应该都熟悉,kubernetes controller使用的是informer和workqueue的机制,即通过informer监控通知资源的变化,通过workqueue入队处理资源。Deployment也不例外。
之前说过,controller所有的入口都是以start
开头,则Deployment Controller的入口方法为startDeploymentController
。方法比较简单,如下
func startDeploymentController(ctx ControllerContext) (http.Handler, bool, error) {
if !ctx.AvailableResources[schema.GroupVersionResource{Group: "apps", Version: "v1", Resource: "deployments"}] {
return nil, false, nil
}
dc, err := deployment.NewDeploymentController(
ctx.InformerFactory.Apps().V1().Deployments(),
ctx.InformerFactory.Apps().V1().ReplicaSets(),
ctx.InformerFactory.Core().V1().Pods(),
ctx.ClientBuilder.ClientOrDie("deployment-controller"),
)
if err != nil {
return nil, true, fmt.Errorf("error creating Deployment controller: %v", err)
}
go dc.Run(int(ctx.ComponentConfig.DeploymentController.ConcurrentDeploymentSyncs), ctx.Stop)
return nil, true, nil
}
复制代码
主要分成三个步骤:
1、判断资源版本是否可用,即deployment应该为apps/v1;
2、通过NewDeploymentController
创建一个DeploymentController对象;
3、启动,循环检测资源的变化,完成最终处理任务。
进入NewDeploymentController
方法,对informer的操作主要有Deployment、ReplicaSet以及Pod,与我们对Deployment了解的资源基本符合,通过各种处理方法处理资源的CRUD等操作。
。
syncDeployment
处理Deployment的检测,保证Deployment资源一直能够被处理,
enqueue
方法保证资源的及时入队。
创建完成之后,接下来就是启动操作。通过
Run
方法进入到最终的
dc.worker
方法,最终调用的为
dc.syncHandler
方法,也就是配置中的
syncDeployment
方法,通过不停循环检测执行
syncDeployment
方法,保证资源的及时处理。方法如下:
// syncDeployment will sync the deployment with the given key.
// This function is not meant to be invoked concurrently with the same key.
func (dc *DeploymentController) syncDeployment(key string) error {
startTime := time.Now()
klog.V(4).Infof("Started syncing deployment %q (%v)", key, startTime)
defer func() {
klog.V(4).Infof("Finished syncing deployment %q (%v)", key, time.Since(startTime))
}()
// 通过key获取资源的namespace和name
namespace, name, err := cache.SplitMetaNamespaceKey(key)
if err != nil {
return err
}
// 通过namespace和name获取deployment信息
deployment, err := dc.dLister.Deployments(namespace).Get(name)
if errors.IsNotFound(err) {
klog.V(2).Infof("Deployment %v has been deleted", key)
return nil
}
if err != nil {
return err
}
// Deep-copy otherwise we are mutating our cache.
// TODO: Deep-copy only when needed.
d := deployment.DeepCopy()
everything := metav1.LabelSelector{}
if reflect.DeepEqual(d.Spec.Selector, &everything) {
dc.eventRecorder.Eventf(d, v1.EventTypeWarning, "SelectingAll", "This deployment is selecting all pods. A non-empty selector is required.")
if d.Status.ObservedGeneration < d.Generation {
d.Status.ObservedGeneration = d.Generation
dc.client.AppsV1().Deployments(d.Namespace).UpdateStatus(d)
}
return nil
}
// List ReplicaSets owned by this Deployment, while reconciling ControllerRef
// through adoption/orphaning.
// 通过deployment获取其对应的ReplicaSet
rsList, err := dc.getReplicaSetsForDeployment(d)
if err != nil {
return err
}
// List all Pods owned by this Deployment, grouped by their ReplicaSet.
// Current uses of the podMap are:
//
// * check if a Pod is labeled correctly with the pod-template-hash label.
// * check that no old Pods are running in the middle of Recreate Deployments.
// 通过Deployment和ReplicaSet获取相应的pod信息
podMap, err := dc.getPodMapForDeployment(d, rsList)
if err != nil {
return err
}
// 判断需要进行什么操作
if d.DeletionTimestamp != nil {
return dc.syncStatusOnly(d, rsList)
}
// Update deployment conditions with an Unknown condition when pausing/resuming
// a deployment. In this way, we can be sure that we won't timeout when a user
// resumes a Deployment with a set progressDeadlineSeconds.
if err = dc.checkPausedConditions(d); err != nil {
return err
}
if d.Spec.Paused {
return dc.sync(d, rsList)
}
// rollback is not re-entrant in case the underlying replica sets are updated with a new
// revision so we should ensure that we won't proceed to update replica sets until we
// make sure that the deployment has cleaned up its rollback spec in subsequent enqueues.
if getRollbackTo(d) != nil {
return dc.rollback(d, rsList)
}
scalingEvent, err := dc.isScalingEvent(d, rsList)
if err != nil {
return err
}
if scalingEvent {
return dc.sync(d, rsList)
}
switch d.Spec.Strategy.Type {
case apps.RecreateDeploymentStrategyType:
return dc.rolloutRecreate(d, rsList, podMap)
case apps.RollingUpdateDeploymentStrategyType:
return dc.rolloutRolling(d, rsList)
}
return fmt.Errorf("unexpected deployment strategy type: %s", d.Spec.Strategy.Type)
}
复制代码
步骤如下:
1、通过参数key获取资源的namespace和name;
2、通过namespace和name获取deployment信息;
3、通过Deployment获取其对应的ReplicaSet信息;
4、通过Deployment和ReplicaSet获取相对应的pod信息;
5、根据配置的参数,判断需要进行什么操作,包括sync、rollback、rolloutRecreate以及rolloutRolling操作。
至此,DeploymentController的任务基本完成。我们自定义实现CRD与Controller的步骤基本与此一致,都是通过监听需要的资源,通过对资源列表和资源操作进行比对,判断对资源最终的操作,完成controller任务。