[kubernetes/k8s source code analysis] kube-controller-manager: replication controller source code analysis

 

ReplicationController (RC)

 

    The replication controller (RC for short) controls the number of replicas for a group of pods.

    In Kubernetes, a Replication Controller manages pods by recording a desired replica count for each pod group; whenever the actual number of replicas drifts from the desired count, it dynamically creates or deletes pods until the desired value is reached.
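
    For context, here is a minimal sketch of the object the controller reconciles against, built with the current client-go API types (the name, namespace, labels, and image are illustrative, not from the source): the controller compares Spec.Replicas against the pods it actually observes and creates or deletes pods to close the gap.

package example

import (
       v1 "k8s.io/api/core/v1"
       metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// desiredRC builds an RC whose desired replica count is 3 (illustrative values only).
func desiredRC() *v1.ReplicationController {
       replicas := int32(3)
       return &v1.ReplicationController{
              ObjectMeta: metav1.ObjectMeta{Name: "nginx", Namespace: "default"}, // hypothetical name/namespace
              Spec: v1.ReplicationControllerSpec{
                     Replicas: &replicas,
                     Selector: map[string]string{"app": "nginx"},
                     Template: &v1.PodTemplateSpec{
                            ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{"app": "nginx"}},
                            Spec: v1.PodSpec{
                                   Containers: []v1.Container{{Name: "nginx", Image: "nginx:1.15"}},
                            },
                     },
              },
       }
}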

    The ReplicationManager struct

  • kubeClient: the client used to talk to the API server
  • podControl: the interface used to create and delete pods
  • burstReplicas: the maximum number of pod create/delete operations allowed in a single sync (burst)
  • syncHandler: the function that actually performs the replica sync
type ReplicationManager struct {
       kubeClient clientset.Interface
       podControl controller.PodControlInterface

       burstReplicas int
       // To allow injection of syncReplicationController for testing.
       syncHandler func(rcKey string) error

       // A TTLCache of pod creates/deletes each rc expects to see.
       expectations *controller.UIDTrackingControllerExpectations

       rcLister       corelisters.ReplicationControllerLister
       rcListerSynced cache.InformerSynced

       podLister corelisters.PodLister
       podListerSynced cache.InformerSynced

       // Controllers that need to be synced
       queue workqueue.RateLimitingInterface
}

1 replication controller startup flow

   startReplicationController starts a goroutine that calls replicationcontroller.NewReplicationManager to create a ReplicationManager and then runs its Run method.

func startReplicationController(ctx ControllerContext) (bool, error) {
       go replicationcontroller.NewReplicationManager(
              ctx.InformerFactory.Core().V1().Pods(),
              ctx.InformerFactory.Core().V1().ReplicationControllers(),
              ctx.ClientBuilder.ClientOrDie("replication-controller"),
              replicationcontroller.BurstReplicas,
       ).Run(int(ctx.Options.ConcurrentRCSyncs), ctx.Stop)
       return true, nil
}

 

1.1 The NewReplicationManager function

   NewReplicationManager initializes the ReplicationManager struct, wiring up the apiserver client, the pod control interface, the work queue, the rcLister, the podLister, and so on. Most importantly, syncHandler is set to syncReplicationController, which is covered in section 2.

// NewReplicationManager configures a replication manager with the specified event recorder
func NewReplicationManager(podInformer coreinformers.PodInformer, rcInformer coreinformers.ReplicationControllerInformer, kubeClient clientset.Interface, burstReplicas int) *ReplicationManager{
       rm := &ReplicationManager{
              kubeClient: kubeClient,
              podControl: controller.RealPodControl{
                     KubeClient: kubeClient,
                     Recorder:   eventBroadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "replication-controller"}),
              },
              burstReplicas: burstReplicas,
              expectations:  controller.NewUIDTrackingControllerExpectations(controller.NewControllerExpectations()),
              queue:         workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "replicationmanager"),
       }

       rcInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
              // add/update/delete handlers (bodies elided in this excerpt) enqueue the affected RC's key onto rm.queue
       })
       rm.rcLister = rcInformer.Lister()
       rm.rcListerSynced = rcInformer.Informer().HasSynced

       podInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
              // add/update/delete handlers (bodies elided in this excerpt) map the pod to its owning RC and enqueue that RC's key
       })
       rm.podLister = podInformer.Lister()
       rm.podListerSynced = podInformer.Informer().HasSynced

       rm.syncHandler = rm.syncReplicationController
       return rm
}
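
   The bodies of the two AddEventHandler registrations are elided in the excerpt above. Conceptually they reduce to computing the affected RC's namespace/name key and adding it to the work queue, roughly as in the following sketch (enqueueRC is an illustrative name, not necessarily the upstream handler; it assumes the same package and imports as the file above):

// Illustrative sketch: compute the "<namespace>/<name>" key for the affected RC
// and add it to the rate-limited work queue so a worker will sync it.
func (rm *ReplicationManager) enqueueRC(obj interface{}) {
       // DeletionHandlingMetaNamespaceKeyFunc also handles tombstones seen on deletes.
       key, err := cache.DeletionHandlingMetaNamespaceKeyFunc(obj)
       if err != nil {
              utilruntime.HandleError(fmt.Errorf("couldn't get key for object %+v: %v", obj, err))
              return
       }
       // A worker will eventually pick the key up and call syncReplicationController.
       rm.queue.Add(key)
}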

 

1.2 Run 函数

   The Run function does two things: watch and sync. The watch part waits for the pod and RC informer caches to finish syncing (podListerSynced and rcListerSynced) before any workers are started.

// Run begins watching and syncing.
func (rm *ReplicationManager) Run(workers int, stopCh <-chan struct{}) {
       defer utilruntime.HandleCrash()
       defer rm.queue.ShutDown()

       glog.Infof("Starting RC controller")
       defer glog.Infof("Shutting down RC controller")

       if !controller.WaitForCacheSync("RC", stopCh, rm.podListerSynced, rm.rcListerSynced) {
              return
       }

       for i := 0; i < workers; i++ {
              go wait.Until(rm.worker, time.Second, stopCh)
       }

       <-stopCh
}
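
   wait.Until re-invokes the supplied function once per period until the stop channel is closed, which is what keeps each worker loop alive and restarts it if it ever returns. A minimal standalone sketch of that behavior (illustrative program, not from the source):

package main

import (
       "fmt"
       "time"

       "k8s.io/apimachinery/pkg/util/wait"
)

func main() {
       stopCh := make(chan struct{})
       // Runs immediately and then once per second until stopCh is closed,
       // mirroring how Run starts each rm.worker goroutine.
       go wait.Until(func() { fmt.Println("worker tick") }, time.Second, stopCh)
       time.Sleep(3 * time.Second)
       close(stopCh)
}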

 

   The sync part starts several workers; each worker pulls a key from the queue and calls syncReplicationController to process it (covered in section 2).

// worker runs a worker thread that just dequeues items, processes them, and marks them done.
// It enforces that the syncHandler is never invoked concurrently with the same key.
func (rm *ReplicationManager) worker() {
       for rm.processNextWorkItem() {
       }
       glog.Infof("replication controller worker shutting down")
}

func (rm *ReplicationManager) processNextWorkItem() bool {
       key, quit := rm.queue.Get()
       if quit {
              return false
       }
       defer rm.queue.Done(key)

       err := rm.syncHandler(key.(string))
       if err == nil {
              rm.queue.Forget(key)
              return true
       }

       rm.queue.AddRateLimited(key)
       utilruntime.HandleError(err)
       return true
}


2 The syncReplicationController function

// syncReplicationController will sync the rc with the given key if it has had its expectations fulfilled, meaning
// it did not expect to see any more of its pods created or deleted. This function is not meant to be invoked
// concurrently with the same key.
func (rm *ReplicationManager) syncReplicationController(key string) error

 

   2.1 SplitMetaNamespaceKey splits the key into a namespace and a name, which are then used to fetch the replication controller from the rcLister.

namespace, name, err := cache.SplitMetaNamespaceKey(key)
if err != nil {
       return err
}
rc, err := rm.rcLister.ReplicationControllers(namespace).Get(name)
if errors.IsNotFound(err) {
       glog.Infof("Replication Controller has been deleted %v", key)
       rm.expectations.DeleteExpectations(key)
       return nil
}
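
   The key stored in the queue is the "<namespace>/<name>" string produced by cache.MetaNamespaceKeyFunc when the RC was enqueued. A quick standalone sketch (the key value is illustrative) of how SplitMetaNamespaceKey takes it apart:

package main

import (
       "fmt"

       "k8s.io/client-go/tools/cache"
)

func main() {
       key := "default/nginx" // illustrative key for an RC named "nginx" in namespace "default"
       namespace, name, err := cache.SplitMetaNamespaceKey(key)
       if err != nil {
              panic(err)
       }
       fmt.Println(namespace, name) // prints: default nginx
}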

 

   2.2 SatisfiedExpectations looks up the controller's ControlleeExpectations (the outstanding add/del counts and their timestamp) in the cache storage and decides whether the RC needs a sync.

// SatisfiedExpectations returns true if the required adds/dels for the given controller have been observed.
// Add/del counts are established by the controller at sync time, and updated as controllees are observed by the controller
// manager.
func (r *ControllerExpectations) SatisfiedExpectations(controllerKey string) bool {
       if exp, exists, err := r.GetExpectations(controllerKey); exists {
              if exp.Fulfilled() {
                     glog.V(4).Infof("Controller expectations fulfilled %#v", exp)
                     return true
              } else if exp.isExpired() {
                     glog.V(4).Infof("Controller expectations expired %#v", exp)
                     return true
              } else {
                     glog.V(4).Infof("Controller still waiting on expectations %#v", exp)
                     return false
              }
       } else if err != nil {
              glog.V(2).Infof("Error encountered while checking expectations %#v, forcing sync", err)
       } else {
              // A brand-new controller has no expectations recorded yet; fall through and trigger a sync.
       }
       // Trigger a sync if we either encountered and error (which shouldn't happen since we're
       // getting from local store) or this controller hasn't established expectations.
       return true
}
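
    The expectation record behaves like a pair of countdowns: when a sync decides to create or delete N pods it records N expected adds or dels, each observed pod event decrements the matching counter, and the next sync for that RC proceeds only once both counters reach zero (or the record expires). The following toy model sketches that bookkeeping; it is a simplification, not the real controller.ControllerExpectations implementation (no TTL, no per-UID tracking, not thread-safe):

package main

import "fmt"

// toyExpectations mimics the add/del countdown semantics of the controller's
// expectations cache in a simplified form.
type toyExpectations struct {
       add, del int64
}

func (e *toyExpectations) ExpectCreations(n int64) { e.add = n }
func (e *toyExpectations) ExpectDeletions(n int64) { e.del = n }
func (e *toyExpectations) CreationObserved()       { e.add-- }
func (e *toyExpectations) DeletionObserved()       { e.del-- }

// Fulfilled corresponds to exp.Fulfilled(): no outstanding creations or deletions.
func (e *toyExpectations) Fulfilled() bool { return e.add <= 0 && e.del <= 0 }

func main() {
       exp := &toyExpectations{}
       exp.ExpectCreations(2)       // the sync decided to create 2 pods
       fmt.Println(exp.Fulfilled()) // false: still waiting to observe the adds
       exp.CreationObserved()       // the pod informer saw the first new pod
       exp.CreationObserved()       // ... and the second
       fmt.Println(exp.Fulfilled()) // true: the next sync may proceed
}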

   

    2.3 manageReplicas is invoked when a sync is needed (rcNeedsSync) and the RC's DeletionTimestamp is nil; it reconciles the actual pod count with the desired state. Section 3 walks through manageReplicas.

var manageReplicasErr error
if rcNeedsSync && rc.DeletionTimestamp == nil {
       manageReplicasErr = rm.manageReplicas(filteredPods, rc)
}

 

 

3 The manageReplicas function

    manageReplicas checks and adjusts the replicas for the RC: it keeps the number of pods owned by the RC at the desired count by performing the corresponding add/del operations.

 

    3.1 Compute diff, the difference between the number of filteredPods and the desired replica count. If diff is 0, the actual state already matches the desired state and nothing needs to be fixed.

diff := len(filteredPods) - int(*(rc.Spec.Replicas))
rcKey, err := controller.KeyFunc(rc)
if err != nil {
       return err
}
if diff == 0 {
       return nil
}
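
    A quick worked example of the arithmetic (the numbers are made up): with 2 matching pods and Spec.Replicas = 3, diff is -1 and one pod must be created; with 5 matching pods, diff is +2 and two pods must be deleted.

package main

import "fmt"

func main() {
       // Illustrative numbers only: 2 matching pods observed, 3 desired.
       filteredPods, desired := 2, 3
       diff := filteredPods - desired
       fmt.Println(diff) // -1: one pod must be created; a positive diff would mean deletions
}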

 

    3.2 If diff is less than 0, |diff| pods are missing; CreatePodsWithControllerRef is called in goroutines to create them.

if diff < 0 {
       // diff is negative at this point: flip the sign and cap the number of creations at burstReplicas
       diff *= -1
       if diff > rm.burstReplicas {
              diff = rm.burstReplicas
       }
       rm.expectations.ExpectCreations(rcKey, diff)

       for i := 0; i < diff; i++ {
              go func() {
                     boolPtr := func(b bool) *bool { return &b }
                     controllerRef := &metav1.OwnerReference{
                            APIVersion:         controllerKind.GroupVersion().String(),
                            Kind:               controllerKind.Kind,
                            Name:               rc.Name,
                            UID:                rc.UID,
                            BlockOwnerDeletion: boolPtr(true),
                            Controller:         boolPtr(true),
                     }
                     err = rm.podControl.CreatePodsWithControllerRef(rc.Namespace, rc.Spec.Template, rc, controllerRef)
           
              }()
       }
     
       return nil
}

 

   3.3 If diff is greater than 0, there are more pods than desired.

   Sort the pods so that not-ready < ready, unscheduled < scheduled, and pending < running, which ensures that pods in earlier lifecycle stages are deleted first whenever possible.

if *(rc.Spec.Replicas) != 0 {
       // Sort the pods in the order such that not-ready < ready, unscheduled
       // < scheduled, and pending < running. This ensures that we delete pods
       // in the earlier stages whenever possible.
       sort.Sort(controller.ActivePods(filteredPods))
}
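
    The ordering can be pictured with a toy comparator like the one below; it only models the three rules quoted in the comment, whereas the real controller.ActivePods Less also breaks ties on readiness duration, restart counts, and creation time (the pod names and fields here are illustrative):

package main

import (
       "fmt"
       "sort"
)

// toyPod captures just the fields needed to illustrate the ordering.
type toyPod struct {
       name      string
       scheduled bool // node assigned
       running   bool
       ready     bool
}

// rank: lower ranks sort first and are therefore deleted first.
func rank(p toyPod) int {
       switch {
       case !p.scheduled:
              return 0 // unscheduled < scheduled
       case !p.running:
              return 1 // pending < running
       case !p.ready:
              return 2 // not-ready < ready
       default:
              return 3
       }
}

func main() {
       pods := []toyPod{
              {name: "ready", scheduled: true, running: true, ready: true},
              {name: "pending", scheduled: true},
              {name: "unscheduled"},
              {name: "not-ready", scheduled: true, running: true},
       }
       sort.Slice(pods, func(i, j int) bool { return rank(pods[i]) < rank(pods[j]) })
       for _, p := range pods {
              fmt.Println(p.name) // unscheduled, pending, not-ready, ready
       }
}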

 

    DeletePod on the pod control interface is then called to delete the excess pods.

for i := 0; i < diff; i++ {
       go func(ix int) {
              defer wg.Done()
              if err := rm.podControl.DeletePod(rc.Namespace, filteredPods[ix].Name, rc); err != nil {
                     // Decrement the expected number of deletes because the informer won't observe this deletion
                     podKey := controller.PodKey(filteredPods[ix])
                     glog.V(2).Infof("Failed to delete %v due to %v, decrementing expectations for controller %q/%q", podKey, err, rc.Namespace, rc.Name)
                     rm.expectations.DeletionObserved(rcKey, podKey)
                     errCh <- err
                     utilruntime.HandleError(err)
              }
       }(i)
}
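
    The wg wait group and errCh error channel used in this loop are declared earlier in manageReplicas and are elided from the excerpt. The fan-out/collect-first-error pattern they implement looks roughly like this standalone sketch, with deleteOne standing in for rm.podControl.DeletePod:

package main

import (
       "fmt"
       "sync"
)

// deleteOne is a stand-in for rm.podControl.DeletePod in this sketch.
func deleteOne(name string) error {
       if name == "pod-1" {
              return fmt.Errorf("simulated delete failure for %s", name)
       }
       return nil
}

func main() {
       podsToDelete := []string{"pod-0", "pod-1", "pod-2"}

       var wg sync.WaitGroup
       wg.Add(len(podsToDelete))
       errCh := make(chan error, len(podsToDelete)) // buffered so workers never block

       for _, name := range podsToDelete {
              go func(name string) {
                     defer wg.Done()
                     if err := deleteOne(name); err != nil {
                            errCh <- err
                     }
              }(name)
       }
       wg.Wait()

       // Report only the first error, mirroring how manageReplicas drains errCh after the loop.
       select {
       case err := <-errCh:
              fmt.Println("first error:", err)
       default:
              fmt.Println("all deletions succeeded")
       }
}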

 

Summary

 

  • When kube-controller-manager starts, startReplicationController is registered as the replication controller's start function
  • NewReplicationManager wires up the manager: API client, pod control, informers/listers, work queue, and syncHandler
  • The main execution body, Run, watches (waits for the informer caches to sync) and syncs: workers pull keys from the queue and reconcile the corresponding RCs

 

 

 
