【kubernetes/k8s源码分析】 controller-manager之deployment源码分析

Deployment是什么?

    Deployment为Pod和ReplicaSet提供声明式更新:只需在Deployment中描述目标状态,Deployment controller就会将Pod和ReplicaSet的实际状态逐步改变到目标状态。

   使用Deployment来创建ReplicaSet。ReplicaSet在后台创建pod。检查状态,通过更新Deployment的PodTemplateSpec字段来声明Pod的状态。这会创建一个新的ReplicaSet,Deployment会按照控制的速率将pod从旧的ReplicaSet移动到新的ReplicaSet中。

 

问题:

deployment什么时候创建的replica set

什么时候创建的pod

0. 开始

// NewControllerInitializers returns a map from controller name to the
// function that starts that controller. This excerpt registers the
// "deployment" and "replicaset" entries.
func NewControllerInitializers() map[string]InitFunc {
	return map[string]InitFunc{
		"deployment": startDeploymentController,
		"replicaset": startReplicaSetController,
	}
}

  0.1 startDeploymentController函数

  • 判断deployments是否开启
  • 创建并初始化DeploymentController对象,并在goroutine中启动DeploymentController对象的Run方法,循环处理流程
// startDeploymentController constructs the Deployment controller and launches
// its Run method in a new goroutine.
//
// Return values:
//   - (false, nil) when ctx.AvailableResources has no true entry for the
//     extensions/v1beta1 "deployments" resource;
//   - (true, err) when deployment.NewDeploymentController fails;
//   - (true, nil) after the controller goroutine has been started.
func startDeploymentController(ctx ControllerContext) (bool, error) {
	deploymentsGVR := schema.GroupVersionResource{Group: "extensions", Version: "v1beta1", Resource: "deployments"}
	if available := ctx.AvailableResources[deploymentsGVR]; !available {
		return false, nil
	}

	// Informers and client are resolved in the same order as the constructor
	// arguments they feed.
	dInformer := ctx.InformerFactory.Extensions().V1beta1().Deployments()
	rsInformer := ctx.InformerFactory.Extensions().V1beta1().ReplicaSets()
	podInformer := ctx.InformerFactory.Core().V1().Pods()
	kubeClient := ctx.ClientBuilder.ClientOrDie("deployment-controller")

	deployCtrl, err := deployment.NewDeploymentController(dInformer, rsInformer, podInformer, kubeClient)
	if err != nil {
		return true, fmt.Errorf("error creating Deployment controller: %v", err)
	}

	workerCount := int(ctx.Options.ConcurrentDeploymentSyncs)
	go deployCtrl.Run(workerCount, ctx.Stop)
	return true, nil
}

 

1. NewDeploymentController函数

    路径: pkg/controller/deployment/deployment_controller.go

  • 创建DeploymentController对象,具体的步骤包括

  1.1 创建eventBroadcaster并设置属性

	// Create the event broadcaster and have it log events through glog.Infof.
	eventBroadcaster := record.NewBroadcaster()
	eventBroadcaster.StartLogging(glog.Infof)
	// TODO: remove the wrapper when every clients have moved to use the clientset.
	// Additionally record events to a sink built on the client's CoreV1 REST
	// client; Events is called with an empty namespace string.
	eventBroadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: v1core.New(client.CoreV1().RESTClient()).Events("")})

  1.2 设置速率控制RateLimiter

	// Only when a client was supplied and its CoreV1 REST client exposes a
	// rate limiter: register usage tracking for that limiter under the name
	// "deployment_controller". A registration error aborts with (nil, err).
	if client != nil && client.CoreV1().RESTClient().GetRateLimiter() != nil {
		if err := metrics.RegisterMetricAndTrackRateLimiterUsage("deployment_controller", client.CoreV1().RESTClient().GetRateLimiter()); err != nil {
			return nil, err
		}
	}

  1.3 创建DeploymentController对象,包括k8s client,队列等

	// Build the controller with the API client, an event recorder whose
	// source component is "deployment-controller", and a named rate-limiting
	// work queue ("deployment") using the default controller rate limiter.
	dc := &DeploymentController{
		client:        client,
		eventRecorder: eventBroadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "deployment-controller"}),
		queue:         workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "deployment"),
	}
	// rsControl shares the same client and event recorder as the controller.
	dc.rsControl = controller.RealRSControl{
		KubeClient: client,
		Recorder:   dc.eventRecorder,
	}

  1.4 注册dInformer、rsInformer、podInformer对应的事件回调函数

  • dInformer注册了add update delete EventHandler

  • rsInformer注册了add update delete EventHandler

  • PodInformer注册了delete EventHandler

	// Deployment informer: add, update and delete callbacks.
	dInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
		AddFunc:    dc.addDeployment,
		UpdateFunc: dc.updateDeployment,
		// This will enter the sync loop and no-op, because the deployment has been deleted from the store.
		DeleteFunc: dc.deleteDeployment,
	})
	// ReplicaSet informer: add, update and delete callbacks.
	rsInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
		AddFunc:    dc.addReplicaSet,
		UpdateFunc: dc.updateReplicaSet,
		DeleteFunc: dc.deleteReplicaSet,
	})
	// Pod informer: only the delete callback is registered.
	podInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
		DeleteFunc: dc.deletePod,
	})

  1.5 设置syncHandler为syncDeployment函数

	// syncDeployment becomes the handler invoked for each dequeued key, and
	// enqueue becomes the function used to enqueue a deployment.
	dc.syncHandler = dc.syncDeployment
	dc.enqueueDeployment = dc.enqueue

	// Listers for reading deployments, replica sets and pods from the informers.
	dc.dLister = dInformer.Lister()
	dc.rsLister = rsInformer.Lister()
	dc.podLister = podInformer.Lister()
	// HasSynced functions of the three informers, stored for later sync checks.
	dc.dListerSynced = dInformer.Informer().HasSynced
	dc.rsListerSynced = rsInformer.Informer().HasSynced
	dc.podListerSynced = podInformer.Informer().HasSynced

 

2. Run函数

调用链:Run->worker->processNextWorkItem->syncHandler->syncDeployment

// Run starts the controller and blocks until stopCh yields.
//
// It calls controller.WaitForCacheSync with the deployment, replica set and
// pod sync functions and returns early if that reports false. Otherwise it
// launches `workers` goroutines, each running dc.worker via wait.Until with a
// one-second period, and then waits on stopCh. On return the deferred calls
// log the shutdown, shut the queue down and run utilruntime.HandleCrash.
func (dc *DeploymentController) Run(workers int, stopCh <-chan struct{}) {
	defer utilruntime.HandleCrash()
	defer dc.queue.ShutDown()

	glog.Infof("Starting deployment controller")
	defer glog.Infof("Shutting down deployment controller")

	cachesReady := controller.WaitForCacheSync("deployment", stopCh, dc.dListerSynced, dc.rsListerSynced, dc.podListerSynced)
	if !cachesReady {
		return
	}

	for remaining := workers; remaining > 0; remaining-- {
		go wait.Until(dc.worker, time.Second, stopCh)
	}

	<-stopCh
}

 

3. worker函数

  • 从queue取出key
  • 调用syncHandler处理,也就是syncDeployment函数
// worker dequeues items, processes them, and marks them done by calling
// processNextWorkItem repeatedly; it returns once that call reports false.
// It enforces that the syncHandler is never invoked concurrently with the same key.
func (dc *DeploymentController) worker() {
	for {
		if !dc.processNextWorkItem() {
			return
		}
	}
}

// processNextWorkItem takes one item from the queue, runs syncHandler on it
// (the item is asserted to be a string key) and hands the resulting error,
// nil or not, to handleErr. It returns false only when the queue's Get
// reports that it is quitting; otherwise it returns true.
func (dc *DeploymentController) processNextWorkItem() bool {
	item, shuttingDown := dc.queue.Get()
	if shuttingDown {
		return false
	}
	// Done is deferred so the item is released however the handler exits.
	defer dc.queue.Done(item)

	syncErr := dc.syncHandler(item.(string))
	dc.handleErr(syncErr, item)
	return true
}

 

4. syncDeployment函数

    syncDeployment处理主要流程,内容比较多,如下分开讲解

  4.1

  • 获得namespace和name
  • 根据namespace和name获得deployment对象
	// Split the queue key into namespace and name; a malformed key is
	// returned as an error.
	namespace, name, err := cache.SplitMetaNamespaceKey(key)
	if err != nil {
		return err
	}
	// Look the deployment up through the lister. A not-found result is
	// treated as "already deleted": it is logged at verbosity 2 and the sync
	// ends successfully.
	deployment, err := dc.dLister.Deployments(namespace).Get(name)
	if errors.IsNotFound(err) {
		glog.V(2).Infof("Deployment %v has been deleted", key)
		return nil
	}
	if err != nil {
		return err
	}

	// Deep-copy otherwise we are mutating our cache.
	// TODO: Deep-copy only when needed.
	d := deployment.DeepCopy()

  4.2 getReplicaSetsForDeployment函数

  •     获取deployment对应的RS列表 
// getReplicaSetsForDeployment returns the result of claiming ReplicaSets for d.
//
// It lists every ReplicaSet in d's namespace, converts d.Spec.Selector into a
// selector, and passes both to a ReplicaSet controller-ref manager whose
// ClaimReplicaSets result is returned. A listing error is returned unchanged;
// an invalid selector yields a wrapped error naming the deployment.
func (dc *DeploymentController) getReplicaSetsForDeployment(d *extensions.Deployment) ([]*extensions.ReplicaSet, error) {
	// Everything in the namespace is listed, not just selector matches, so
	// that ReplicaSets we own but that no longer match our selector are seen
	// too. They will be orphaned by ClaimReplicaSets().
	candidates, err := dc.rsLister.ReplicaSets(d.Namespace).List(labels.Everything())
	if err != nil {
		return nil, err
	}

	selector, err := metav1.LabelSelectorAsSelector(d.Spec.Selector)
	if err != nil {
		return nil, fmt.Errorf("deployment %s/%s has invalid label selector: %v", d.Namespace, d.Name, err)
	}

	// If any adoptions are attempted, we should first recheck for deletion
	// with an uncached quorum read sometime after listing ReplicaSets
	// (see #42639). The re-read fails if the UID no longer matches d.
	refetch := func() (metav1.Object, error) {
		live, getErr := dc.client.ExtensionsV1beta1().Deployments(d.Namespace).Get(d.Name, metav1.GetOptions{})
		if getErr != nil {
			return nil, getErr
		}
		if live.UID != d.UID {
			return nil, fmt.Errorf("original Deployment %v/%v is gone: got uid %v, wanted %v", d.Namespace, d.Name, live.UID, d.UID)
		}
		return live, nil
	}
	canAdopt := controller.RecheckDeletionTimestamp(refetch)

	refManager := controller.NewReplicaSetControllerRefManager(dc.rsControl, d, selector, controllerKind, canAdopt)
	return refManager.ClaimReplicaSets(candidates)
}

  4.3 getPodMapForDeployment函数

  • 获取deployment对应的Pod的列表 
// getPodMapForDeployment groups the deployment's pods by the ReplicaSet that
// controls them.
//
// The returned map has one entry per ReplicaSet in rsList, keyed by the
// ReplicaSet UID; an entry stays an empty PodList if no listed pod is
// controlled by that ReplicaSet. Pods are listed in d's namespace using
// d.Spec.Selector. Pods with no controller reference, or whose controller UID
// is not in rsList, are left out.
func (dc *DeploymentController) getPodMapForDeployment(d *extensions.Deployment, rsList []*extensions.ReplicaSet) (map[types.UID]*v1.PodList, error) {
	// Fetch every Pod that could belong to this Deployment.
	podSelector, err := metav1.LabelSelectorAsSelector(d.Spec.Selector)
	if err != nil {
		return nil, err
	}
	matched, err := dc.podLister.Pods(d.Namespace).List(podSelector)
	if err != nil {
		return nil, err
	}

	// Seed one bucket per known ReplicaSet.
	byOwner := make(map[types.UID]*v1.PodList, len(rsList))
	for i := range rsList {
		byOwner[rsList[i].UID] = &v1.PodList{}
	}

	// Do not ignore inactive Pods because Recreate Deployments need to verify
	// that no Pods from older versions are running before spinning up new Pods.
	for _, p := range matched {
		owner := metav1.GetControllerOf(p)
		if owner == nil {
			continue
		}
		bucket, tracked := byOwner[owner.UID]
		if !tracked {
			// Controlled by something outside rsList.
			continue
		}
		bucket.Items = append(bucket.Items, *p)
	}
	return byOwner, nil
}

  4.4 检测deployment是否处于pause状态

       监测是否需要更新状态

// checkPausedConditions keeps the deployment's Progressing condition in step
// with d.Spec.Paused and pushes the change with UpdateStatus.
//
// Nothing is done when ProgressDeadlineSeconds is nil, or when the existing
// Progressing condition carries TimedOutReason. Otherwise:
//   - paused, but no condition with PausedDeployReason: set a Progressing
//     condition (status Unknown) with PausedDeployReason;
//   - not paused, but such a condition exists: set a Progressing condition
//     (status Unknown) with ResumedDeployReason;
//   - any other combination: return nil without calling the API.
//
// The error from UpdateStatus, if any, is returned.
func (dc *DeploymentController) checkPausedConditions(d *extensions.Deployment) error {
	if d.Spec.ProgressDeadlineSeconds == nil {
		return nil
	}

	progressing := deploymentutil.GetDeploymentCondition(d.Status, extensions.DeploymentProgressing)
	if progressing != nil && progressing.Reason == deploymentutil.TimedOutReason {
		// If we have reported lack of progress, do not overwrite it with a paused condition.
		return nil
	}
	alreadyPaused := progressing != nil && progressing.Reason == deploymentutil.PausedDeployReason

	switch {
	case d.Spec.Paused && !alreadyPaused:
		paused := deploymentutil.NewDeploymentCondition(extensions.DeploymentProgressing, v1.ConditionUnknown, deploymentutil.PausedDeployReason, "Deployment is paused")
		deploymentutil.SetDeploymentCondition(&d.Status, *paused)
	case !d.Spec.Paused && alreadyPaused:
		resumed := deploymentutil.NewDeploymentCondition(extensions.DeploymentProgressing, v1.ConditionUnknown, deploymentutil.ResumedDeployReason, "Deployment is resumed")
		deploymentutil.SetDeploymentCondition(&d.Status, *resumed)
	default:
		// Condition already reflects the paused flag.
		return nil
	}

	_, err := dc.client.ExtensionsV1beta1().Deployments(d.Namespace).UpdateStatus(d)
	return err
}

  4.5 如果已经标记删除,则只同步状态

// A non-nil DeletionTimestamp short-circuits the sync: only syncStatusOnly
// is called and its result returned.
if d.DeletionTimestamp != nil {
   return dc.syncStatusOnly(d, rsList, podMap)
}

  4.6 如果已经标记为pause状态,则只进行同步数据

   如果rollback则进行 相应操作

	// A paused deployment only goes through dc.sync; the steps after this
	// check are not reached.
	if d.Spec.Paused {
		return dc.sync(d, rsList, podMap)
	}

	// rollback is not re-entrant in case the underlying replica sets are updated with a new
	// revision so we should ensure that we won't proceed to update replica sets until we
	// make sure that the deployment has cleaned up its rollback spec in subsequent enqueues.
	if d.Spec.RollbackTo != nil {
		return dc.rollback(d, rsList, podMap)
	}

  4.7 是否进行规模调整,如果变更规模则进行同步操作

	// Ask isScalingEvent whether this sync is a scaling event; if so, handle
	// it through dc.sync instead of continuing to the strategy switch.
	scalingEvent, err := dc.isScalingEvent(d, rsList, podMap)
	if err != nil {
		return err
	}
	if scalingEvent {
		return dc.sync(d, rsList, podMap)
	}

  4.8 判断更新操作

  • 如果为Recreate,则先kill所有旧pod再进行更新
  • 如果为RollingUpdate,则逐个进行滚动更新
	// Dispatch on the deployment's strategy type: Recreate goes to
	// rolloutRecreate, RollingUpdate goes to rolloutRolling. Any other value
	// falls through this switch without returning.
	switch d.Spec.Strategy.Type {
	case extensions.RecreateDeploymentStrategyType:
		return dc.rolloutRecreate(d, rsList, podMap)
	case extensions.RollingUpdateDeploymentStrategyType:
		return dc.rolloutRolling(d, rsList, podMap)
	}

 

5. rolloutRecreate函数

  如果为Recreate,则先kill所有旧pod再进行更新

  5.1 getAllReplicaSetsAndSyncRevision函数

       getAllReplicaSetsAndSyncRevision 函数返回该 deployment 下的所有 replica set

     getNewReplicaSet:如果不存在则创建一个新的replicaSet对象,如果存在则更新,主要是revision递增与pod-template-hash

// getAllReplicaSetsAndSyncRevision returns the deployment's new ReplicaSet
// together with all of its old ReplicaSets.
//
// The old set is the second result of deploymentutil.FindOldReplicaSets; the
// new one comes from dc.getNewReplicaSet, which receives createIfNotExisted
// unchanged. If getNewReplicaSet fails, (nil, nil, err) is returned.
func (dc *DeploymentController) getAllReplicaSetsAndSyncRevision(d *apps.Deployment, rsList []*apps.ReplicaSet, createIfNotExisted bool) (*apps.ReplicaSet, []*apps.ReplicaSet, error) {
	// The first result of FindOldReplicaSets is not needed here.
	_, previousRSs := deploymentutil.FindOldReplicaSets(d, rsList)

	// Fetch the new replica set with its revision number brought up to date.
	currentRS, err := dc.getNewReplicaSet(d, rsList, previousRSs, createIfNotExisted)
	if err == nil {
		return currentRS, previousRSs, nil
	}
	return nil, nil, err
}

    5.1.1

 

  5.2 这里没啥内容了,剩下的交给replicaset controller处理了

	// Scale up, if we can. When the new replica set was scaled, the status is
	// synced and this pass ends.
	scaledUp, err := dc.reconcileNewReplicaSet(allRSs, newRS, d)
	if err != nil {
		return err
	}
	if scaledUp {
		// Update DeploymentStatus
		return dc.syncRolloutStatus(allRSs, newRS, d)
	}

	// Scale down, if we can. Only the active old replica sets (after
	// FilterActiveReplicaSets) are handed to reconcileOldReplicaSets.
	scaledDown, err := dc.reconcileOldReplicaSets(allRSs, controller.FilterActiveReplicaSets(oldRSs), newRS, d)
	if err != nil {
		return err
	}
	if scaledDown {
		// Update DeploymentStatus
		return dc.syncRolloutStatus(allRSs, newRS, d)
	}

	// Nothing was scaled in this pass: if the deployment is complete, run
	// cleanupDeployment on the old replica sets.
	if deploymentutil.DeploymentComplete(d, &d.Status) {
		if err := dc.cleanupDeployment(oldRSs, d); err != nil {
			return err
		}
	}

	// Sync deployment status
	return dc.syncRolloutStatus(allRSs, newRS, d)

 

6. RollingUpdate

   如果设置策略为 RollingUpdate

  strategy:
    rollingUpdate:
      maxSurge: 0
      maxUnavailable: 1
    type: RollingUpdate

// rolloutRolling implements the logic for rolling a new replica set.
func (dc *DeploymentController) rolloutRolling(d *apps.Deployment, rsList []*apps.ReplicaSet) error {
	newRS, oldRSs, err := dc.getAllReplicaSetsAndSyncRevision(d, rsList, true)
	if err != nil {
		return err
	}
	allRSs := append(oldRSs, newRS)

    6.1 getAllReplicaSetsAndSyncRevision 函数

    getAllReplicaSetsAndSyncRevision 函数返回新的 replicaset 以及所有旧的 replicaset。

// getAllReplicaSetsAndSyncRevision returns the new ReplicaSet for d followed
// by all of d's old ReplicaSets.
//
// Old ReplicaSets are the second result of findOldReplicaSets. The new one is
// obtained from r.getNewReplicaSet, to which createIfNotExisted is forwarded.
// An error from getNewReplicaSet is returned with both other results nil.
func (r *ReconcileDeployment) getAllReplicaSetsAndSyncRevision(d *kromev1.Deployment, rsList []*kromev1.ReplicaSet, createIfNotExisted bool) (*kromev1.ReplicaSet, []*kromev1.ReplicaSet, error) {
	_, olderRSs := findOldReplicaSets(d, rsList)

	// Resolve the new replica set with the updated revision number.
	latestRS, err := r.getNewReplicaSet(d, rsList, olderRSs, createIfNotExisted)
	if err == nil {
		return latestRS, olderRSs, nil
	}
	return nil, nil, err
}

    6.2 getNewReplicaSet 函数

    findNewReplicaSet 函数:如果某个 replicaset 的 pod template(去除 pod-template-hash 之后)与 deployment 的 pod template 相同,则说明对应的 replicaset 已经存在,可以直接使用这个已存在的 replicaset,需要做的只是变更 revision。

     如果存在,则设置注解,设置condition,并更新状态

     如果不存在,则创建个新的,吧啦吧啦设置一堆堆

// Returns a replica set that matches the intent of the given deployment. Returns nil if the new replica set doesn't exist yet.
// 1. Get existing new RS (the RS that the given deployment targets, whose pod template is the same as deployment's).
// 2. If there's existing new RS, update its revision number if it's smaller than (maxOldRevision + 1), where maxOldRevision is the max revision number among all old RSes.
// 3. If there's no existing new RS and createIfNotExisted is true, create one with appropriate revision number (maxOldRevision + 1) and replicas.
// Note that the pod-template-hash will be added to adopted RSes and pods.
func (r *ReconcileDeployment) getNewReplicaSet(d *kromev1.Deployment, rsList, oldRSs []*kromev1.ReplicaSet, createIfNotExisted bool) (*kromev1.ReplicaSet, error) {
	existingNewRS := findNewReplicaSet(d, rsList)

	// Calculate the max revision number among all old RSes
	maxOldRevision := maxRevision(oldRSs)
	// Calculate revision number for this new replica set
	newRevision := strconv.FormatInt(maxOldRevision+1, 10)

    6.3 reconcileNewReplicaSet 函数

     scaleReplicaSetAndRecordEvent 函数,需要更新一个是 副本数不一致,另一个是 注解不一致,注解包括

  •   deployment.kubernetes.io/desired-replicas
  •   deployment.kubernetes.io/max-replicas,计算是这样子的: replica + maxsurge

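     NewRSNewReplicas 计算新的 replicaset 应有的副本数。满足以下任一条件时视为正在滚动发布(rolling out),否则视为扩缩容(scale):1) 新的 replicaset.Replicas == deployment.Replicas;2) 已经达到允许的最大 pod 数量,即 deployment.Replicas + maxSurge == 所有 replicaset 的副本数之和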

// reconcileNewReplicaSet scales the new ReplicaSet towards the deployment's
// desired replica count and reports whether a scale happened.
//
//   - Equal counts: nothing to do, returns (false, nil).
//   - New RS above the desired count: scale it down to the desired count.
//   - New RS below the desired count: scale it to the value computed by
//     NewRSNewReplicas, which calculates the number of replicas a
//     deployment's new RS should have.
//
// Scaling is performed by scaleReplicaSetAndRecordEvent; its first result and
// error are returned.
func (r *ReconcileDeployment) reconcileNewReplicaSet(allRSs []*kromev1.ReplicaSet, newRS *kromev1.ReplicaSet, deployment *kromev1.Deployment) (bool, error) {
	current, desired := *(newRS.Spec.Replicas), *(deployment.Spec.Replicas)

	switch {
	case current == desired:
		// Scaling not required.
		return false, nil
	case current > desired:
		// Scale down.
		changed, _, err := r.scaleReplicaSetAndRecordEvent(newRS, desired, deployment)
		return changed, err
	}

	target, err := NewRSNewReplicas(deployment, allRSs, newRS)
	if err != nil {
		return false, err
	}
	changed, _, err := r.scaleReplicaSetAndRecordEvent(newRS, target, deployment)
	return changed, err
}

    6.4  syncRolloutStatus 函数

// syncRolloutStatus updates the status of a deployment during a rollout. There are
// cases this helper will run that cannot be prevented from the scaling detection,
// for example a resync of the deployment after it was scaled up. In those cases,
// we shouldn't try to estimate any progress.
func (r *ReconcileDeployment) syncRolloutStatus(allRSs []*kromev1.ReplicaSet, newRS *kromev1.ReplicaSet, d *kromev1.Deployment) error {
	newStatus := calculateStatus(allRSs, newRS, d)

	// If there is no progressDeadlineSeconds set, remove any Progressing condition.
	if !hasProgressDeadline(d) {
		removeDeploymentCondition(&newStatus, kromev1.DeploymentProgressing)
	}

     6.4.1 calculateStatus 函数

  • getAvailableReplicaCountForReplicaSets 函数获取所有的replicaset.Status.AvailableReplicas
  • getReplicaCountForReplicaSets 函数获取所有的 replicaset.spec.Replicas

     calculateStatus 函数主要是更新 deployment.Status.Conditions。类型为 Available

  conditions:
  - lastTransitionTime: "2020-04-29T08:51:40Z"
    lastUpdateTime: "2020-04-29T08:51:40Z"
    message: Deployment has minimum availability.
    reason: MinimumReplicasAvailable
    status: "True"
    type: Available

// calculateStatus calculates the latest status for the provided deployment by looking into the provided replica sets.
func calculateStatus(allRSs []*kromev1.ReplicaSet, newRS *kromev1.ReplicaSet, deployment *kromev1.Deployment) kromev1.DeploymentStatus {
	availableReplicas := getAvailableReplicaCountForReplicaSets(allRSs)
	totalReplicas := getReplicaCountForReplicaSets(allRSs)
	unavailableReplicas := totalReplicas - availableReplicas
	// If unavailableReplicas is negative, then that means the Deployment has more available replicas running than
	// desired, e.g. whenever it scales down. In such a case we should simply default unavailableReplicas to zero.
	if unavailableReplicas < 0 {
		unavailableReplicas = 0
	}

      6.4.2 status 的 condition 为 Progressing

  - lastTransitionTime: "2020-04-28T06:02:57Z"
    lastUpdateTime: "2020-04-28T07:34:54Z"
    message: ReplicaSet "perf-5f4d644db6" has successfully progressed.
    reason: NewReplicaSetAvailable
    status: "True"
    type: Progressing

// If there is only one replica set that is active then that means we are not running
// a new rollout and this is a resync where we don't need to estimate any progress.
// In such a case, we should simply not estimate any progress for this deployment.
currentCond := getDeploymentCondition(d.Status, kromev1.DeploymentProgressing)
isCompleteDeployment := newStatus.Replicas == newStatus.UpdatedReplicas && currentCond != nil && currentCond.Reason == util.NewRSAvailableReason

    6.4.3 deploymentComplete 情况

newStatus.UpdatedReplicas == *(deployment.Spec.Replicas) &&
   newStatus.Replicas == *(deployment.Spec.Replicas) &&
   newStatus.AvailableReplicas == *(deployment.Spec.Replicas) &&
   newStatus.ObservedGeneration >= deployment.Generation

     如果deployment完成,则设置 deployment 的condition,设置 type 为 Progressing,true, NewReplicaSetAvailable

// Check for progress only if there is a progress deadline set and the latest rollout
// hasn't completed yet.
if hasProgressDeadline(d) && !isCompleteDeployment {
	switch {
	case deploymentComplete(d, &newStatus):
		// Update the deployment conditions with a message for the new replica set that
		// was successfully deployed. If the condition already exists, we ignore this update.
		msg := fmt.Sprintf("Deployment %q has successfully progressed.", d.Name)
		if newRS != nil {
			msg = fmt.Sprintf("ReplicaSet %q has successfully progressed.", newRS.Name)
		}
		condition := NewDeploymentCondition(kromev1.DeploymentProgressing, v1.ConditionTrue, util.NewRSAvailableReason, msg)
		setDeploymentCondition(&newStatus, *condition)

      6.4.4 deploymentProgressing 情况

// deploymentProgressing reports whether newStatus shows progress relative to
// the status currently stored on the deployment.
//
// It returns true when any of the following holds:
//   - UpdatedReplicas increased;
//   - the number of old replicas (Replicas - UpdatedReplicas) decreased;
//   - ReadyReplicas increased;
//   - AvailableReplicas increased.
func deploymentProgressing(deployment *kromev1.Deployment, newStatus *kromev1.DeploymentStatus) bool {
	prev := deployment.Status

	// Replicas not yet on the updated template, before and after.
	prevOutdated := prev.Replicas - prev.UpdatedReplicas
	nextOutdated := newStatus.Replicas - newStatus.UpdatedReplicas

	switch {
	case newStatus.UpdatedReplicas > prev.UpdatedReplicas:
		return true
	case nextOutdated < prevOutdated:
		return true
	case newStatus.ReadyReplicas > prev.ReadyReplicas:
		return true
	case newStatus.AvailableReplicas > prev.AvailableReplicas:
		return true
	}
	return false
}

    6.5 reconcileOldReplicaSets 函数

     getReplicaCountForReplicaSets 函数获取所有老版本的 replicas,如果总数为 0 则无需调整

func (r *ReconcileDeployment) reconcileOldReplicaSets(allRSs []*kromev1.ReplicaSet, oldRSs []*kromev1.ReplicaSet, newRS *kromev1.ReplicaSet, deployment *kromev1.Deployment) (bool, error) {
	oldPodsCount := getReplicaCountForReplicaSets(oldRSs)
	if oldPodsCount == 0 {
		// Can't scale down further
		return false, nil
	}

	allPodsCount := getReplicaCountForReplicaSets(allRSs)
	logrus.Infof("New replica set %s/%s has %d available pods.", newRS.Namespace, newRS.Name, newRS.Status.AvailableReplicas)
	maxUnavailable := maxUnavailable(*deployment)

 

你可能感兴趣的:(【kubernetes/k8s源码分析】 controller-manager之deployment源码分析)