A Deployment provides declarative updates for Pods and ReplicaSets: the Deployment controller continuously drives the actual state of the Pods and ReplicaSets toward the declared target state.
You use a Deployment to create a ReplicaSet, and the ReplicaSet creates the Pods in the background. To roll out a change, you declare the desired Pod state by updating the Deployment's PodTemplateSpec; this creates a new ReplicaSet, and the Deployment moves Pods from the old ReplicaSet to the new one at a controlled rate.
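For example, a minimal Deployment manifest (the name and image are illustrative); editing spec.template, the PodTemplateSpec, is exactly the change that triggers a new ReplicaSet:
apiVersion: extensions/v1beta1
kind: Deployment
metadata:
  name: nginx-deployment
spec:
  replicas: 3
  template:
    metadata:
      labels:
        app: nginx
    spec:
      containers:
      - name: nginx
        image: nginx:1.7.9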
So when does a Deployment create its ReplicaSet, and when are the Pods created? Start from where the two controllers are registered in kube-controller-manager:
// NewControllerInitializers registers the controllers that kube-controller-manager
// starts; only the two entries relevant here are shown.
func NewControllerInitializers() map[string]InitFunc {
controllers := map[string]InitFunc{}
controllers["deployment"] = startDeploymentController
controllers["replicaset"] = startReplicaSetController
return controllers
}
startDeploymentController first checks whether the deployments resource in extensions/v1beta1 is available, i.e. whether the DeploymentController should run at all. If so, it builds a DeploymentController object and launches its Run loop in a goroutine:
func startDeploymentController(ctx ControllerContext) (bool, error) {
if !ctx.AvailableResources[schema.GroupVersionResource{Group: "extensions", Version: "v1beta1", Resource: "deployments"}] {
return false, nil
}
dc, err := deployment.NewDeploymentController(
ctx.InformerFactory.Extensions().V1beta1().Deployments(),
ctx.InformerFactory.Extensions().V1beta1().ReplicaSets(),
ctx.InformerFactory.Core().V1().Pods(),
ctx.ClientBuilder.ClientOrDie("deployment-controller"),
)
if err != nil {
return true, fmt.Errorf("error creating Deployment controller: %v", err)
}
go dc.Run(int(ctx.Options.ConcurrentDeploymentSyncs), ctx.Stop)
return true, nil
}
Path: pkg/controller/deployment/deployment_controller.go
NewDeploymentController first creates an eventBroadcaster and configures it:
eventBroadcaster := record.NewBroadcaster()
eventBroadcaster.StartLogging(glog.Infof)
// TODO: remove the wrapper when every clients have moved to use the clientset.
eventBroadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: v1core.New(client.CoreV1().RESTClient()).Events("")})
Next, the client's RateLimiter is registered for metrics tracking:
if client != nil && client.CoreV1().RESTClient().GetRateLimiter() != nil {
if err := metrics.RegisterMetricAndTrackRateLimiterUsage("deployment_controller", client.CoreV1().RESTClient().GetRateLimiter()); err != nil {
return nil, err
}
}
Then the DeploymentController object itself is constructed, wiring in the Kubernetes client, the event recorder, and the rate-limited work queue:
dc := &DeploymentController{
client: client,
eventRecorder: eventBroadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "deployment-controller"}),
queue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "deployment"),
}
dc.rsControl = controller.RealRSControl{
KubeClient: client,
Recorder: dc.eventRecorder,
}
Finally, event callbacks are registered on dInformer, rsInformer, and podInformer: dInformer and rsInformer each get add/update/delete handlers, while podInformer only gets a delete handler:
dInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
AddFunc: dc.addDeployment,
UpdateFunc: dc.updateDeployment,
// This will enter the sync loop and no-op, because the deployment has been deleted from the store.
DeleteFunc: dc.deleteDeployment,
})
rsInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
AddFunc: dc.addReplicaSet,
UpdateFunc: dc.updateReplicaSet,
DeleteFunc: dc.deleteReplicaSet,
})
podInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
DeleteFunc: dc.deletePod,
})
dc.syncHandler = dc.syncDeployment
dc.enqueueDeployment = dc.enqueue
dc.dLister = dInformer.Lister()
dc.rsLister = rsInformer.Lister()
dc.podLister = podInformer.Lister()
dc.dListerSynced = dInformer.Informer().HasSynced
dc.rsListerSynced = rsInformer.Informer().HasSynced
dc.podListerSynced = podInformer.Informer().HasSynced
After construction, the processing call chain is Run -> worker -> processNextWorkItem -> syncHandler -> syncDeployment.
// Run begins watching and syncing.
func (dc *DeploymentController) Run(workers int, stopCh <-chan struct{}) {
defer utilruntime.HandleCrash()
defer dc.queue.ShutDown()
glog.Infof("Starting deployment controller")
defer glog.Infof("Shutting down deployment controller")
if !controller.WaitForCacheSync("deployment", stopCh, dc.dListerSynced, dc.rsListerSynced, dc.podListerSynced) {
return
}
for i := 0; i < workers; i++ {
go wait.Until(dc.worker, time.Second, stopCh)
}
<-stopCh
}
// worker runs a worker thread that just dequeues items, processes them, and marks them done.
// It enforces that the syncHandler is never invoked concurrently with the same key.
func (dc *DeploymentController) worker() {
for dc.processNextWorkItem() {
}
}
func (dc *DeploymentController) processNextWorkItem() bool {
key, quit := dc.queue.Get()
if quit {
return false
}
defer dc.queue.Done(key)
err := dc.syncHandler(key.(string))
dc.handleErr(err, key)
return true
}
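handleErr, not shown above, implements the standard work-queue retry pattern. A minimal sketch based on the upstream controller (maxRetries is a package constant, assumed here):
func (dc *DeploymentController) handleErr(err error, key interface{}) {
	if err == nil {
		dc.queue.Forget(key) // success: reset the rate limiter for this key
		return
	}
	if dc.queue.NumRequeues(key) < maxRetries {
		glog.V(2).Infof("Error syncing deployment %v: %v", key, err)
		dc.queue.AddRateLimited(key) // retry later with backoff
		return
	}
	utilruntime.HandleError(err)
	glog.V(2).Infof("Dropping deployment %q out of the queue: %v", key, err)
	dc.queue.Forget(key) // too many retries: give up on this key
}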
syncDeployment implements the main control flow. There is a lot going on, so it is broken down piece by piece below.
First, the key is split into namespace and name, and the Deployment is fetched from the lister cache:
namespace, name, err := cache.SplitMetaNamespaceKey(key)
if err != nil {
return err
}
deployment, err := dc.dLister.Deployments(namespace).Get(name)
if errors.IsNotFound(err) {
glog.V(2).Infof("Deployment %v has been deleted", key)
return nil
}
if err != nil {
return err
}
// Deep-copy otherwise we are mutating our cache.
// TODO: Deep-copy only when needed.
d := deployment.DeepCopy()
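For reference, the keys on the work queue are plain "namespace/name" strings produced by the standard cache key function, which SplitMetaNamespaceKey reverses; for example (object names are hypothetical):
key, _ := cache.MetaNamespaceKeyFunc(d)         // "default/nginx-deployment"
ns, name, _ := cache.SplitMetaNamespaceKey(key) // ns = "default", name = "nginx-deployment"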
func (dc *DeploymentController) getReplicaSetsForDeployment(d *extensions.Deployment) ([]*extensions.ReplicaSet, error) {
// List all ReplicaSets to find those we own but that no longer match our
// selector. They will be orphaned by ClaimReplicaSets().
rsList, err := dc.rsLister.ReplicaSets(d.Namespace).List(labels.Everything())
if err != nil {
return nil, err
}
deploymentSelector, err := metav1.LabelSelectorAsSelector(d.Spec.Selector)
if err != nil {
return nil, fmt.Errorf("deployment %s/%s has invalid label selector: %v", d.Namespace, d.Name, err)
}
// If any adoptions are attempted, we should first recheck for deletion with
// an uncached quorum read sometime after listing ReplicaSets (see #42639).
canAdoptFunc := controller.RecheckDeletionTimestamp(func() (metav1.Object, error) {
fresh, err := dc.client.ExtensionsV1beta1().Deployments(d.Namespace).Get(d.Name, metav1.GetOptions{})
if err != nil {
return nil, err
}
if fresh.UID != d.UID {
return nil, fmt.Errorf("original Deployment %v/%v is gone: got uid %v, wanted %v", d.Namespace, d.Name, fresh.UID, d.UID)
}
return fresh, nil
})
cm := controller.NewReplicaSetControllerRefManager(dc.rsControl, d, deploymentSelector, controllerKind, canAdoptFunc)
return cm.ClaimReplicaSets(rsList)
}
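ClaimReplicaSets then reconciles ownership: it adopts matching orphans and releases owned ReplicaSets that no longer match the selector. A simplified conceptual sketch (the real controller_ref_manager logic also handles deletion timestamps and patch failures):
for _, rs := range rsList {
	controllerRef := metav1.GetControllerOf(rs)
	if controllerRef != nil && controllerRef.UID == d.UID {
		if !deploymentSelector.Matches(labels.Set(rs.Labels)) {
			// Owned but no longer matching the selector: release (orphan) it.
		}
	} else if controllerRef == nil && deploymentSelector.Matches(labels.Set(rs.Labels)) {
		// Matching orphan: adopt it by patching in a controller reference.
	}
}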
func (dc *DeploymentController) getPodMapForDeployment(d *extensions.Deployment, rsList []*extensions.ReplicaSet) (map[types.UID]*v1.PodList, error) {
// Get all Pods that potentially belong to this Deployment.
selector, err := metav1.LabelSelectorAsSelector(d.Spec.Selector)
if err != nil {
return nil, err
}
pods, err := dc.podLister.Pods(d.Namespace).List(selector)
if err != nil {
return nil, err
}
// Group Pods by their controller (if it's in rsList).
podMap := make(map[types.UID]*v1.PodList, len(rsList))
for _, rs := range rsList {
podMap[rs.UID] = &v1.PodList{}
}
for _, pod := range pods {
// Do not ignore inactive Pods because Recreate Deployments need to verify that no
// Pods from older versions are running before spinning up new Pods.
controllerRef := metav1.GetControllerOf(pod)
if controllerRef == nil {
continue
}
// Only append if we care about this UID.
if podList, ok := podMap[controllerRef.UID]; ok {
podList.Items = append(podList.Items, *pod)
}
}
return podMap, nil
}
checkPausedConditions decides whether the paused/resumed condition on the Deployment status needs updating:
func (dc *DeploymentController) checkPausedConditions(d *extensions.Deployment) error {
if d.Spec.ProgressDeadlineSeconds == nil {
return nil
}
cond := deploymentutil.GetDeploymentCondition(d.Status, extensions.DeploymentProgressing)
if cond != nil && cond.Reason == deploymentutil.TimedOutReason {
// If we have reported lack of progress, do not overwrite it with a paused condition.
return nil
}
pausedCondExists := cond != nil && cond.Reason == deploymentutil.PausedDeployReason
needsUpdate := false
if d.Spec.Paused && !pausedCondExists {
condition := deploymentutil.NewDeploymentCondition(extensions.DeploymentProgressing, v1.ConditionUnknown, deploymentutil.PausedDeployReason, "Deployment is paused")
deploymentutil.SetDeploymentCondition(&d.Status, *condition)
needsUpdate = true
} else if !d.Spec.Paused && pausedCondExists {
condition := deploymentutil.NewDeploymentCondition(extensions.DeploymentProgressing, v1.ConditionUnknown, deploymentutil.ResumedDeployReason, "Deployment is resumed")
deploymentutil.SetDeploymentCondition(&d.Status, *condition)
needsUpdate = true
}
if !needsUpdate {
return nil
}
var err error
d, err = dc.client.ExtensionsV1beta1().Deployments(d.Namespace).UpdateStatus(d)
return err
}
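When a Deployment is paused, the resulting condition in the status looks roughly like this (the reason string corresponds to PausedDeployReason):
conditions:
- message: Deployment is paused
  reason: DeploymentPaused
  status: "Unknown"
  type: Progressing
Back in syncDeployment, if the Deployment is being deleted, only its status is synced: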
if d.DeletionTimestamp != nil {
return dc.syncStatusOnly(d, rsList, podMap)
}
If the Deployment is paused, the controller only syncs it; and if a rollback is requested via Spec.RollbackTo, it performs the rollback:
if d.Spec.Paused {
return dc.sync(d, rsList, podMap)
}
// rollback is not re-entrant in case the underlying replica sets are updated with a new
// revision so we should ensure that we won't proceed to update replica sets until we
// make sure that the deployment has cleaned up its rollback spec in subsequent enqueues.
if d.Spec.RollbackTo != nil {
return dc.rollback(d, rsList, podMap)
}
scalingEvent, err := dc.isScalingEvent(d, rsList, podMap)
if err != nil {
return err
}
if scalingEvent {
return dc.sync(d, rsList, podMap)
}
switch d.Spec.Strategy.Type {
case extensions.RecreateDeploymentStrategyType:
return dc.rolloutRecreate(d, rsList, podMap)
case extensions.RollingUpdateDeploymentStrategyType:
return dc.rolloutRolling(d, rsList, podMap)
}
If the strategy is Recreate, the controller kills all existing Pods before creating new ones.
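The corresponding strategy block in a manifest is just:
strategy:
  type: Recreate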
getAllReplicaSetsAndSyncRevision returns all ReplicaSets owned by the Deployment. Inside it, getNewReplicaSet creates a new ReplicaSet object if none exists yet; if one already exists it updates it, mainly bumping the revision annotation and maintaining the pod-template-hash.
func (dc *DeploymentController) getAllReplicaSetsAndSyncRevision(d *apps.Deployment, rsList []*apps.ReplicaSet, createIfNotExisted bool) (*apps.ReplicaSet, []*apps.ReplicaSet, error) {
_, allOldRSs := deploymentutil.FindOldReplicaSets(d, rsList)
// Get new replica set with the updated revision number
newRS, err := dc.getNewReplicaSet(d, rsList, allOldRSs, createIfNotExisted)
if err != nil {
return nil, nil, err
}
return newRS, allOldRSs, nil
}
// Scale up, if we can.
scaledUp, err := dc.reconcileNewReplicaSet(allRSs, newRS, d)
if err != nil {
return err
}
if scaledUp {
// Update DeploymentStatus
return dc.syncRolloutStatus(allRSs, newRS, d)
}
// Scale down, if we can.
scaledDown, err := dc.reconcileOldReplicaSets(allRSs, controller.FilterActiveReplicaSets(oldRSs), newRS, d)
if err != nil {
return err
}
if scaledDown {
// Update DeploymentStatus
return dc.syncRolloutStatus(allRSs, newRS, d)
}
if deploymentutil.DeploymentComplete(d, &d.Status) {
if err := dc.cleanupDeployment(oldRSs, d); err != nil {
return err
}
}
// Sync deployment status
return dc.syncRolloutStatus(allRSs, newRS, d)
If the strategy is RollingUpdate, for example:
strategy:
rollingUpdate:
maxSurge: 0
maxUnavailable: 1
type: RollingUpdate
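With replicas: 3 this configuration rolls one Pod at a time: maxSurge: 0 means the total Pod count may never exceed 3, and maxUnavailable: 1 means at least 2 Pods must stay available, so each step scales the old ReplicaSet down by one before scaling the new one up by one. A sketch of how the two bounds are resolved, assuming the apimachinery intstr helper and mirroring upstream's ResolveFenceposts:
// maxSurge rounds up, maxUnavailable rounds down; both may be absolute
// values or percentages of spec.replicas.
surge, _ := intstrutil.GetScaledValueFromIntOrPercent(
	d.Spec.Strategy.RollingUpdate.MaxSurge, int(*(d.Spec.Replicas)), true)
unavailable, _ := intstrutil.GetScaledValueFromIntOrPercent(
	d.Spec.Strategy.RollingUpdate.MaxUnavailable, int(*(d.Spec.Replicas)), false)
if surge == 0 && unavailable == 0 {
	// Both zero would deadlock the rollout; upstream forces maxUnavailable to 1.
	unavailable = 1
}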
// rolloutRolling implements the logic for rolling a new replica set.
func (dc *DeploymentController) rolloutRolling(d *apps.Deployment, rsList []*apps.ReplicaSet) error {
newRS, oldRSs, err := dc.getAllReplicaSetsAndSyncRevision(d, rsList, true)
if err != nil {
return err
}
allRSs := append(oldRSs, newRS)
getAllReplicaSetsAndSyncRevision again returns all the ReplicaSets. (The listings from here on come from a reconciler variant using kromev1 types, but the logic mirrors the upstream controller.)
func (r *ReconcileDeployment) getAllReplicaSetsAndSyncRevision(d *kromev1.Deployment, rsList []*kromev1.ReplicaSet, createIfNotExisted bool) (*kromev1.ReplicaSet, []*kromev1.ReplicaSet, error) {
_, allOldRSs := findOldReplicaSets(d, rsList)
// Get new replica set with the updated revision number
newRS, err := r.getNewReplicaSet(d, rsList, allOldRSs, createIfNotExisted)
if err != nil {
return nil, nil, err
}
return newRS, allOldRSs, nil
}
findNewReplicaSet compares the Deployment's pod template (with the pod-template-hash label stripped) against each ReplicaSet's pod template. If they match, a ReplicaSet for this template already exists and can be reused; only its revision needs to be bumped. In that case getNewReplicaSet sets the annotations and conditions on the existing ReplicaSet and updates the status. If no match is found, it creates a brand-new ReplicaSet and populates all of its fields.
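A minimal sketch of that template comparison, modeled on EqualIgnoreHash in upstream deploymentutil (the injected label key is pod-template-hash):
func equalIgnoreHash(t1, t2 *v1.PodTemplateSpec) bool {
	t1Copy := t1.DeepCopy()
	t2Copy := t2.DeepCopy()
	// Drop the controller-injected hash label, then compare everything else.
	delete(t1Copy.Labels, "pod-template-hash")
	delete(t2Copy.Labels, "pod-template-hash")
	return apiequality.Semantic.DeepEqual(t1Copy, t2Copy)
}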
// Returns a replica set that matches the intent of the given deployment. Returns nil if the new replica set doesn't exist yet.
// 1. Get existing new RS (the RS that the given deployment targets, whose pod template is the same as deployment's).
// 2. If there's existing new RS, update its revision number if it's smaller than (maxOldRevision + 1), where maxOldRevision is the max revision number among all old RSes.
// 3. If there's no existing new RS and createIfNotExisted is true, create one with appropriate revision number (maxOldRevision + 1) and replicas.
// Note that the pod-template-hash will be added to adopted RSes and pods.
func (r *ReconcileDeployment) getNewReplicaSet(d *kromev1.Deployment, rsList, oldRSs []*kromev1.ReplicaSet, createIfNotExisted bool) (*kromev1.ReplicaSet, error) {
existingNewRS := findNewReplicaSet(d, rsList)
// Calculate the max revision number among all old RSes
maxOldRevision := maxRevision(oldRSs)
// Calculate revision number for this new replica set
newRevision := strconv.FormatInt(maxOldRevision+1, 10)
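maxRevision scans the old ReplicaSets' revision annotations; a sketch assuming revisions live in the deployment.kubernetes.io/revision annotation, as in upstream Kubernetes:
func maxRevision(allRSs []*kromev1.ReplicaSet) int64 {
	max := int64(0)
	for _, rs := range allRSs {
		v, err := strconv.ParseInt(rs.Annotations["deployment.kubernetes.io/revision"], 10, 64)
		if err == nil && v > max { // ReplicaSets without a parsable revision are skipped
			max = v
		}
	}
	return max
}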
scaleReplicaSetAndRecordEvent updates the ReplicaSet when either of two things differs from the desired state: the replica count, or the replica annotations.
NewRSNewReplicas computes how many replicas the new ReplicaSet should have. The rule: if the new ReplicaSet is saturated (newRS.Replicas == deployment.Replicas), or the maximum allowed number of Pods has already been created, we are rolling out; in every other case this is a scaling event.
// NewRSNewReplicas calculates the number of replicas a deployment's new RS should have.
// When one of the followings is true, we're rolling out the deployment; otherwise, we're scaling it.
// 1) The new RS is saturated: newRS's replicas == deployment's replicas
// 2) Max number of pods allowed is reached: deployment's replicas + maxSurge == all RSs' replicas
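A condensed sketch of that calculation for both strategies, based on upstream's NewRSNewReplicas (the kromev1 strategy constants are assumed to mirror the apps/v1 ones):
func NewRSNewReplicas(d *kromev1.Deployment, allRSs []*kromev1.ReplicaSet, newRS *kromev1.ReplicaSet) (int32, error) {
	switch d.Spec.Strategy.Type {
	case kromev1.RollingUpdateDeploymentStrategyType:
		maxSurge, err := intstrutil.GetScaledValueFromIntOrPercent(
			d.Spec.Strategy.RollingUpdate.MaxSurge, int(*(d.Spec.Replicas)), true)
		if err != nil {
			return 0, err
		}
		// Never exceed deployment replicas + maxSurge across all ReplicaSets.
		currentPodCount := getReplicaCountForReplicaSets(allRSs)
		maxTotalPods := *(d.Spec.Replicas) + int32(maxSurge)
		if currentPodCount >= maxTotalPods {
			return *(newRS.Spec.Replicas), nil // saturated: no room to scale up
		}
		// Scale up, but never past the deployment's desired replica count.
		scaleUpCount := maxTotalPods - currentPodCount
		if scaleUpCount > *(d.Spec.Replicas)-*(newRS.Spec.Replicas) {
			scaleUpCount = *(d.Spec.Replicas) - *(newRS.Spec.Replicas)
		}
		return *(newRS.Spec.Replicas) + scaleUpCount, nil
	case kromev1.RecreateDeploymentStrategyType:
		return *(d.Spec.Replicas), nil
	default:
		return 0, fmt.Errorf("deployment strategy %v isn't supported", d.Spec.Strategy.Type)
	}
}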
func (r *ReconcileDeployment) reconcileNewReplicaSet(allRSs []*kromev1.ReplicaSet, newRS *kromev1.ReplicaSet, deployment *kromev1.Deployment) (bool, error) {
if *(newRS.Spec.Replicas) == *(deployment.Spec.Replicas) {
// Scaling not required.
return false, nil
}
if *(newRS.Spec.Replicas) > *(deployment.Spec.Replicas) {
// Scale down.
scaled, _, err := r.scaleReplicaSetAndRecordEvent(newRS, *(deployment.Spec.Replicas), deployment)
return scaled, err
}
newReplicasCount, err := NewRSNewReplicas(deployment, allRSs, newRS)
if err != nil {
return false, err
}
scaled, _, err := r.scaleReplicaSetAndRecordEvent(newRS, newReplicasCount, deployment)
return scaled, err
}
// syncRolloutStatus updates the status of a deployment during a rollout. There are
// cases this helper will run that cannot be prevented from the scaling detection,
// for example a resync of the deployment after it was scaled up. In those cases,
// we shouldn't try to estimate any progress.
func (r *ReconcileDeployment) syncRolloutStatus(allRSs []*kromev1.ReplicaSet, newRS *kromev1.ReplicaSet, d *kromev1.Deployment) error {
newStatus := calculateStatus(allRSs, newRS, d)
// If there is no progressDeadlineSeconds set, remove any Progressing condition.
if !hasProgressDeadline(d) {
removeDeploymentCondition(&newStatus, kromev1.DeploymentProgressing)
}
6.4.1 The calculateStatus function
calculateStatus mainly recomputes deployment.Status.Conditions. The Available condition looks like this:
conditions:
- lastTransitionTime: "2020-04-29T08:51:40Z"
lastUpdateTime: "2020-04-29T08:51:40Z"
message: Deployment has minimum availability.
reason: MinimumReplicasAvailable
status: "True"
type: Available
// calculateStatus calculates the latest status for the provided deployment by looking into the provided replica sets.
func calculateStatus(allRSs []*kromev1.ReplicaSet, newRS *kromev1.ReplicaSet, deployment *kromev1.Deployment) kromev1.DeploymentStatus {
availableReplicas := getAvailableReplicaCountForReplicaSets(allRSs)
totalReplicas := getReplicaCountForReplicaSets(allRSs)
unavailableReplicas := totalReplicas - availableReplicas
// If unavailableReplicas is negative, then that means the Deployment has more available replicas running than
// desired, e.g. whenever it scales down. In such a case we should simply default unavailableReplicas to zero.
if unavailableReplicas < 0 {
unavailableReplicas = 0
}
6.4.2 The Progressing condition in status
- lastTransitionTime: "2020-04-28T06:02:57Z"
lastUpdateTime: "2020-04-28T07:34:54Z"
message: ReplicaSet "perf-5f4d644db6" has successfully progressed.
reason: NewReplicaSetAvailable
status: "True"
type: Progressing
// If there is only one replica set that is active then that means we are not running
// a new rollout and this is a resync where we don't need to estimate any progress.
// In such a case, we should simply not estimate any progress for this deployment.
currentCond := getDeploymentCondition(d.Status, kromev1.DeploymentProgressing)
isCompleteDeployment := newStatus.Replicas == newStatus.UpdatedReplicas && currentCond != nil && currentCond.Reason == util.NewRSAvailableReason
6.4.3 The deploymentComplete case
deploymentComplete considers the Deployment finished when:
newStatus.UpdatedReplicas == *(deployment.Spec.Replicas) &&
	newStatus.Replicas == *(deployment.Spec.Replicas) &&
	newStatus.AvailableReplicas == *(deployment.Spec.Replicas) &&
	newStatus.ObservedGeneration >= deployment.Generation
When the Deployment is complete, a condition of type Progressing with status True and reason NewReplicaSetAvailable is set:
// Check for progress only if there is a progress deadline set and the latest rollout
// hasn't completed yet.
if hasProgressDeadline(d) && !isCompleteDeployment {
switch {
case deploymentComplete(d, &newStatus):
// Update the deployment conditions with a message for the new replica set that
// was successfully deployed. If the condition already exists, we ignore this update.
msg := fmt.Sprintf("Deployment %q has successfully progressed.", d.Name)
if newRS != nil {
msg = fmt.Sprintf("ReplicaSet %q has successfully progressed.", newRS.Name)
}
condition := NewDeploymentCondition(kromev1.DeploymentProgressing, v1.ConditionTrue, util.NewRSAvailableReason, msg)
setDeploymentCondition(&newStatus, *condition)
6.4.4 The deploymentProgressing case
// deploymentProgressing reports progress for a deployment. Progress is estimated by comparing the
// current with the new status of the deployment that the controller is observing. More specifically,
// when new pods are scaled up or become ready or available, or old pods are scaled down, then we
// consider the deployment is progressing.
func deploymentProgressing(deployment *kromev1.Deployment, newStatus *kromev1.DeploymentStatus) bool {
oldStatus := deployment.Status
// Old replicas that need to be scaled down
oldStatusOldReplicas := oldStatus.Replicas - oldStatus.UpdatedReplicas
newStatusOldReplicas := newStatus.Replicas - newStatus.UpdatedReplicas
return (newStatus.UpdatedReplicas > oldStatus.UpdatedReplicas) ||
(newStatusOldReplicas < oldStatusOldReplicas) ||
newStatus.ReadyReplicas > deployment.Status.ReadyReplicas ||
newStatus.AvailableReplicas > deployment.Status.AvailableReplicas
}
reconcileOldReplicaSets first uses getReplicaCountForReplicaSets to total the replicas of all old ReplicaSets; if that total is 0, there is nothing left to scale down:
func (r *ReconcileDeployment) reconcileOldReplicaSets(allRSs []*kromev1.ReplicaSet, oldRSs []*kromev1.ReplicaSet, newRS *kromev1.ReplicaSet, deployment *kromev1.Deployment) (bool, error) {
oldPodsCount := getReplicaCountForReplicaSets(oldRSs)
if oldPodsCount == 0 {
// Can't scale down further
return false, nil
}
allPodsCount := getReplicaCountForReplicaSets(allRSs)
logrus.Infof("New replica set %s/%s has %d available pods.", newRS.Namespace, newRS.Name, newRS.Status.AvailableReplicas)
maxUnavailable := maxUnavailable(*deployment)
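The function then computes how far the old ReplicaSets can be scaled down without violating maxUnavailable; in the upstream controller the bound is, roughly:
// minAvailable Pods must stay up; unavailable Pods in the new RS eat into the budget.
minAvailable := *(deployment.Spec.Replicas) - maxUnavailable
newRSUnavailablePodCount := *(newRS.Spec.Replicas) - newRS.Status.AvailableReplicas
maxScaledDown := allPodsCount - minAvailable - newRSUnavailablePodCount
if maxScaledDown <= 0 {
	return false, nil // cannot scale down any further right now
}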