Broker changes are handled by the BrokerChangeListener class, which watches the broker ids registered under /brokers/ids in ZooKeeper.
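For context, here is a minimal standalone sketch of registering such a child watch with the same zkclient library (org.I0Itec.zkclient) that Kafka uses; the connect string and timeouts are placeholders, and in the real code the registration is wired up inside the controller's state machines:

import org.I0Itec.zkclient.{IZkChildListener, ZkClient}

object WatchBrokerIds {
  def main(args: Array[String]): Unit = {
    // illustrative connection settings; adjust for your cluster
    val zkClient = new ZkClient("localhost:2181", 6000, 6000)
    val listener = new IZkChildListener {
      // fired whenever a broker's ephemeral znode appears or disappears
      override def handleChildChange(parentPath: String, currentChilds: java.util.List[String]): Unit = {
        println(s"children of $parentPath are now $currentChilds")
      }
    }
    // register the watch; zkclient re-registers it after every notification
    zkClient.subscribeChildChanges("/brokers/ids", listener)
    Thread.sleep(Long.MaxValue)
  }
}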
BrokerChangeListener's handleChildChange hands the newly added and the dead brokers over to the controller:
class BrokerChangeListener() extends IZkChildListener with Logging {
  this.logIdent = "[BrokerChangeListener on Controller " + controller.config.brokerId + "]: "
  def handleChildChange(parentPath : String, currentBrokerList : java.util.List[String]) {
    info("Broker change listener fired for path %s with children %s".format(parentPath, currentBrokerList.mkString(",")))
    inLock(controllerContext.controllerLock) {
      if (hasStarted.get) {
        ControllerStats.leaderElectionTimer.time {
          try {
            val curBrokerIds = currentBrokerList.map(_.toInt).toSet
            val newBrokerIds = curBrokerIds -- controllerContext.liveOrShuttingDownBrokerIds
            val newBrokerInfo = newBrokerIds.map(ZkUtils.getBrokerInfo(zkClient, _))
            val newBrokers = newBrokerInfo.filter(_.isDefined).map(_.get)
            val deadBrokerIds = controllerContext.liveOrShuttingDownBrokerIds -- curBrokerIds
            controllerContext.liveBrokers = curBrokerIds.map(ZkUtils.getBrokerInfo(zkClient, _)).filter(_.isDefined).map(_.get)
            info("Newly added brokers: %s, deleted brokers: %s, all live brokers: %s"
              .format(newBrokerIds.mkString(","), deadBrokerIds.mkString(","), controllerContext.liveBrokerIds.mkString(",")))
            newBrokers.foreach(controllerContext.controllerChannelManager.addBroker(_))
            deadBrokerIds.foreach(controllerContext.controllerChannelManager.removeBroker(_))
            // hand the changed brokers over to the controller
            if(newBrokerIds.size > 0)
              controller.onBrokerStartup(newBrokerIds.toSeq)
            if(deadBrokerIds.size > 0)
              controller.onBrokerFailure(deadBrokerIds.toSeq)
          } catch {
            case e: Throwable => error("Error while handling broker changes", e)
          }
        }
      }
    }
  }
}
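The new/dead broker detection above is plain set difference over broker ids. A tiny self-contained example with made-up ids:

object BrokerDiffExample {
  def main(args: Array[String]): Unit = {
    // hypothetical snapshot: the controller previously knew brokers 1,2,3; ZK now reports 2,3,4
    val liveOrShuttingDownBrokerIds = Set(1, 2, 3)
    val curBrokerIds = Set(2, 3, 4)

    val newBrokerIds  = curBrokerIds -- liveOrShuttingDownBrokerIds  // Set(4)
    val deadBrokerIds = liveOrShuttingDownBrokerIds -- curBrokerIds  // Set(1)

    println(s"new: $newBrokerIds, dead: $deadBrokerIds")
  }
}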
KafkaController's onBrokerFailure:
def onBrokerFailure(deadBrokers: Seq[Int]) {
  info("Broker failure callback for %s".format(deadBrokers.mkString(",")))
  val deadBrokersThatWereShuttingDown =
    deadBrokers.filter(id => controllerContext.shuttingDownBrokerIds.remove(id))
  info("Removed %s from list of shutting down brokers.".format(deadBrokersThatWereShuttingDown))
  val deadBrokersSet = deadBrokers.toSet
  // trigger OfflinePartition state for all partitions whose current leader is one amongst the dead brokers
  // first handle the partitions whose leader was on a dead broker
  val partitionsWithoutLeader = controllerContext.partitionLeadershipInfo.filter(partitionAndLeader =>
    deadBrokersSet.contains(partitionAndLeader._2.leaderAndIsr.leader) &&
      !deleteTopicManager.isTopicQueuedUpForDeletion(partitionAndLeader._1.topic)).keySet
  // move these partitions offline; no metadata update is sent at this step
  partitionStateMachine.handleStateChanges(partitionsWithoutLeader, OfflinePartition)
  // trigger OnlinePartition state changes for offline or new partitions
  // elect new leaders, bring the partitions online, and update metadata on every broker
  partitionStateMachine.triggerOnlinePartitionStateChange()
  // filter out the replicas that belong to topics that are being deleted
  var allReplicasOnDeadBrokers = controllerContext.replicasOnBrokers(deadBrokersSet)
  val activeReplicasOnDeadBrokers = allReplicasOnDeadBrokers.filterNot(p => deleteTopicManager.isTopicQueuedUpForDeletion(p.topic))
  // handle dead replicas (all replicas on the dead brokers)
  replicaStateMachine.handleStateChanges(activeReplicasOnDeadBrokers, OfflineReplica)
  // check if topic deletion state for the dead replicas needs to be updated
  val replicasForTopicsToBeDeleted = allReplicasOnDeadBrokers.filter(p => deleteTopicManager.isTopicQueuedUpForDeletion(p.topic))
  if(replicasForTopicsToBeDeleted.size > 0) {
    // it is required to mark the respective replicas in TopicDeletionFailed state since the replica cannot be
    // deleted when the broker is down. This will prevent the replica from being in TopicDeletionStarted state indefinitely
    // since topic deletion cannot be retried until at least one replica is in TopicDeletionStarted state
    deleteTopicManager.failReplicaDeletion(replicasForTopicsToBeDeleted)
  }
}
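The first step, finding the partitions that lost their leader, is just a filter over the controller's leadership cache. A simplified sketch, using hypothetical stand-in types (TopicAndPartition/LeaderInfo here are cut-down versions, and the topic-deletion check is omitted):

object OfflineLeaderFilterExample {
  // simplified stand-ins for Kafka's TopicAndPartition and LeaderIsrAndControllerEpoch
  case class TopicAndPartition(topic: String, partition: Int)
  case class LeaderInfo(leader: Int)

  def main(args: Array[String]): Unit = {
    val partitionLeadershipInfo = Map(
      TopicAndPartition("orders", 0) -> LeaderInfo(leader = 1),
      TopicAndPartition("orders", 1) -> LeaderInfo(leader = 2),
      TopicAndPartition("clicks", 0) -> LeaderInfo(leader = 1))
    val deadBrokersSet = Set(1)

    // same shape as the filter in onBrokerFailure: keep partitions whose leader died
    val partitionsWithoutLeader = partitionLeadershipInfo
      .filter { case (_, info) => deadBrokersSet.contains(info.leader) }
      .keySet
    println(partitionsWithoutLeader) // Set(TopicAndPartition(orders,0), TopicAndPartition(clicks,0))
  }
}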
PartitionStateMachine's triggerOnlinePartitionStateChange then brings the affected partitions back online under new leaders.
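Roughly, the method scans the state machine's partition-state map and asks every partition in NewPartition or OfflinePartition state to become OnlinePartition, which is what triggers leader election. A minimal sketch of that loop, with simplified stand-in states (not the real state machine):

object TriggerOnlineSketch {
  sealed trait PartitionState
  case object NewPartition     extends PartitionState
  case object OnlinePartition  extends PartitionState
  case object OfflinePartition extends PartitionState

  def main(args: Array[String]): Unit = {
    var partitionState = Map("orders-0" -> OfflinePartition, "orders-1" -> OnlinePartition)
    // same shape as triggerOnlinePartitionStateChange: every New/Offline partition
    // is asked to move to OnlinePartition, which runs leader election for it
    for ((tp, state) <- partitionState if state == OfflinePartition || state == NewPartition) {
      println(s"electing a leader for $tp and moving it to OnlinePartition")
      partitionState += (tp -> OnlinePartition)
    }
    println(partitionState)
  }
}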
Internally it calls electLeaderForPartition:
val (leaderAndIsr, replicas) = leaderSelector.selectLeader(topicAndPartition, currentLeaderAndIsr)
val (updateSucceeded, newVersion) = ReplicationUtils.updateLeaderAndIsr(zkClient, topic, partition,
  leaderAndIsr, controller.epoch, currentLeaderAndIsr.zkVersion)
newLeaderAndIsr = leaderAndIsr
newLeaderAndIsr.zkVersion = newVersion
// update the leader cache
controllerContext.partitionLeadershipInfo.put(TopicAndPartition(topic, partition), newLeaderIsrAndControllerEpoch)
stateChangeLogger.trace("Controller %d epoch %d elected leader %d for Offline partition %s"
  .format(controllerId, controller.epoch, newLeaderAndIsr.leader, topicAndPartition))
val replicas = controllerContext.partitionReplicaAssignment(TopicAndPartition(topic, partition))
// store the new leader and ISR in cache; this also queues broker metadata updates (updateMetadataRequestMap)
brokerRequestBatch.addLeaderAndIsrRequestForBrokers(replicasForThisPartition, topic, partition,
  newLeaderIsrAndControllerEpoch, replicas)
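Note that updateLeaderAndIsr is a conditional write: it passes along the zkVersion that was read together with the current leader/ISR, so the update only succeeds if nobody changed the znode in between. A sketch of the same compare-and-set semantics against the raw ZooKeeper client (the path, JSON payload, and version number are illustrative; the real code goes through ZkUtils/ReplicationUtils):

import org.apache.zookeeper.{WatchedEvent, Watcher, ZooKeeper}
import org.apache.zookeeper.KeeperException.BadVersionException

object ConditionalUpdateSketch {
  def main(args: Array[String]): Unit = {
    val zk = new ZooKeeper("localhost:2181", 6000, new Watcher {
      override def process(event: WatchedEvent): Unit = ()
    })
    val path = "/brokers/topics/orders/partitions/0/state"
    val newState = """{"leader":2,"leader_epoch":5,"isr":[2,3]}""".getBytes("UTF-8")
    val expectedZkVersion = 4 // the zkVersion read together with the current leader/ISR
    try {
      // setData only succeeds if the znode is still at expectedZkVersion: a compare-and-set
      val stat = zk.setData(path, newState, expectedZkVersion)
      println(s"update succeeded, new zkVersion = ${stat.getVersion}")
    } catch {
      case _: BadVersionException =>
        println("someone else changed the leader/ISR first; re-read and retry")
    }
  }
}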
The leader selector used by the partitionStateMachine here is the OfflinePartitionLeaderSelector, which elects a new leader for each partition that was led by the dead broker:
case false =>
  // pick the first live assigned replica that is in the ISR as the new leader
  val liveReplicasInIsr = liveAssignedReplicas.filter(r => liveBrokersInIsr.contains(r))
  val newLeader = liveReplicasInIsr.head
  debug("Some broker in ISR is alive for %s. Select %d from ISR %s to be the leader."
    .format(topicAndPartition, newLeader, liveBrokersInIsr.mkString(",")))
  new LeaderAndIsr(newLeader, currentLeaderEpoch + 1, liveBrokersInIsr.toList, currentLeaderIsrZkPathVersion + 1)
}
info("Selected new leader and ISR %s for offline partition %s".format(newLeaderAndIsr.toString(), topicAndPartition))
(newLeaderAndIsr, liveAssignedReplicas)
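The rule in this branch boils down to "first assigned replica that is both alive and in the ISR becomes leader", since liveAssignedReplicas preserves the assignment order. A pure-function sketch of that rule (function and parameter names are mine, not the source's):

object OfflineLeaderSelectionSketch {
  // select a leader the way OfflinePartitionLeaderSelector does when the ISR still has live brokers
  def selectLeader(assignedReplicas: Seq[Int], isr: Set[Int], liveBrokers: Set[Int]): Option[Int] = {
    val liveBrokersInIsr = isr.intersect(liveBrokers)
    assignedReplicas.find(r => liveBrokersInIsr.contains(r))
  }

  def main(args: Array[String]): Unit = {
    // replicas assigned in preference order 1,2,3; broker 1 is dead; ISR was {1,2}
    println(selectLeader(Seq(1, 2, 3), isr = Set(1, 2), liveBrokers = Set(2, 3))) // Some(2)
    // no ISR member alive: the real selector then either fails the election or falls back
    // to a live assigned replica, depending on unclean.leader.election.enable
    println(selectLeader(Seq(1, 2, 3), isr = Set(1), liveBrokers = Set(2, 3)))    // None
  }
}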