kafka broker shutdown过程分析
controlled shutdown通过给controller发送命令实现停止指定broker
实现方式很诡异,controller并没有提供任何socket或者http方式开放接口,而是提供了一个 jmx bean,命令行工具通过jmx invoke方式调用controller中提供的接口shutdownBroker
// Connect to the controller's JMX endpoint and invoke the `shutdownBroker`
// operation exposed by the KafkaController MBean. The invocation returns the
// set of partitions whose leader is still on the broker being shut down, so
// the caller can retry until leadership has fully migrated away.
val jmxUrl = new JMXServiceURL("service:jmx:rmi:///jndi/rmi://%s:%d/jmxrmi".format(controllerHost, controllerJmxPort))
info("Connecting to jmx url " + jmxUrl)
// Second argument (environment map) is null: no credentials/customization needed.
val jmxc = JMXConnectorFactory.connect(jmxUrl, null)
val mbsc = jmxc.getMBeanServerConnection
// NOTE(review): the cast assumes the MBean returns a Set[TopicAndPartition];
// this is unchecked at runtime due to erasure.
val leaderPartitionsRemaining = mbsc.invoke(new ObjectName(KafkaController.MBeanName),
  "shutdownBroker",
  Array(params.brokerId),
  Array(classOf[Int].getName)).asInstanceOf[Set[TopicAndPartition]]
shutdown broker的逻辑
(1)partition的leader是此broker,调用 partitionStateMachine.handleStateChanges,
(2)partition的leader不是此broker,给其发送stopReplicaRequest,并调用 replicaStateMachine.handleStateChanges
/**
 * Controlled-shutdown entry point, invoked via JMX by the shutdown tool.
 *
 * For every partition hosted on the given broker:
 *  - if the broker is the partition leader, move leadership elsewhere through
 *    the partition state machine (updates ZK and notifies affected brokers);
 *  - otherwise (broker is a follower), send it a StopReplica request and mark
 *    the replica offline through the replica state machine.
 *
 * @param id broker id to shut down
 * @return partitions this broker still leads that have other replicas —
 *         non-empty means the caller should retry until leadership has moved.
 * @throws ControllerMovedException if this node is no longer the controller
 * @throws BrokerNotAvailableException if the broker id is unknown
 */
def shutdownBroker(id: Int): Set[TopicAndPartition] = {
  if (!isActive()) {
    throw new ControllerMovedException("Controller moved to another broker. Aborting controlled shutdown")
  }
  // Serialize concurrent controlled shutdowns of different brokers.
  controllerContext.brokerShutdownLock synchronized {
    info("Shutting down broker " + id)
    inLock(controllerContext.controllerLock) {
      if (!controllerContext.liveOrShuttingDownBrokerIds.contains(id))
        throw new BrokerNotAvailableException("Broker id %d does not exist.".format(id))
      controllerContext.shuttingDownBrokerIds.add(id)
      debug("All shutting down brokers: " + controllerContext.shuttingDownBrokerIds.mkString(","))
      debug("Live brokers: " + controllerContext.liveBrokerIds.mkString(","))
    }
    // Snapshot (partition, replication factor) for every partition on this broker.
    val allPartitionsAndReplicationFactorOnBroker: Set[(TopicAndPartition, Int)] =
      inLock(controllerContext.controllerLock) {
        controllerContext.partitionsOnBroker(id)
          .map(topicAndPartition => (topicAndPartition, controllerContext.partitionReplicaAssignment(topicAndPartition).size))
      }
    allPartitionsAndReplicationFactorOnBroker.foreach {
      case (topicAndPartition, replicationFactor) =>
        // Move leadership serially to relinquish lock.
        inLock(controllerContext.controllerLock) {
          controllerContext.partitionLeadershipInfo.get(topicAndPartition).foreach { currLeaderIsrAndControllerEpoch =>
            if (currLeaderIsrAndControllerEpoch.leaderAndIsr.leader == id) {
              // If the broker leads the topic partition, transition the leader and update isr. Updates zk and
              // notifies all affected brokers
              partitionStateMachine.handleStateChanges(Set(topicAndPartition), OnlinePartition,
                controlledShutdownPartitionLeaderSelector)
            } else {
              // Stop the replica first. The state change below initiates ZK changes which should take some time
              // before which the stop replica request should be completed (in most cases).
              // All requests are sent in a batch, grouped by broker.
              brokerRequestBatch.newBatch()
              brokerRequestBatch.addStopReplicaRequestForBrokers(Seq(id), topicAndPartition.topic,
                topicAndPartition.partition, deletePartition = false)
              brokerRequestBatch.sendRequestsToBrokers(epoch, controllerContext.correlationId.getAndIncrement)
              // If the broker is a follower, updates the isr in ZK and notifies the current leader
              replicaStateMachine.handleStateChanges(Set(PartitionAndReplica(topicAndPartition.topic,
                topicAndPartition.partition, id)), OfflineReplica)
            }
          }
        }
    }
    // Partitions still led by this broker that have replication factor > 1;
    // the tool polls this result to know when it is safe to stop the broker.
    def replicatedPartitionsBrokerLeads() = inLock(controllerContext.controllerLock) {
      trace("All leaders = " + controllerContext.partitionLeadershipInfo.mkString(","))
      controllerContext.partitionLeadershipInfo.filter {
        case (topicAndPartition, leaderIsrAndControllerEpoch) =>
          leaderIsrAndControllerEpoch.leaderAndIsr.leader == id &&
            controllerContext.partitionReplicaAssignment(topicAndPartition).size > 1
      }.map(_._1)
    }
    replicatedPartitionsBrokerLeads().toSet
  }
}
partitionStateMachine.handleStateChanges 处理逻辑
replicaStateMachine.handleStateChanges 处理逻辑