BlockManager主要在driver和executor中构造。在driver端构造了一个BlockManagerMasterActor对象,主要负责收集block的info。在executor端创建了BlockManagerMasterActor的ref,并且将ref封装到BlockManagerMaster中,用于与BlockManagerMasterActor通信。BlockManager封装了BlockManagerMaster,用于存储block,并调用BlockManagerMaster与master通信。
// Messages handled by BlockManagerMasterActor. UpdateBlockInfo is how an executor
// reports a block's info to the driver.
def receive = {
  // Register the block manager, then acknowledge with `true`.
  case RegisterBlockManager(blockManagerId, maxMemSize, slaveActor) =>
    register(blockManagerId, maxMemSize, slaveActor)
    sender ! true

  // No reply is sent from here; see the TODO below.
  case UpdateBlockInfo(blockManagerId, blockId, storageLevel, deserializedSize, size) =>
    // TODO: Ideally we want to handle all the message replies in receive instead of in the
    // individual private methods.
    updateBlockInfo(blockManagerId, blockId, storageLevel, deserializedSize, size)

  // Pure queries: the helper's result is sent straight back to the sender.
  case GetLocations(blockId) => sender ! getLocations(blockId)
  case GetLocationsMultipleBlockIds(blockIds) => sender ! getLocationsMultipleBlockIds(blockIds)
  case GetPeers(blockManagerId, size) => sender ! getPeers(blockManagerId, size)
  case GetMemoryStatus => sender ! memoryStatus
  case GetStorageStatus => sender ! storageStatus

  case RemoveRdd(rddId) => sender ! removeRdd(rddId)

  // Removals that acknowledge with `true` once the helper has returned.
  case RemoveBlock(blockId) =>
    removeBlockFromWorkers(blockId)
    sender ! true
  case RemoveExecutor(execId) =>
    removeExecutor(execId)
    sender ! true

  // Shutdown: acknowledge first, cancel the timeout task if one exists, then stop this actor.
  case StopBlockManagerMaster =>
    logInfo("Stopping BlockManagerMaster")
    sender ! true
    Option(timeoutCheckingTask).foreach(_.cancel())
    context.stop(self)

  case ExpireDeadHosts => expireDeadHosts()

  case HeartBeat(blockManagerId) => sender ! heartBeat(blockManagerId)

  // Anything else is only logged; no reply is sent.
  case unexpected =>
    logWarning("Got unknown message: " + unexpected)
}
// Messages handled by BlockManagerSlaveActor: used by the master to tell a client
// to remove a block or an RDD.
override def receive = {
  // Single-block removal sends no reply.
  case RemoveBlock(blockId) => blockManager.removeBlock(blockId)

  // RDD removal replies with whatever blockManager.removeRdd returns
  // (presumably the number of blocks removed -- confirm against BlockManager).
  case RemoveRdd(rddId) => sender ! blockManager.removeRdd(rddId)
}
// BlockManager calls BlockManagerWorker.syncPutBlock and BlockManagerWorker.syncGetBlock
// to write data to, or fetch data from, a remote node.
private[spark] object BlockManagerWorker extends Logging {
  // Set by startBlockManagerWorker; stays null until that has been called, so the
  // sync* methods below will fail with a NullPointerException if used before then.
  private var blockManagerWorker: BlockManagerWorker = null

  /** Creates the worker for `manager` and stores it for use by the sync* methods. */
  def startBlockManagerWorker(manager: BlockManager): Unit = {
    blockManagerWorker = new BlockManagerWorker(manager)
  }

  /**
   * Sends `msg` to the remote connection manager and waits for the reply.
   * Per the original note, this is used to write data to a remote node when
   * replicating a block.
   *
   * @param msg             the put request to send
   * @param toConnManagerId the remote connection manager to send it to
   * @return true if any response message came back, false otherwise
   */
  def syncPutBlock(msg: PutBlock, toConnManagerId: ConnectionManagerId): Boolean = {
    val blockManager = blockManagerWorker.blockManager
    val connectionManager = blockManager.connectionManager
    val blockMessage = BlockMessage.fromPutBlock(msg)
    val blockMessageArray = new BlockMessageArray(blockMessage)
    val resultMessage = connectionManager.sendMessageReliablySync(
      toConnManagerId, blockMessageArray.toBufferMessage)
    resultMessage.isDefined
  }

  /**
   * Sends `msg` to the remote connection manager, waits for the reply and returns
   * the data of the first block message in it. Per the original note, this is used
   * by running tasks to fetch remote data.
   *
   * @param msg             the get request to send
   * @param toConnManagerId the remote connection manager to send it to
   * @return the first returned block's data, or null when there is no response or
   *         the response contains no block messages (callers rely on the null)
   */
  def syncGetBlock(msg: GetBlock, toConnManagerId: ConnectionManagerId): ByteBuffer = {
    val blockManager = blockManagerWorker.blockManager
    val connectionManager = blockManager.connectionManager
    val blockMessage = BlockMessage.fromGetBlock(msg)
    val blockMessageArray = new BlockMessageArray(blockMessage)
    val responseMessage = connectionManager.sendMessageReliablySync(
      toConnManagerId, blockMessageArray.toBufferMessage)
    responseMessage match {
      case Some(message) =>
        // Kept as a cast so a non-BufferMessage reply still fails with
        // ClassCastException, exactly as before.
        val bufferMessage = message.asInstanceOf[BufferMessage]
        logDebug("Response message received " + bufferMessage)
        // Only the first block message matters. Taking it with headOption replaces
        // the former `return` inside a foreach lambda, which was a nonlocal return
        // implemented by throwing an exception.
        BlockMessageArray.fromBufferMessage(bufferMessage).headOption match {
          case Some(firstBlockMessage) =>
            logDebug("Found " + firstBlockMessage)
            firstBlockMessage.getData
          case None =>
            null
        }
      case None =>
        logDebug("No response message received")
        null
    }
  }
}
远端的BlockManagerWorker会调用onBlockMessageReceive方法用来处理TYPE_PUT_BLOCK和TYPE_GET_BLOCK 这些事件
// Methods of BlockManagerWorker that handle block reads and then reply with a
// response through the connectionManager.

/**
 * Handles one incoming message. A BufferMessage is parsed into block messages,
 * each is passed to processBlockMessage, and the responses are packed into a
 * single reply.
 *
 * @param msg the received message
 * @param id  the sending connection manager (not used in this method)
 * @return the reply buffer message, or None if `msg` is not a BufferMessage or an
 *         Exception was thrown while handling it (the error is logged)
 */
def onBlockMessageReceive(msg: Message, id: ConnectionManagerId): Option[Message] = {
  logDebug("Handling message " + msg)
  msg match {
    case bufferMessage: BufferMessage =>
      try {
        logDebug("Handling as a buffer message " + bufferMessage)
        val blockMessages = BlockMessageArray.fromBufferMessage(bufferMessage)
        logDebug("Parsed as a block message array")
        // flatMap keeps only the defined responses, in order; this replaces the
        // former filter(_ != None).map(_.get) chain.
        val responseMessages = blockMessages.flatMap(m => processBlockMessage(m))
        Some(new BlockMessageArray(responseMessages).toBufferMessage)
      } catch {
        case e: Exception =>
          logError("Exception handling buffer message", e)
          None
      }
    case otherMessage: Any =>
      logError("Unknown type message received: " + otherMessage)
      None
  }
}

/**
 * Handles one block message according to its type.
 *
 *  - TYPE_PUT_BLOCK: stores the block via putBlock; produces no response.
 *  - TYPE_GET_BLOCK: looks the block up via getBlock and wraps it in a GotBlock
 *    response; produces no response when getBlock returns null.
 *  - any other type: ignored.
 *
 * @param blockMessage the request to handle
 * @return the response to send back, if any
 */
def processBlockMessage(blockMessage: BlockMessage): Option[BlockMessage] = {
  blockMessage.getType match {
    case BlockMessage.TYPE_PUT_BLOCK =>
      val pB = PutBlock(blockMessage.getId, blockMessage.getData, blockMessage.getLevel)
      logDebug("Received [" + pB + "]")
      putBlock(pB.id, pB.data, pB.level)
      None

    case BlockMessage.TYPE_GET_BLOCK =>
      val gB = new GetBlock(blockMessage.getId)
      logDebug("Received [" + gB + "]")
      // Option(...) turns a null buffer into None, replacing the explicit
      // null check and early `return None`.
      Option(getBlock(gB.id)).map(buffer => BlockMessage.fromGotBlock(GotBlock(gB.id, buffer)))

    case _ => None
  }
}
ConnectionManager用于连接的建立、数据的传输和接收,主要使用了NIO socket。
MemoryStore存储的结构是 private val entries = new LinkedHashMap[BlockId, Entry](32, 0.75f, true),其中第三个参数true表示按访问顺序排列。
在存数据前,会先查看是否有足够的memory;如果没有,会删除老的block。如果StorageLevel是useDisk,会将老的block写到disk。
DiskStore会按照blockId中的name创建文件,并把数据写到文件中
ShuffleBlockManager是BlockManager的扩展,主要用于在shuffle操作时存储shuffle write的数据。