Spark Learning 41: Spark's Block Transfer Service (BlockTransferService)

1. blockTransferService defaults to NettyBlockTransferService, which uses Netty's asynchronous event-driven network application framework to provide both the server and client sides for fetching sets of blocks from remote nodes.

2. The block transfer service BlockTransferService is created when SparkEnv is initialized:

// ============================ Create the block transfer service BlockTransferService ============================
    /*
        blockTransferService defaults to NettyBlockTransferService (in older Spark versions the
        property spark.shuffle.blockTransferService could select NioBlockTransferService instead);
        it uses Netty's asynchronous event-driven network application framework to provide both
        the server and client sides for fetching sets of blocks from remote nodes.
     */
    val blockTransferService =
      new NettyBlockTransferService(conf, securityManager, bindAddress, advertiseAddress,
        blockManagerPort, numUsableCores)
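Note that the service is only constructed here; it cannot fetch or serve anything until init is called with a BlockDataManager. In Spark 2.x that happens when BlockManager.initialize passes the BlockManager itself (which implements BlockDataManager) to the service. Below is a minimal, self-contained sketch of that hand-off; SketchBlockManager and TransferService are simplified stand-ins, not the real Spark classes:

// Simplified stand-ins for the real Spark types, for illustration only.
trait BlockDataManager
trait TransferService { def init(manager: BlockDataManager): Unit }

// The block manager implements BlockDataManager and hands itself to the transfer
// service, so the service can read and store blocks on the local node.
class SketchBlockManager(transfer: TransferService) extends BlockDataManager {
  def initialize(appId: String): Unit = {
    transfer.init(this)
    println(s"transfer service wired up for app $appId")
  }
}

object WiringDemo extends App {
  val service = new TransferService {
    def init(manager: BlockDataManager): Unit = println("init(blockDataManager) called")
  }
  new SketchBlockManager(service).initialize("app-1")
}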

3. NettyBlockTransferService extends BlockTransferService, which declares the following methods:


package org.apache.spark.network

import java.io.{Closeable, File}
import java.nio.ByteBuffer

import scala.concurrent.{Future, Promise}
import scala.concurrent.duration.Duration
import scala.reflect.ClassTag

import org.apache.spark.internal.Logging
import org.apache.spark.network.buffer.{ManagedBuffer, NioManagedBuffer}
import org.apache.spark.network.shuffle.{BlockFetchingListener, ShuffleClient}
import org.apache.spark.storage.{BlockId, StorageLevel}
import org.apache.spark.util.ThreadUtils

private[spark]
abstract class BlockTransferService extends ShuffleClient with Closeable with Logging {

  /**
   * Initialize the transfer service by giving it the BlockDataManager that can be used to fetch
   * local blocks or put local blocks.
   */
  def init(blockDataManager: BlockDataManager): Unit

  /**
   * Tear down the transfer service.
   */
  def close(): Unit

  /**
   * Port number the service is listening on, available only after [[init]] is invoked.
   */
  def port: Int

  /**
   * Host name the service is listening on, available only after [[init]] is invoked.
   */
  def hostName: String

  /**
   * Fetch a sequence of blocks from a remote node asynchronously,
   * available only after [[init]] is invoked.
   *
   * Note that this API takes a sequence so the implementation can batch requests, and does not
   * return a future so the underlying implementation can invoke onBlockFetchSuccess as soon as
   * the data of a block is fetched, rather than waiting for all blocks to be fetched.
   */
  override def fetchBlocks(
      host: String,
      port: Int,
      execId: String,
      blockIds: Array[String],
      listener: BlockFetchingListener,
      shuffleFiles: Array[File]): Unit

  /**
   * Upload a single block to a remote node, available only after [[init]] is invoked.
   */
  def uploadBlock(
      hostname: String,
      port: Int,
      execId: String,
      blockId: BlockId,
      blockData: ManagedBuffer,
      level: StorageLevel,
      classTag: ClassTag[_]): Future[Unit]

  /**
   * A special case of [[fetchBlocks]], as it fetches only one block and is blocking.
   *
   * It is also only available after [[init]] is invoked.
   */
  def fetchBlockSync(host: String, port: Int, execId: String, blockId: String): ManagedBuffer = {
    // A monitor for the thread to wait on.
    val result = Promise[ManagedBuffer]()
    fetchBlocks(host, port, execId, Array(blockId),
      new BlockFetchingListener {
        override def onBlockFetchFailure(blockId: String, exception: Throwable): Unit = {
          result.failure(exception)
        }
        override def onBlockFetchSuccess(blockId: String, data: ManagedBuffer): Unit = {
          val ret = ByteBuffer.allocate(data.size.toInt)
          ret.put(data.nioByteBuffer())
          ret.flip()
          result.success(new NioManagedBuffer(ret))
        }
      }, shuffleFiles = null)
    ThreadUtils.awaitResult(result.future, Duration.Inf)
  }

  /**
   * Upload a single block to a remote node, available only after [[init]] is invoked.
   *
   * This method is similar to [[uploadBlock]], except this one blocks the thread
   * until the upload finishes.
   */
  def uploadBlockSync(
      hostname: String,
      port: Int,
      execId: String,
      blockId: BlockId,
      blockData: ManagedBuffer,
      level: StorageLevel,
      classTag: ClassTag[_]): Unit = {
    val future = uploadBlock(hostname, port, execId, blockId, blockData, level, classTag)
    ThreadUtils.awaitResult(future, Duration.Inf)
  }
}
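fetchBlockSync above is a standard recipe for turning a callback-based API into a blocking call: create a Promise, complete it from the success and failure callbacks, then block on the resulting Future. It also copies the fetched bytes into a fresh NioManagedBuffer, because the buffer passed to onBlockFetchSuccess may be released once the callback returns. Here is a self-contained sketch of the same Promise-bridging pattern; fetchAsync is a made-up stand-in for fetchBlocks, not a Spark API.

import java.util.concurrent.Executors
import scala.concurrent.{Await, Promise}
import scala.concurrent.duration.Duration

object PromiseBridgeDemo {
  // A made-up callback-style API standing in for fetchBlocks: it reports its
  // result through callbacks instead of returning a Future.
  def fetchAsync(blockId: String)(onSuccess: Array[Byte] => Unit,
                                  onFailure: Throwable => Unit): Unit = {
    val pool = Executors.newSingleThreadExecutor()
    pool.submit(new Runnable {
      override def run(): Unit = {
        if (blockId.nonEmpty) onSuccess(Array.fill(4)(1.toByte))
        else onFailure(new IllegalArgumentException("empty block id"))
        pool.shutdown()
      }
    })
  }

  // The bridge: the callbacks complete the Promise and the caller blocks on the
  // Future, the same shape as fetchBlockSync with its BlockFetchingListener.
  def fetchSync(blockId: String): Array[Byte] = {
    val result = Promise[Array[Byte]]()
    fetchAsync(blockId)(bytes => result.success(bytes), e => result.failure(e))
    Await.result(result.future, Duration.Inf)
  }

  def main(args: Array[String]): Unit =
    println(s"fetched ${fetchSync("shuffle_0_0_0").length} bytes")
}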

4. The NettyBlockTransferService implementation:


package org.apache.spark.network.netty

import java.io.File
import java.nio.ByteBuffer

import scala.collection.JavaConverters._
import scala.concurrent.{Future, Promise}
import scala.reflect.ClassTag

import org.apache.spark.{SecurityManager, SparkConf}
import org.apache.spark.network._
import org.apache.spark.network.buffer.ManagedBuffer
import org.apache.spark.network.client.{RpcResponseCallback, TransportClientBootstrap, TransportClientFactory}
import org.apache.spark.network.crypto.{AuthClientBootstrap, AuthServerBootstrap}
import org.apache.spark.network.server._
import org.apache.spark.network.shuffle.{BlockFetchingListener, OneForOneBlockFetcher, RetryingBlockFetcher}
import org.apache.spark.network.shuffle.protocol.UploadBlock
import org.apache.spark.network.util.JavaUtils
import org.apache.spark.serializer.JavaSerializer
import org.apache.spark.storage.{BlockId, StorageLevel}
import org.apache.spark.util.Utils

/**
 * A BlockTransferService that uses Netty to fetch a set of blocks at a time.
 */
private[spark] class NettyBlockTransferService(
    conf: SparkConf,
    securityManager: SecurityManager,
    bindAddress: String,
    override val hostName: String,
    _port: Int,
    numCores: Int)
  extends BlockTransferService {

  // TODO: Don't use Java serialization, use a more cross-version compatible serialization format.
  private val serializer = new JavaSerializer(conf)
  // SecurityManager is Spark's security class; check whether authentication of the Spark communication protocol is enabled.
  private val authEnabled = securityManager.isAuthenticationEnabled()
  private val transportConf = SparkTransportConf.fromSparkConf(conf, "shuffle", numCores)

  private[this] var transportContext: TransportContext = _
  private[this] var server: TransportServer = _
  private[this] var clientFactory: TransportClientFactory = _
  private[this] var appId: String = _

  /**
      1. Create the RpcServer (NettyBlockRpcServer);
      2. Build the TransportContext;
      3. Create the RPC client factory TransportClientFactory;
      4. Create the Netty server TransportServer; set the property spark.blockManager.port
         (default 0, meaning a random port) to change the port the TransportServer binds to.
    */
  override def init(blockDataManager: BlockDataManager): Unit = {
    // 1. Create the RpcServer
    val rpcHandler = new NettyBlockRpcServer(conf.getAppId, serializer, blockDataManager)
    var serverBootstrap: Option[TransportServerBootstrap] = None
    var clientBootstrap: Option[TransportClientBootstrap] = None
    if (authEnabled) {
      serverBootstrap = Some(new AuthServerBootstrap(transportConf, securityManager))
      clientBootstrap = Some(new AuthClientBootstrap(transportConf, conf.getAppId, securityManager))
    }
    // 2. Build the TransportContext
    transportContext = new TransportContext(transportConf, rpcHandler)
    // 3. Create the RPC client factory TransportClientFactory
    clientFactory = transportContext.createClientFactory(clientBootstrap.toSeq.asJava)
    // 4. Create and bind the Netty server TransportServer
    server = createServer(serverBootstrap.toList)
    appId = conf.getAppId
    logInfo(s"Server created on ${hostName}:${server.getPort}")
  }
  /** Creates and binds the TransportServer, possibly trying multiple ports. */
  private def createServer(bootstraps: List[TransportServerBootstrap]): TransportServer = {
    def startService(port: Int): (TransportServer, Int) = {
      val server = transportContext.createServer(bindAddress, port, bootstraps.asJava)
      (server, server.getPort)
    }

    Utils.startServiceOnPort(_port, startService, conf, getClass.getName)._1
  }
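
  // Note: Utils.startServiceOnPort retries on bind failures, trying successive ports up
  // to spark.port.maxRetries times (default 16); when _port is 0 the OS picks an
  // ephemeral port on each attempt (behavior per Spark 2.x).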

  /**
    * fetchBlocks fetches shuffle blocks from a remote node, reusing the Netty transport
    * that NettyBlockTransferService set up in init.
    */
  override def fetchBlocks(
      host: String,
      port: Int,
      execId: String,
      blockIds: Array[String],
      listener: BlockFetchingListener,
      shuffleFiles: Array[File]): Unit = {
    logTrace(s"Fetch blocks from $host:$port (executor id $execId)")
    try {
      val blockFetchStarter = new RetryingBlockFetcher.BlockFetchStarter {
        override def createAndStart(blockIds: Array[String], listener: BlockFetchingListener) {
          // Pull the data from the remote node over a client/server connection.
          val client = clientFactory.createClient(host, port)
          new OneForOneBlockFetcher(client, appId, execId, blockIds.toArray, listener,
            transportConf, shuffleFiles).start()
        }
      }

      val maxRetries = transportConf.maxIORetries()
      if (maxRetries > 0) {
        // Note this Fetcher will correctly handle maxRetries == 0; we avoid it just in case there's
        // a bug in this code. We should remove the if statement once we're sure of the stability.
        new RetryingBlockFetcher(transportConf, blockFetchStarter, blockIds, listener).start()
      } else {
        blockFetchStarter.createAndStart(blockIds, listener)
      }
    } catch {
      case e: Exception =>
        logError("Exception while beginning fetchBlocks", e)
        blockIds.foreach(listener.onBlockFetchFailure(_, e))
    }
  }
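
  // Note: transportConf.maxIORetries() reads spark.shuffle.io.maxRetries (default 3),
  // and RetryingBlockFetcher waits spark.shuffle.io.retryWait (default 5s) between
  // attempts, retrying only IO-related failures before reporting them to the listener
  // (configuration names and defaults per Spark 2.x).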

  override def port: Int = server.getPort

  /**
    * NettyBlockTransferService's uploadBlock method uploads a shuffle block to a remote
    * Executor, again reusing the Netty transport this service created. The steps are:
    *   1. Create a Netty client connected to the hostname and port of the chosen BlockManager.
    *   2. Serialize the block's StorageLevel (together with its ClassTag).
    *   3. Convert the block's NIO ByteBuffer into a byte array so it can be serialized.
    *   4. Wrap appId, execId, blockId, the serialized metadata and the byte array in an
    *      UploadBlock message, and serialize that message to a byte buffer.
    *   5. Send it with the Netty client's sendRpc; the RpcResponseCallback marks the upload
    *      as succeeded or failed according to the RPC result.
    */
  override def uploadBlock(
      hostname: String,
      port: Int,
      execId: String,
      blockId: BlockId,
      blockData: ManagedBuffer,
      level: StorageLevel,
      classTag: ClassTag[_]): Future[Unit] = {
    val result = Promise[Unit]()
    val client = clientFactory.createClient(hostname, port)

    // StorageLevel and ClassTag are serialized as bytes using our JavaSerializer.
    // Everything else is encoded using our binary protocol.
    val metadata = JavaUtils.bufferToArray(serializer.newInstance().serialize((level, classTag)))

    // Convert or copy nio buffer into array in order to serialize it.
    val array = JavaUtils.bufferToArray(blockData.nioByteBuffer())

    client.sendRpc(new UploadBlock(appId, execId, blockId.toString, metadata, array).toByteBuffer,
      new RpcResponseCallback {
        override def onSuccess(response: ByteBuffer): Unit = {
          logTrace(s"Successfully uploaded block $blockId")
          result.success((): Unit)
        }
        override def onFailure(e: Throwable): Unit = {
          logError(s"Error while uploading block $blockId", e)
          result.failure(e)
        }
      })

    result.future
  }

  override def close(): Unit = {
    if (server != null) {
      server.close()
    }
    if (clientFactory != null) {
      clientFactory.close()
    }
  }
}
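
Finally, a hedged usage sketch of the two blocking helpers. This assumes an already-initialized service and a reachable remote executor; service, remoteHost, remotePort, and remoteExecId are placeholders, not values from the Spark source:

import java.nio.ByteBuffer
import scala.reflect.classTag
import org.apache.spark.network.buffer.{ManagedBuffer, NioManagedBuffer}
import org.apache.spark.storage.{BlockId, StorageLevel}

// service: an initialized BlockTransferService; host/port/execId are placeholders.
// Blocking fetch of one shuffle block; internally this is fetchBlocks with a
// one-element array and a Promise-backed BlockFetchingListener.
val fetched: ManagedBuffer =
  service.fetchBlockSync(remoteHost, remotePort, remoteExecId, "shuffle_0_0_0")
println(s"fetched ${fetched.size()} bytes")

// Blocking upload of one block: uploadBlock serializes the (StorageLevel, ClassTag)
// metadata, wraps everything in an UploadBlock message, and the Sync variant
// awaits the RPC result.
val data = new NioManagedBuffer(ByteBuffer.wrap(Array[Byte](1, 2, 3)))
service.uploadBlockSync(remoteHost, remotePort, remoteExecId,
  BlockId("test_block"), data, StorageLevel.MEMORY_ONLY, classTag[Array[Byte]])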
