这个接口是各种消息的统一接口,用来封装各种类型的消息,比如 Rpc 消息、start 消息、stop 消息、remote 消息,方便在 Inbox 的 process 方法执行的时候识别是哪一类消息。
OneWay消息
Rpc消息
启动消息
stop消息
远程连接消息
远程断开消息
远程连接Error消息
这个就是 spark 的收件箱,上面各种类型的消息先保存在这个类的 messages 这个 LinkedList 中。
属性:
// Mutable state of the Inbox. In the code shown, every access happens inside `inbox.synchronized`.
protected val messages = new java.util.LinkedList[InboxMessage]() // queue holding the pending InboxMessage items
private var stopped = false // set to true by stop(); post() drops messages once it is true
private var enableConcurrent = false // whether several threads may process this inbox's messages at the same time
private var numActiveThreads = 0 // number of threads currently processing this inbox's messages
构造方法:
/**
 * Inbox of a single [[RpcEndpoint]]; only the constructor part of the class is shown here.
 *
 * @param endpointRef the ref of the endpoint this inbox belongs to
 * @param endpoint    the endpoint whose messages are queued in this inbox
 */
private[netty] class Inbox(
    val endpointRef: NettyRpcEndpointRef,
    val endpoint: RpcEndpoint)
  extends Logging {

  inbox => // Alias for `this`; without it the `inbox.synchronized` calls do not resolve.

  // OnStart is queued at construction time, so it is the first message in the queue.
  inbox.synchronized {
    messages.add(OnStart)
  }
}
方法:
/** Reports whether the message queue currently holds no messages; read under the inbox lock. */
def isEmpty: Boolean = inbox.synchronized { messages.isEmpty }
/**
 * Called for a message that is being dropped; this implementation only logs a warning.
 *
 * @param message the message that will not be delivered
 */
protected def onDrop(message: InboxMessage): Unit =
  logWarning(s"Drop $message because $endpointRef is stopped")
/**
 * Stops the inbox: marks it stopped, turns concurrent processing off and queues `OnStop`.
 * Calling it again after the first time does nothing.
 */
def stop(): Unit = inbox.synchronized {
  // Everything runs under the lock so that "OnStop" is guaranteed to be the last message queued.
  if (!stopped) {
    stopped = true
    // Concurrency is switched off here so that, by the time RpcEndpoint.onStop runs, it is the
    // only thread processing messages and can release its resources safely.
    enableConcurrent = false
    messages.add(OnStop)
    // Note: concurrent events still sitting in `messages` will now be processed one by one.
  }
}
/**
 * Adds a message to the queue, or drops it if the inbox has already been stopped.
 *
 * @param message the message to enqueue
 */
def post(message: InboxMessage): Unit = inbox.synchronized {
  if (stopped) {
    // "OnStop" is already in `messages`, so anything arriving later must be dropped.
    onDrop(message)
  } else {
    // The method returns Unit; the former trailing `false` was a dead expression and is removed.
    messages.add(message)
  }
}
// Processes queued messages, dispatching on the message type, and keeps polling so that as many
// stored messages as possible are handled in one call.
// The `dispatcher` argument is only used in the OnStop case, where the endpoint's ref is removed
// from the dispatcher via dispatcher.removeRpcEndpointRef(endpoint).
def process(dispatcher: Dispatcher): Unit = {
var message: InboxMessage = null
// While concurrency is disabled, at most one thread may process messages: a second caller returns
// immediately. Concurrency is enabled only after OnStart has been handled, and only when the
// endpoint is NOT a ThreadSafeRpcEndpoint (see the OnStart case below).
inbox.synchronized {
if (!enableConcurrent && numActiveThreads != 0) {
return
}
message = messages.poll()
if (message != null) {
numActiveThreads += 1
} else {
return
}
}
while (true) {
safelyCall(endpoint) {
message match {
// RpcMessage: handled through receiveAndReply with the reply context; a failure is sent back
// through context.sendFailure and then rethrown.
case RpcMessage(_sender, content, context) =>
try {
endpoint.receiveAndReply(context).applyOrElse[Any, Unit](content, {
msg =>
throw new SparkException(s"Unsupported message $message from ${_sender}")
})
} catch {
case e: Throwable =>
context.sendFailure(e)
// Throw the exception -- this exception will be caught by the safelyCall function.
// The endpoint's onError function will be called.
throw e
}
// OneWayMessage: handled through receive; there is no reply context.
case OneWayMessage(_sender, content) =>
endpoint.receive.applyOrElse[Any, Unit](content, {
msg =>
throw new SparkException(s"Unsupported message $message from ${_sender}")
})
case OnStart =>
endpoint.onStart()
// Only endpoints that are not ThreadSafeRpcEndpoint get concurrent processing, and only if
// the inbox has not been stopped in the meantime.
if (!endpoint.isInstanceOf[ThreadSafeRpcEndpoint]) {
inbox.synchronized {
if (!stopped) {
enableConcurrent = true
}
}
}
case OnStop =>
val activeThreads = inbox.synchronized {
inbox.numActiveThreads }
assert(activeThreads == 1,
s"There should be only a single active thread but found $activeThreads threads.")
dispatcher.removeRpcEndpointRef(endpoint)// on stop, the dispatcher removes this endpoint's ref
endpoint.onStop()
assert(isEmpty, "OnStop should be the last message")
case RemoteProcessConnected(remoteAddress) =>
endpoint.onConnected(remoteAddress)
case RemoteProcessDisconnected(remoteAddress) =>
endpoint.onDisconnected(remoteAddress)
case RemoteProcessConnectionError(cause, remoteAddress) =>
endpoint.onNetworkError(cause, remoteAddress)
}
}
// Decide under the lock whether this thread keeps going: either fetch the next message or
// decrement the active-thread count and leave.
inbox.synchronized {
// "enableConcurrent" will be set to false after `onStop` is called, so we should check it
// every time.
if (!enableConcurrent && numActiveThreads != 1) {
// If we are not the only one worker, exit
numActiveThreads -= 1
return
}
message = messages.poll()
if (message == null) {
numActiveThreads -= 1
return
}
}
}
}
这个是一个内部类,除了构造参数 name、endpoint、ref 之外,还有一个属性就是上面的 Inbox,即每一个 EndpointData 的实例都会有一个 Inbox 的属性。
/**
 * Bundles a registered endpoint with its name, its ref and the Inbox created for it.
 *
 * @param name     name the endpoint is registered under
 * @param endpoint the endpoint itself
 * @param ref      the ref pointing at the endpoint
 */
private class EndpointData(
    val name: String,
    val endpoint: RpcEndpoint,
    val ref: NettyRpcEndpointRef) {

  // Each EndpointData owns exactly one Inbox, built from its own ref and endpoint.
  val inbox = new Inbox(ref, endpoint)
}
这个类负责保存已经注册的RpcEndpoint和投递消息到对应的EndpointData的Inbox中,所以其内部属性有2个ConcurrentHashMap,分别用来保存
EndpointName-》EndpointData、RpcEndpoint-》RpcEndpointRef 这2种键值对。
属性:
// Registered endpoints, keyed by endpoint name; EndpointData is an inner class.
// registerRpcEndpoint rejects a name that is already present, so each name maps to a single
// EndpointData and therefore a single Inbox.
private val endpoints: ConcurrentMap[String, EndpointData] =
new ConcurrentHashMap[String, EndpointData]
// Maps each RpcEndpoint to its RpcEndpointRef.
private val endpointRefs: ConcurrentMap[RpcEndpoint, RpcEndpointRef] =
new ConcurrentHashMap[RpcEndpoint, RpcEndpointRef]
// Queue of EndpointData waiting to be processed. postMessage first looks the EndpointData up by
// name in `endpoints`, posts the actual message into its Inbox (the `messages` queue there), and
// then offers the EndpointData to this queue.
private val receivers = new LinkedBlockingQueue[EndpointData]
// Running-state flag; registerRpcEndpoint, stop and postMessage all check it.
private var stopped = false
// Thread pool that processes the pending queue.
// NOTE(review): the initializer body is elided in these notes. According to the original note the
// pool size is derived from the CPU core count and configuration, and the thread names start with
// "dispatcher-event-loop" -- confirm against the real source.
private val threadpool: ThreadPoolExecutor = {
}
方法:
/**
 * Registers `endpoint` under `name` and returns the ref created for it.
 *
 * @param name     name to register the endpoint under
 * @param endpoint the endpoint to register
 * @return the NettyRpcEndpointRef built from this env's address and `name`
 * @throws IllegalStateException    if the dispatcher has been stopped
 * @throws IllegalArgumentException if an endpoint with this name is already registered
 */
def registerRpcEndpoint(name: String, endpoint: RpcEndpoint): NettyRpcEndpointRef = {
  // The ref's address combines nettyEnv's own address with the endpoint name.
  val address = RpcEndpointAddress(nettyEnv.address, name)
  val ref = new NettyRpcEndpointRef(nettyEnv.conf, address, nettyEnv)
  synchronized {
    if (stopped) {
      throw new IllegalStateException("RpcEnv has been stopped")
    }
    val previous = endpoints.putIfAbsent(name, new EndpointData(name, endpoint, ref))
    if (previous != null) {
      throw new IllegalArgumentException(s"There is already an RpcEndpoint called $name")
    }
    val registered = endpoints.get(name)
    endpointRefs.put(registered.endpoint, registered.ref)
    receivers.offer(registered) // for the OnStart message
  }
  ref
}
/** Looks up the ref registered for `endpoint`; the map lookup yields null when there is none. */
def getRpcEndpointRef(endpoint: RpcEndpoint): RpcEndpointRef = {
  endpointRefs.get(endpoint)
}
/** Removes the endpoint-to-ref mapping for `endpoint`, if one exists. */
def removeRpcEndpointRef(endpoint: RpcEndpoint): Unit = {
  endpointRefs.remove(endpoint)
}
/**
 * Unregisters the endpoint called `name`: removes it from `endpoints`, stops its inbox and
 * queues it once more. Does nothing when no endpoint is registered under that name.
 */
private def unregisterRpcEndpoint(name: String): Unit =
  Option(endpoints.remove(name)).foreach { removed =>
    removed.inbox.stop()
    receivers.offer(removed) // for the OnStop message
  }
/**
 * Stops the endpoint that `rpcEndpointRef` refers to by unregistering it by name.
 *
 * @param rpcEndpointRef ref of the endpoint to stop
 */
def stop(rpcEndpointRef: RpcEndpointRef): Unit = synchronized {
  // When the dispatcher itself is already stopped there is nothing to do here: the endpoint
  // will be stopped by the Dispatcher.stop() method.
  if (!stopped) {
    unregisterRpcEndpoint(rpcEndpointRef.name)
  }
}
/**
 * Posts `message` to the inbox of the endpoint called `endpointName` and then puts that
 * endpoint's EndpointData on the pending queue.
 *
 * @param endpointName      name of the target endpoint
 * @param message           the message to post
 * @param callbackIfStopped invoked with the error when the dispatcher is stopped or when no
 *                          endpoint with that name is registered
 */
private def postMessage(
    endpointName: String,
    message: InboxMessage,
    callbackIfStopped: (Exception) => Unit): Unit = {
  val failure: Option[Exception] = synchronized {
    val target = endpoints.get(endpointName)
    if (stopped) {
      Some(new RpcEnvStoppedException())
    } else if (target == null) {
      Some(new SparkException(s"Could not find $endpointName."))
    } else {
      target.inbox.post(message)
      receivers.offer(target)
      None
    }
  }
  // The callback does not need the lock, so it runs after the `synchronized` block.
  failure.foreach(callbackIfStopped)
}
/** Returns true when an endpoint is currently registered under `name`. */
def verify(name: String): Boolean = endpoints.containsKey(name)
这个类是处理 socket 消息处理类。
里面有2个 receive 方法,用来接收消息;channelActive 用来发送消息。
这2个receive方法是重载的,区别在于一个需要返回响应,即会要回调。
反序列化数据流,拿到 发送方地址,接收方地址,RpcEndpoint name 等信息,包装成RequestMessage的实例, 最后投递到 NettyRpcHandler class 属性 dispatcher 中的属性中的对应的 Inbox中(参考 Dispatcher class)
receiver:
/**
 * Handles an incoming message that comes with a response callback: the bytes are decoded by
 * internalReceive and the result is posted through postRemoteMessage together with the callback.
 */
override def receive(
    client: TransportClient,
    message: ByteBuffer,
    callback: RpcResponseCallback): Unit = {
  dispatcher.postRemoteMessage(internalReceive(client, message), callback)
}
/**
 * Handles an incoming message that has no callback: the bytes are decoded by internalReceive
 * and the result is posted through postOneWayMessage.
 */
override def receive(
    client: TransportClient,
    message: ByteBuffer): Unit = {
  dispatcher.postOneWayMessage(internalReceive(client, message))
}
其中有一个internalReceive的方法,反序列化数据流,拿到 发送方地址,接收方地址,RpcEndpoint name 等信息,包装成RequestMessage的实例。
/**
 * Decodes the received bytes into a RequestMessage and makes sure it carries a sender address.
 *
 * @param client  the transport client the bytes arrived on
 * @param message the raw received bytes
 * @return the decoded message; when it carried no sender address, a copy whose sender is the
 *         client's socket address
 */
private def internalReceive(client: TransportClient, message: ByteBuffer): RequestMessage = {
  val socketAddress = client.getChannel().remoteAddress().asInstanceOf[InetSocketAddress]
  assert(socketAddress != null)
  val clientAddr = RpcAddress(socketAddress.getHostString, socketAddress.getPort)
  // After this call the content has already been deserialized.
  val decoded = RequestMessage(nettyEnv, client, message)
  Option(decoded.senderAddress) match {
    case None =>
      // No sender in the payload: build a new message that uses the client's socket address.
      new RequestMessage(clientAddr, decoded.receiver, decoded.content)
    case Some(remoteEnvAddress) =>
      // The remote RpcEnv listens on some port, so a RemoteProcessConnected is also fired for
      // that listening address the first time this client address is seen.
      if (remoteAddresses.putIfAbsent(clientAddr, remoteEnvAddress) == null) {
        dispatcher.postToAll(RemoteProcessConnected(remoteEnvAddress))
      }
      decoded
  }
}
这个object中一共有2个方法:
/**
 * Reads an optional host/port address from the stream: a boolean presence flag, followed by a
 * UTF host string and an int port when the flag is true.
 *
 * @return the decoded RpcAddress, or null when the presence flag is false
 */
private def readRpcAddress(in: DataInputStream): RpcAddress =
  if (in.readBoolean()) RpcAddress(in.readUTF(), in.readInt()) else null
/**
 * Decodes a RequestMessage from bytes received over the socket.
 *
 * @param nettyEnv env used to build the receiver ref and to deserialize the content
 * @param client   transport client the bytes arrived on; attached to the receiver ref
 * @param bytes    the received bytes: sender address, receiver address, endpoint name, content
 */
def apply(nettyEnv: NettyRpcEnv, client: TransportClient, bytes: ByteBuffer): RequestMessage = {
  val in = new DataInputStream(new ByteBufferInputStream(bytes))
  try {
    // The header starts with the sender's host/port, which may be absent (null).
    val senderAddress = readRpcAddress(in)
    // Next come the receiver's address and the name of the target endpoint.
    val receiverAddress = readRpcAddress(in)
    val endpointName = in.readUTF()
    val endpointAddress = RpcEndpointAddress(receiverAddress, endpointName)
    val ref = new NettyRpcEndpointRef(nettyEnv.conf, endpointAddress, nettyEnv)
    ref.client = client
    // The remaining bytes in `bytes` are the message content, deserialized here.
    new RequestMessage(senderAddress, ref, nettyEnv.deserialize(client, bytes))
  } finally {
    in.close()
  }
}
这个类就是包装了消息发送方的RpcAddress、接收方的NettyRpcEndpointRef,还有接收到的已经反序列化的内容,所以它的构造方法是下面的样子。
/**
 * A message travelling between RPC environments.
 *
 * @param senderAddress address of the sender; may be null (it is written and read with a
 *                      presence flag)
 * @param receiver      ref of the endpoint the message is addressed to
 * @param content       the message payload, already deserialized
 */
class RequestMessage(
val senderAddress: RpcAddress,
val receiver: NettyRpcEndpointRef,
val content: Any)
方法:
/**
 * Serializes this message for sending: sender address, receiver address, receiver name, then
 * the content written through the env's serialization stream.
 *
 * @param nettyEnv env that supplies the serialization stream
 * @return a ByteBuffer holding the encoded message
 */
def serialize(nettyEnv: NettyRpcEnv): ByteBuffer = {
  val buffer = new ByteBufferOutputStream()
  val dataOut = new DataOutputStream(buffer)
  try {
    // When sending, senderAddress is the address of this (local) side.
    writeRpcAddress(dataOut, senderAddress)
    writeRpcAddress(dataOut, receiver.address)
    dataOut.writeUTF(receiver.name)
    val objectStream = nettyEnv.serializeStream(dataOut)
    try {
      objectStream.writeObject(content)
    } finally {
      objectStream.close()
    }
  } finally {
    dataOut.close()
  }
  buffer.toByteBuffer
}
/**
 * Writes an optional address to the stream: a boolean presence flag, followed by the host and
 * port when the address is not null.
 */
private def writeRpcAddress(out: DataOutputStream, rpcAddress: RpcAddress): Unit = {
  val present = rpcAddress != null
  out.writeBoolean(present)
  if (present) {
    out.writeUTF(rpcAddress.host)
    out.writeInt(rpcAddress.port)
  }
}
当NettyRpcEnv class 的startServer 方法启动 socket 服务之后,当接收到客户端的一个请求的时候,会委托给NettyRpcHandler class 处理(这一部分的知识可以学习 netty 网络框架)。
NettyRpcHandler class 里面有 dispatcher、nettyRpcEnv 和 streamManager。
dispatcher 用来分发(已经反序列化后的)消息到对应的endPoint 的Inbox;
nettyRpcEnv 主要是要使用其反序列化方法,来反序列化信息。
经过NettyRpcHandler 的 receive 和 internalReceive 处理之后,就会返回一个新的 RequestMessage。
最后把这个消息投递到对应的endPoint的Inbox中,再由dispatcher 的线程池调度执行 Inbox的process方法。
这个是Outbox 消息的总接口,一共有2个实现类:OneWayOutboxMessage、RpcOutboxMessage。一个是发送即忘记,另一个是需要回调的。
注意,消息传递到OutboxMessage的时候,已经是序列化后的消息了。
一共有2个方法:
/** Sends this message using the given TransportClient. */
def sendWith(client: TransportClient): Unit
/** Called with the cause when the message could not be sent. */
def onFailure(e: Throwable): Unit
这个类就是 发送即忘记 的具体实现类。
/**
 * Fire-and-forget outbox message: the content is sent and no response is handled.
 *
 * @param content the bytes to send
 */
private[netty] case class OneWayOutboxMessage(content: ByteBuffer)
  extends OutboxMessage with Logging {

  /** Sends the content through `client`. */
  override def sendWith(client: TransportClient): Unit = client.send(content)

  /** Logs the failure: at debug level for RpcEnvStoppedException, as a warning otherwise. */
  override def onFailure(e: Throwable): Unit = e match {
    case envStopped: RpcEnvStoppedException => logDebug(envStopped.getMessage)
    case other: Throwable => logWarning(s"Failed to send one-way RPC.", other)
  }
}
这个类就是 发送获取结果后需要回调的具体实现类,所以这里又实现了RpcResponseCallback。因为需要回调,所以在构造的时候,需要传入 _onSuccess 的函数,自己处理结果。
/**
 * Outbox message that expects a response. It registers itself as the RpcResponseCallback of
 * the request and forwards the outcome to the two functions supplied at construction.
 *
 * @param content    the bytes to send
 * @param _onFailure invoked with the cause when the request fails
 * @param _onSuccess invoked with the client used for sending and the response bytes
 */
private[netty] case class RpcOutboxMessage(
    content: ByteBuffer,
    _onFailure: (Throwable) => Unit,
    _onSuccess: (TransportClient, ByteBuffer) => Unit)
  extends OutboxMessage with RpcResponseCallback with Logging {

  // Both fields are assigned in sendWith; until then `client` is null.
  private var client: TransportClient = _
  private var requestId: Long = _

  /** Remembers the client, sends the RPC with this object as callback and keeps the request id. */
  override def sendWith(client: TransportClient): Unit = {
    this.client = client
    this.requestId = client.sendRpc(content, this)
  }

  /** On timeout, removes the pending request from the client, or logs if nothing was sent yet. */
  def onTimeout(): Unit = Option(client) match {
    case Some(connected) => connected.removeRpcRequest(requestId)
    case None => logError("Ask timeout before connecting successfully")
  }

  override def onFailure(e: Throwable): Unit = _onFailure(e)

  override def onSuccess(response: ByteBuffer): Unit = _onSuccess(client, response)
}
Outbox是发送消息的主类,组合了 nettyRpcEnv和rpcAddress。nettyRpcEnv里面有netty的发送客户端,负责发送和接收消息结果;rpcAddress主要在创建客户端和接收消息结果时使用。
下面来看看构造方法:
// `address` is the RpcAddress of the other host: sending a message requires the target host
// and port.
private[netty] class Outbox(nettyEnv: NettyRpcEnv, val address: RpcAddress) {
// Queue of pending OutboxMessage items; drainOutbox polls its messages from here.
private val messages = new java.util.LinkedList[OutboxMessage]
// Client used for sending. It starts out null, in which case drainOutbox launches a connect task.
private var client: TransportClient = null
// Non-null while a connect is in progress; drainOutbox returns early in that case.
private var connectFuture: java.util.concurrent.Future[Unit] = null
// Once true, send() rejects new messages and drainOutbox returns without sending.
private var stopped = false
// Marks whether some thread is currently draining (sending) the queued messages.
private var draining = false
}
下面是这个类的方法:
/**
 * Queues `message` and then tries to drain the outbox. If the outbox is already stopped the
 * message is not queued and its onFailure callback is invoked instead.
 *
 * @param message the message to send
 */
def send(message: OutboxMessage): Unit = {
  // Only the queue manipulation needs the lock; the callbacks run outside of it.
  val accepted = synchronized {
    if (stopped) {
      false
    } else {
      messages.add(message)
      true
    }
  }
  if (accepted) {
    drainOutbox()
  } else {
    message.onFailure(new SparkException("Message is dropped because Outbox is stopped"))
  }
}
// Main sending routine: sends queued messages one after another, handling as many as possible in
// a single call. It returns early when the outbox is stopped, a connect is in progress, no
// client exists yet (a connect task is launched), another thread is draining, or the queue is empty.
private def drainOutbox(): Unit = {
var message: OutboxMessage = null
synchronized {
if (stopped) {
return
}
if (connectFuture != null) {
// We are connecting to the remote address, so just exit
return
}
if (client == null) {
// There is no connect task but client is null, so we need to launch the connect task.
// NOTE(review): per the original note, the connect task creates the sending client and calls
// drainOutbox again, which is why this branch returns right away -- launchConnectTask is not
// shown here, confirm against its definition.
launchConnectTask()
return
}
if (draining) {
// There is some thread draining, so just exit
return
}// take the next queued message
message = messages.poll()
if (message == null) {
return
}
draining = true
}
while (true) {
try {
// Read the client under the lock, but perform the send outside of it.
val _client = synchronized {
client }
if (_client != null) {
// Send through the sendWith method of the concrete OutboxMessage subclass.
message.sendWith(_client)
} else {
assert(stopped == true)
}
} catch {
case NonFatal(e) =>
handleNetworkFailure(e)
return
}
synchronized {
if (stopped) {
return
}
message = messages.poll()
if (message == null) {
// Queue is empty: clear the draining flag so that a later call can drain again.
draining = false
return
}
}
}
}