Hive on Spark Source Code Analysis (1): SparkTask
Hive on Spark Source Code Analysis (2): SparkSession and HiveSparkClient
Hive on Spark Source Code Analysis (3): SparkClient and SparkClientImpl (Part 1)
Hive on Spark Source Code Analysis (4): SparkClient and SparkClientImpl (Part 2)
Hive on Spark Source Code Analysis (5): RemoteDriver
Hive on Spark Source Code Analysis (6): RemoteSparkJobMonitor and JobHandle
SparkClientImpl submits work through its submit method, which HiveSparkClient calls to run the jobs a SparkSession hands to it. Internally it simply delegates to the submit method of the inner class ClientProtocol:
@Override
public <T extends Serializable> JobHandle<T> submit(Job<T> job) {
  return protocol.submit(job);
}
To see what ClientProtocol really is, look at its inheritance chain: ClientProtocol extends BaseProtocol, which extends RpcDispatcher, which in turn is an io.netty.channel.SimpleChannelInboundHandler. It handles the low-level RPC communication and asynchronous job execution, defining the job-related methods submit, run, cancel and endSession, plus a set of handle methods with different signatures that process RPC requests for the different message types.
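Since the original class diagram does not reproduce well here, the hierarchy is easier to read as a condensed, compilable skeleton. The *Sketch names are mine, not the real Hive classes; bodies and the nesting inside SparkClientImpl and Rpc are elided purely for readability:

import io.netty.channel.ChannelHandlerContext;
import io.netty.channel.SimpleChannelInboundHandler;

// Sketch of the inheritance chain only; not the real Hive classes.
abstract class RpcDispatcherSketch extends SimpleChannelInboundHandler<Object> {
  @Override
  protected void channelRead0(ChannelHandlerContext ctx, Object msg) {
    // in the real RpcDispatcher: pair each MessageHeader with its payload frame (shown later)
  }
}

abstract class BaseProtocolSketch extends RpcDispatcherSketch {
  // in the real BaseProtocol: the message classes shared with RemoteDriver
  // (JobRequest, JobResult, JobStarted, JobSubmitted, ...)
}

class ClientProtocolSketch extends BaseProtocolSketch {
  // in the real ClientProtocol: submit/run/cancel/endSession plus one private
  // handle(ChannelHandlerContext, <MessageType>) overload per message type
}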
Let's start with the implementation of submit, which submits the jobs SparkClient is asked to run. The JobHandle it returns can be thought of as a handle on the job, used to monitor and control a job running remotely; JobHandle is analyzed in detail in a later article of this series.
<T extends Serializable> JobHandleImpl<T> submit(Job<T> job) {
  // generate a jobId with java.util.UUID
  final String jobId = UUID.randomUUID().toString();
  // the promise runs on a thread allocated by the EventLoopGroup; it is wrapped inside the
  // JobHandleImpl, which delegates many of its methods to it
  final Promise<T> promise = driverRpc.createPromise();
  // build the JobHandle
  final JobHandleImpl<T> handle = new JobHandleImpl<T>(SparkClientImpl.this, promise, jobId);
  jobs.put(jobId, handle);
  // wrap the jobId and job into a JobRequest message and hand it to driverRpc to send;
  // what comes back is again a promise holding the asynchronous result of the send
  final io.netty.util.concurrent.Future<Void> rpc = driverRpc.call(new JobRequest(jobId, job));
  LOG.debug("Send JobRequest[{}].", jobId);
driverRpc.call eventually reaches the two-argument call method of Rpc, which performs the actual send:
public <T> Future<T> call(Object msg, Class<T> retType) {
  Preconditions.checkArgument(msg != null);
  Preconditions.checkState(channel.isActive(), "RPC channel is closed.");
  try {
    final long id = rpcId.getAndIncrement();
    final Promise<T> promise = createPromise();
    // register this RPC (its id, promise and message type) with the dispatcher
    dispatcher.registerRpc(id, promise, msg.getClass().getName());
    synchronized (channelLock) {
      // write and writeAndFlush both return a ChannelFuture, to which the listener below is attached;
      // the id and message type are wrapped in a MessageHeader, here of type CALL
      channel.write(new MessageHeader(id, Rpc.MessageType.CALL)).addListener(listener);
      channel.writeAndFlush(msg).addListener(listener);
    }
    return promise;
  } catch (Exception e) {
    throw Throwables.propagate(e);
  }
}
The ChannelFutureListener attached to both writes fails the promise and closes the connection if the send itself fails:
ChannelFutureListener listener = new ChannelFutureListener() {
  @Override
  public void operationComplete(ChannelFuture cf) {
    // isDone() is true once the future has finished for any reason: normal completion,
    // failure or cancellation (i.e. anything other than still running)
    if (!cf.isSuccess() && !promise.isDone()) {
      LOG.warn("Failed to send RPC, closing connection.", cf.cause());
      promise.setFailure(cf.cause());
      dispatcher.discardRpc(id);
      close();
    }
  }
};
Back in submit, two listeners tie the send future and the job promise together:
  rpc.addListener(new GenericFutureListener<io.netty.util.concurrent.Future<Void>>() {
    @Override
    public void operationComplete(io.netty.util.concurrent.Future<Void> f) {
      // if the RPC was sent successfully, move the JobHandle state from SENT to QUEUED
      if (f.isSuccess()) {
        handle.changeState(JobHandle.State.QUEUED);
      } else if (!promise.isDone()) {
        promise.setFailure(f.cause());
      }
    }
  });
  promise.addListener(new GenericFutureListener<Promise<T>>() {
    @Override
    public void operationComplete(Promise<T> p) {
      if (jobId != null) {
        // remove this job from the job map once the promise completes
        jobs.remove(jobId);
      }
      if (p.isCancelled() && !rpc.isDone()) {
        rpc.cancel(true);
      }
    }
  });
  return handle;
}
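For context, here is a hedged sketch of the caller side. JobHandle extends java.util.concurrent.Future, so the caller can block on the result while the listeners above drive the state transitions (SENT, then QUEUED, then STARTED once the driver reports back). SubmitExample and SomePrintJob are made-up names; only the SparkClient/JobHandle/Job/JobContext types are real.

import java.io.Serializable;
import java.util.concurrent.TimeUnit;
import org.apache.hive.spark.client.Job;
import org.apache.hive.spark.client.JobContext;
import org.apache.hive.spark.client.JobHandle;
import org.apache.hive.spark.client.SparkClient;

// Illustrative only: how a JobHandle obtained from SparkClient.submit() is typically consumed.
public class SubmitExample {

  // a trivial job; its call() runs inside the RemoteDriver's JVM
  static class SomePrintJob implements Job<Serializable> {
    @Override
    public Serializable call(JobContext jc) throws Exception {
      System.out.println("running remotely");
      return null;
    }
  }

  public static void runExample(SparkClient client) throws Exception {
    JobHandle<Serializable> handle = client.submit(new SomePrintJob());
    System.out.println("client job id: " + handle.getClientJobId()); // the UUID generated in submit()
    Serializable result = handle.get(5, TimeUnit.MINUTES);           // completes when a JobResult arrives
    System.out.println("result: " + result);
  }
}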
The run method is simpler: it wraps the job in a SyncJobRequest and directly returns the RPC future for the result:
<T extends Serializable> Future<T> run(Job<T> job) {
  // wrap the job in a SyncJobRequest and send it; the reply carries the job's result
  @SuppressWarnings("unchecked")
  final io.netty.util.concurrent.Future<T> rpc = (io.netty.util.concurrent.Future<T>)
      driverRpc.call(new SyncJobRequest(job), Serializable.class);
  return rpc;
}
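run is what backs the synchronous housekeeping calls on SparkClient: getExecutorCount, for example, wraps one of the built-in jobs listed further below and returns the resulting future. A paraphrased sketch, not a verbatim quote of the Hive source:

// Sketch: how SparkClientImpl appears to expose run() through its public API.
@Override
public Future<Integer> getExecutorCount() {
  // GetExecutorCountJob (listed below) executes on the RemoteDriver and reports the executor count
  return protocol.run(new GetExecutorCountJob());
}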
cancel and endSession are thinner still; each just sends the corresponding message:
void cancel(String jobId) {
  // wrap the jobId in a CancelJob message and send it
  driverRpc.call(new CancelJob(jobId));
}

Future<?> endSession() {
  // simply send an EndSession message
  return driverRpc.call(new EndSession());
}
On top of RpcDispatcher, BaseProtocol defines a series of inner classes representing the different message types (i.e. the different events and requests) exchanged with the RemoteDriver:
abstract class BaseProtocol extends RpcDispatcher {

  protected static class CancelJob implements Serializable {...}

  protected static class EndSession implements Serializable {...}

  protected static class Error implements Serializable {...}

  protected static class JobMetrics implements Serializable {...}

  protected static class JobRequest<T extends Serializable> implements Serializable {...}

  protected static class JobResult<T extends Serializable> implements Serializable {...}

  protected static class JobStarted implements Serializable {...}

  /**
   * Inform the client that a new spark job has been submitted for the client job.
   */
  protected static class JobSubmitted implements Serializable {...}

  protected static class SyncJobRequest<T extends Serializable> implements Serializable {

    final Job<T> job;

    SyncJobRequest(Job<T> job) {
      this.job = job;
    }

    SyncJobRequest() {
      this(null);
    }
  }
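JobRequest, elided above, is the asynchronous counterpart of SyncJobRequest: it carries the client-generated jobId alongside the Job so the RemoteDriver can report progress back under that id. A sketch of what it roughly looks like (field names inferred from the submit code and the JobStarted handler shown later):

protected static class JobRequest<T extends Serializable> implements Serializable {

  final String id;    // the UUID generated in submit()
  final Job<T> job;   // the work to execute on the RemoteDriver

  JobRequest(String id, Job<T> job) {
    this.id = id;
    this.job = job;
  }

  JobRequest() {
    this(null, null);
  }
}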
Alongside these message types, a few built-in Job implementations cover housekeeping requests:
// add a jar
private static class AddJarJob implements Job<Serializable> {...}

// add a file
private static class AddFileJob implements Job<Serializable> {...}

// get the number of spark executors
private static class GetExecutorCountJob implements Job<Integer> {...}

// get spark.default.parallelism
private static class GetDefaultParallelismJob implements Job<Integer> {...}
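All of these helper jobs follow the same pattern: implement Job<T> and do their work against the JobContext on the driver side. A paraphrased sketch of AddJarJob (not a verbatim quote of the Hive source):

private static class AddJarJob implements Job<Serializable> {

  private final String path;

  AddJarJob(String path) {
    this.path = path;
  }

  @Override
  public Serializable call(JobContext jc) throws Exception {
    // runs inside the RemoteDriver: register the jar with the remote SparkContext
    jc.sc().addJar(path);
    return null;
  }
}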
ClientProtocol also defines one private handle(...) overload per message type pushed back from the RemoteDriver:
private void handle(ChannelHandlerContext ctx, Error msg) {...}

private void handle(ChannelHandlerContext ctx, JobMetrics msg) {...}

private void handle(ChannelHandlerContext ctx, JobResult msg) {...}

private void handle(ChannelHandlerContext ctx, JobStarted msg) {...}

private void handle(ChannelHandlerContext ctx, JobSubmitted msg) {...}
These handle methods are invoked from RpcDispatcher.channelRead0, which pairs each MessageHeader frame with the payload frame that follows it:
@Override
protected final void channelRead0(ChannelHandlerContext ctx, Object msg) throws Exception {
  if (lastHeader == null) {
    if (!(msg instanceof Rpc.MessageHeader)) {
      LOG.warn("[{}] Expected RPC header, got {} instead.", name(),
          msg != null ? msg.getClass().getName() : null);
      throw new IllegalArgumentException();
    }
    lastHeader = (Rpc.MessageHeader) msg;
  } else {
    // only visible when debug logging is enabled
    LOG.debug("[{}] Received RPC message: type={} id={} payload={}", name(),
        lastHeader.type, lastHeader.id, msg != null ? msg.getClass().getName() : null);
    try {
      switch (lastHeader.type) {
      case CALL:
        handleCall(ctx, msg);
        break;
      case REPLY:
        handleReply(ctx, msg, findRpc(lastHeader.id));
        break;
      case ERROR:
        handleError(ctx, msg, findRpc(lastHeader.id));
        break;
      default:
        throw new IllegalArgumentException("Unknown RPC message type: " + lastHeader.type);
      }
    } finally {
      lastHeader = null;
    }
  }
}
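The key detail here is that channelRead0 is a two-frame state machine: every logical RPC arrives as a MessageHeader frame followed by a payload frame, and lastHeader is cleared in a finally block so a failing handler cannot poison the next message. A minimal, Netty-free toy that restates just that pairing logic (illustrative only, not Hive code):

// Toy restatement of the header/payload pairing in RpcDispatcher.channelRead0.
public class TwoFrameDispatcher {

  private Object lastHeader;

  public void read(Object frame) {
    if (lastHeader == null) {
      lastHeader = frame;                       // first frame of the pair: the header
    } else {
      try {
        System.out.println("dispatch header=" + lastHeader + " payload=" + frame);
      } finally {
        lastHeader = null;                      // always reset, even if dispatch throws
      }
    }
  }

  public static void main(String[] args) {
    TwoFrameDispatcher d = new TwoFrameDispatcher();
    d.read("HEADER[id=1, type=CALL]");
    d.read("JobRequest payload");
  }
}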
For a CALL message, handleCall looks up (and caches) the handle method whose second parameter type matches the payload class, invokes it, and writes back either a REPLY or an ERROR:
private void handleCall(ChannelHandlerContext ctx, Object msg) throws Exception {
  // find the matching handle method for this message type
  Method handler = handlers.get(msg.getClass());
  if (handler == null) {
    handler = getClass().getDeclaredMethod("handle", ChannelHandlerContext.class,
        msg.getClass());
    handler.setAccessible(true);
    handlers.put(msg.getClass(), handler);
  }

  Rpc.MessageType replyType;
  Object replyPayload;
  try {
    replyPayload = handler.invoke(this, ctx, msg);
    if (replyPayload == null) {
      replyPayload = new Rpc.NullMessage();
    }
    replyType = Rpc.MessageType.REPLY;
  } catch (InvocationTargetException ite) {
    LOG.debug(String.format("[%s] Error in RPC handler.", name()), ite.getCause());
    replyPayload = Throwables.getStackTraceAsString(ite.getCause());
    replyType = Rpc.MessageType.ERROR;
  }
  ctx.channel().write(new Rpc.MessageHeader(lastHeader.id, replyType));
  ctx.channel().writeAndFlush(replyPayload);
}
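The lookup-and-cache pattern used above (find the private handle method whose parameter type matches the message's runtime class, make it accessible, memoize it in the handlers map) is easy to see in isolation. A self-contained toy version of the same technique, with single-argument handlers instead of (ChannelHandlerContext, msg) pairs:

import java.lang.reflect.Method;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

// Toy version of the reflective dispatch used by handleCall; not Hive code.
public class ReflectiveDispatcher {

  private final Map<Class<?>, Method> handlers = new ConcurrentHashMap<>();

  public void dispatch(Object msg) throws Exception {
    Method m = handlers.get(msg.getClass());
    if (m == null) {
      // resolve handle(<runtime type of msg>) once, then cache it
      m = getClass().getDeclaredMethod("handle", msg.getClass());
      m.setAccessible(true);
      handlers.put(msg.getClass(), m);
    }
    m.invoke(this, msg);
  }

  private void handle(String msg)  { System.out.println("string message: " + msg); }
  private void handle(Integer msg) { System.out.println("int message: " + msg); }

  public static void main(String[] args) throws Exception {
    ReflectiveDispatcher d = new ReflectiveDispatcher();
    d.dispatch("hello");   // routed to handle(String)
    d.dispatch(42);        // routed to handle(Integer)
  }
}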
As a concrete example of one of those handlers, the JobStarted handler simply flips the corresponding JobHandle into the STARTED state:
private void handle(ChannelHandlerContext ctx, JobStarted msg) {
  JobHandleImpl<?> handle = jobs.get(msg.id);
  if (handle != null) {
    handle.changeState(JobHandle.State.STARTED);
  } else {
    LOG.warn("Received event for unknown job {}", msg.id);
  }
}