我司要做一个在页面上通过拖拉拽进行ETL的功能,类似于kettle,但我们不想用kettle这种C/S架构,而是要做成B/S架构,并封装为产品。这种拖拉拽图形、并最终形成执行逻辑去执行的过程,是整个流程中最复杂的部分:我们需要解析图形内容,将其解析为可执行的逻辑,并根据依赖关系并行执行。在画布上拖拉拽出的流程本身就是一个有前后依赖顺序的逻辑关系,我们需要把它解析为可执行的DAG(有向无环图),并根据依赖关系正确地调度任务。
为此我们调研了xxl-job,azkaban两种技术,并进行了对比。
对于xxl-job,它是一种轻量的任务调度框架,可以很好地植入我们的系统,但是它不支持DAG这种有依赖关系的逻辑:它只支持把一个父任务拆分为多个子任务,而无法把多个父任务合并为一个任务(即类似于join的操作),所以xxl-job被pass了。
对于azkaban,它也是一种轻量的任务调度框架,支持DAG,生态也很好,但是经过讨论我们还是放弃了它:它的任务总是以zip包的形式提交,我们需要把执行逻辑翻译为job文件、打成zip包,再通过api提交给azkaban,不太友好;而且对我们来说有一些功能不能满足,所以我们最终选择了自研。
通过github同性交友网站,我还是找到一个类似的,简单易懂,易于整合到我们系统的demo版本的调度小框架,地址:https://github.com/tovin-xu/task-scheduler
看过这个demo之后,给了我一些启发,我决定在它基础上整改一波,植入我们的系统,因为这个demo还是有很多问题的,譬如:
在此版本基础上,我对该逻辑进行了整理和完善,主要规整和完善了如下内容:
到这里已经满足了我们公司的基本需求,或者说主要逻辑、通用逻辑。当然,为了适应我司的业务,还需要加入很多繁杂的部分,这部分就不在这里说了。至少这个版本是个通用的版本,如果大家有需要,可以直接将这部分植入系统,几乎不用改动。如果后续有优化或者问题,我都会及时更新,欢迎大家讨论。经过完善的github地址:https://github.com/TheBiiigBlue/task-scheduler.git
完整代码还请参考github地址:https://github.com/TheBiiigBlue/task-scheduler.git
主要调度代码:
package com.bigblue.scheduler.manager;
import com.bigblue.scheduler.base.enums.TaskStatus;
import com.bigblue.scheduler.base.log.SchedulerLogger;
import com.bigblue.scheduler.base.utils.GuavaUtils;
import com.bigblue.scheduler.domain.NodeTask;
import com.bigblue.scheduler.domain.ParentTask;
import com.bigblue.scheduler.domain.json.JsonContent;
import com.bigblue.scheduler.service.TaskListener;
import com.bigblue.scheduler.service.TaskScheduler;
import com.bigblue.scheduler.service.impl.SimpleTaskListener;
import com.google.common.collect.Maps;
import com.google.common.util.concurrent.Futures;
import com.google.common.util.concurrent.ListenableFuture;
import com.google.common.util.concurrent.ListeningExecutorService;
import com.google.common.util.concurrent.MoreExecutors;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.core.annotation.Order;
import org.springframework.stereotype.Component;
import org.springframework.util.CollectionUtils;
import java.util.List;
import java.util.Map;
import java.util.concurrent.Executors;
import java.util.concurrent.atomic.AtomicInteger;
/**
 * Schedules the node tasks of one job as a directed acyclic graph (DAG).
 *
 * <p>Every job ({@link ParentTask}) gets its own scheduling thread. That thread first submits
 * the node tasks that have no dependencies and then keeps polling the {@link TaskManager} for
 * further tasks that are ready, submitting them to a shared fixed-size pool until the parent
 * task reports that it has finished or failed.
 *
 * <p>{@code taskScheduleThreadMap} doubles as the lock that guards starting and cancelling a
 * job, so that a job id cannot be started twice and concurrent cancellations run only once.
 *
 * @Author: TheBigBlue
 * @Date: 2020/6/11
 */
@Component("dagTaskScheduler")
@Order(1002)
public class DAGTaskScheduler implements TaskScheduler {

    /** Pause between two polls of the job state while nothing can be submitted. */
    private static final long POLL_INTERVAL_MILLIS = 50L;

    @Autowired
    private TaskManager taskManager;
    @Autowired
    private TaskParser taskParser;
    @Autowired
    private SchedulerLogger logger;

    /**
     * Scheduling threads keyed by job id (one thread per ParentTask).
     * Also used as the monitor for start/cancel.
     */
    private Map<String, Thread> taskScheduleThreadMap = Maps.newConcurrentMap();

    /**
     * Listening pool that runs the node tasks; the completion callbacks are executed on it too.
     */
    private ListeningExecutorService pool = MoreExecutors.listeningDecorator(Executors.newFixedThreadPool(
            Runtime.getRuntime().availableProcessors()));

    /**
     * Parses the JSON description into node tasks and starts scheduling them with a
     * {@link SimpleTaskListener}.
     *
     * @param jsonContent job description; its job id becomes the id of the scheduled job
     * @return the job id
     */
    @Override
    public String parseTasksAndSchedule(JsonContent jsonContent) {
        Map<String, NodeTask> nodeTasks = taskParser.parseNodeTasks(jsonContent);
        return startNodeTasks(jsonContent.getJobId(), nodeTasks, new SimpleTaskListener());
    }

    /**
     * Schedules all given node tasks as one unit (together they form one DAG).
     *
     * @param jobId          id of the job; used as the id of the created ParentTask
     * @param nodeTasks      all node tasks of the job
     * @param statusListener listener attached to the ParentTask to observe task status
     * @return the job id
     * @throws RuntimeException if a job with the same id is already being scheduled
     */
    @Override
    public String startNodeTasks(String jobId, Map<String, NodeTask> nodeTasks, TaskListener statusListener) {
        // Build the ParentTask that groups the node tasks of this job.
        ParentTask parentTask = ParentTask.builder()
                .id(jobId)
                .nodeTasks(nodeTasks)
                .nodeTaskSuccCnt(new AtomicInteger(0))
                .taskListener(statusListener).build();
        // Start scheduling it.
        this.startParentTask(parentTask);
        return jobId;
    }

    /**
     * Registers the ParentTask and starts a dedicated thread that schedules it.
     *
     * @param parentTask the job to schedule
     * @throws RuntimeException if a job with the same id is already being scheduled
     */
    private void startParentTask(ParentTask parentTask) {
        String jobId = parentTask.getId();
        // The duplicate check must happen under the same lock as the registration; checking
        // before taking the lock would let two concurrent starts of one job id both pass.
        synchronized (taskScheduleThreadMap) {
            if (taskScheduleThreadMap.get(jobId) != null) {
                throw new RuntimeException("duplicate start parentTask:" + jobId);
            }
            // Register the task with the task manager.
            taskManager.addTask(parentTask);
            // One dedicated thread schedules this ParentTask.
            Thread scheduleThread = new Thread(() -> runSchedule(jobId), "dag-scheduler-" + jobId);
            // Track the thread so that the job can be cancelled later.
            taskScheduleThreadMap.put(jobId, scheduleThread);
            scheduleThread.start();
            logger.getLogger(jobId).info("partentTask started! jobId: {}", jobId);
        }
    }

    /**
     * Body of the scheduling thread. Runs {@link #startTaskSchedule(String)} and, if it throws,
     * releases the job so that it does not stay registered forever.
     *
     * @param jobId id of the job to schedule
     */
    private void runSchedule(String jobId) {
        try {
            this.startTaskSchedule(jobId);
        } catch (RuntimeException e) {
            // Nobody else observes an exception thrown on this thread; without this cleanup
            // the thread map entry would remain and block any restart of the same job id.
            logger.getLogger(jobId).error("parentTask schedule aborted, jobId: {}", jobId, e);
            cancelTaskSchedule(jobId, TaskStatus.fail);
            logger.removeLogger(jobId);
        }
    }

    /**
     * Cancels the scheduling of a job: records the given status, removes the job from the task
     * manager, interrupts its scheduling thread and forgets the thread. Does nothing when the
     * job is not (or no longer) registered. Errors are logged, not propagated.
     *
     * @param jobId      id of the job to cancel
     * @param taskStatus status to record on the ParentTask before it is removed
     */
    @Override
    public void cancelTaskSchedule(String jobId, TaskStatus taskStatus) {
        try {
            // Two node tasks may fail at the same time and both try to cancel the job.
            synchronized (taskScheduleThreadMap) {
                Thread scheduleThread = taskScheduleThreadMap.get(jobId);
                if (scheduleThread != null) {
                    // Record the final status, then drop the task from the task manager.
                    taskManager.updateParentTaskStatus(jobId, taskStatus);
                    taskManager.removeTask(jobId);
                    // Wake up / stop the scheduling thread.
                    scheduleThread.interrupt();
                    taskScheduleThreadMap.remove(jobId);
                }
            }
        } catch (Exception e) {
            logger.getLogger(jobId).error("cancel parentTask schedule fail, jobId: {}", jobId, e);
        }
    }

    /**************************Scheduling logic starts below*****************************/

    /**
     * Runs the scheduling loop of one job on the calling thread (each ParentTask is meant to be
     * scheduled on its own thread).
     *
     * <p>Submits the tasks without dependencies, then repeatedly submits tasks that have become
     * schedulable until the ParentTask has finished or failed, and finally records the outcome
     * and releases the job. While nothing can be submitted the loop sleeps for
     * {@code POLL_INTERVAL_MILLIS} instead of spinning, which also makes it responsive to the
     * interrupt sent by {@link #cancelTaskSchedule(String, TaskStatus)}.
     *
     * @param jobId id of the job to schedule
     * @throws RuntimeException if the job has no task without dependencies
     */
    @Override
    public void startTaskSchedule(String jobId) {
        // Run the tasks that do not depend on anything.
        runNoDependentNodeTasks(jobId);

        ParentTask lastSeen = null;   // last ParentTask instance obtained from the task manager
        boolean interrupted = false;  // loop left because the thread was interrupted
        boolean aborted = false;      // loop left because scheduling itself threw
        while (true) {
            try {
                ParentTask parentTask = taskManager.getParentTask(jobId);
                if (parentTask == null) {
                    // NOTE(review): presumably happens once cancelTaskSchedule has removed the
                    // task from the task manager - confirm against TaskManager.
                    logger.getLogger(jobId).info("parentTask no longer registered, stop scheduling, jobId: {}", jobId);
                    break;
                }
                lastSeen = parentTask;
                // Stop as soon as the job has finished or failed.
                if (parentTask.isFailOrFinish()) {
                    logger.getLogger(jobId).info("nodeTask schedule finish or fail, jobId: {}", jobId);
                    break;
                }
                // Tasks that still have to be scheduled.
                List<NodeTask> nodeTasksToBeScheduled = taskManager.nodeTasksToBeScheduled(parentTask);
                if (CollectionUtils.isEmpty(nodeTasksToBeScheduled)) {
                    // Nothing left to submit: wait for the last running tasks to complete.
                    while (!parentTask.isFailOrFinish()) {
                        Thread.sleep(POLL_INTERVAL_MILLIS);
                    }
                    break;
                }
                // Submit every task that is schedulable right now.
                boolean submitted = false;
                for (NodeTask nodeTask : nodeTasksToBeScheduled) {
                    if (taskManager.canNodeTaskSchedule(jobId, nodeTask.getTaskId())) {
                        submitTask(jobId, nodeTask);
                        submitted = true;
                    }
                }
                if (!submitted) {
                    // All pending tasks are still blocked; back off before polling again.
                    Thread.sleep(POLL_INTERVAL_MILLIS);
                }
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
                interrupted = true;
                logger.getLogger(jobId).info("nodeTask schedule interrupted, jobId: {}", jobId);
                break;
            } catch (Exception e) {
                aborted = true;
                logger.getLogger(jobId).error("nodeTask schedule fail, jobId: {}", jobId, e);
                break;
            }
        }
        finishSchedule(jobId, lastSeen, interrupted, aborted);
    }

    /**
     * Records the outcome of a finished scheduling loop and releases the job.
     *
     * @param jobId       id of the job
     * @param lastSeen    ParentTask last obtained by the loop, used when the task manager no
     *                    longer returns one; may be {@code null}
     * @param interrupted whether the loop was left because of an interrupt
     * @param aborted     whether the loop was left because scheduling threw an exception
     */
    private void finishSchedule(String jobId, ParentTask lastSeen, boolean interrupted, boolean aborted) {
        ParentTask parentTask = taskManager.getParentTask(jobId);
        if (parentTask == null) {
            parentTask = lastSeen;
        }
        if (parentTask == null) {
            // No state to report on; just make sure the job is released.
            logger.getLogger(jobId).error("jobId: {}, parentTask not found, thread exit", jobId);
            cancelTaskSchedule(jobId, TaskStatus.fail);
            logger.removeLogger(jobId);
            return;
        }
        if (interrupted && !parentTask.isFailOrFinish()) {
            // Interrupted while the job was neither finished nor failed: whoever cancelled it
            // already recorded its status, so do not report success here.
            logger.getLogger(jobId).info("jobId: {}, schedule cancelled, thread exit", jobId);
            logger.removeLogger(jobId);
            return;
        }

        // Report progress and work out the final status.
        logger.getLogger(jobId).info("jobId: {}, scheduled progress: {}", jobId, parentTask.getProgress());
        @SuppressWarnings("unchecked")
        Map<String, Object> resultMap = (Map<String, Object>) GuavaUtils.get(jobId);
        TaskStatus parentTaskStatus;
        if (parentTask.isFail() || aborted) {
            // A loop that died with an exception must not be reported as success.
            parentTaskStatus = TaskStatus.fail;
            logger.getLogger(jobId).error("jobId: {}, scheduled fail, thread exit", jobId);
        } else {
            parentTaskStatus = TaskStatus.success;
            logger.getLogger(jobId).info("jobId: {}, scheduled success, thread exit", jobId);
        }
        // Update the cached job status (only when a non-empty cached result exists).
        if (!CollectionUtils.isEmpty(resultMap)) {
            resultMap.put("jobStatus", parentTaskStatus);
        }
        // Release the job and its scheduling thread.
        cancelTaskSchedule(jobId, parentTaskStatus);
        // Drop the per-job logger.
        logger.removeLogger(jobId);
    }

    /**
     * Submits all node tasks that have no dependencies and marks the ParentTask as running.
     *
     * @param jobId id of the job
     * @throws RuntimeException if there is no task without dependencies
     */
    private void runNoDependentNodeTasks(String jobId) {
        List<NodeTask> noDependentNodeTasks = taskManager.getNoDependentNodeTasks(jobId);
        if (CollectionUtils.isEmpty(noDependentNodeTasks)) {
            throw new RuntimeException("there is no start tasks, nodeTasks may not be DAG");
        }
        noDependentNodeTasks.forEach(nodeTask -> submitTask(jobId, nodeTask));
        taskManager.updateParentTaskStatus(jobId, TaskStatus.running);
    }

    /**
     * Submits one node task to the pool, marks it as running and attaches the completion
     * callback. Any failure while doing so cancels the whole job with status {@code fail}.
     *
     * @param jobId    id of the job the task belongs to
     * @param nodeTask the task to submit
     */
    private void submitTask(String jobId, NodeTask nodeTask) {
        String nodeTaskId = nodeTask.getTaskId();
        try {
            // Hand the task to the pool.
            ListenableFuture future = pool.submit(nodeTask);
            // Mark it as running.
            if (!taskManager.updateTaskStatus(jobId, nodeTaskId, TaskStatus.running)) {
                // Status update was rejected.
                throw new RuntimeException("update nodeTask status fail, jobId: " + jobId + ", nodeTaskId: " + nodeTaskId);
            }
            logger.getLogger(jobId).info("nodeTask has bean submitted successfully, jobId: {}, nodeTaskId: {}", jobId, nodeTask.getTaskId());
            // Register the asynchronous completion callback (executed on the same pool).
            Futures.addCallback(future, new TaskExecCallback(jobId, nodeTaskId, taskManager, this), pool);
        } catch (Exception e) {
            logger.getLogger(jobId).error("nodeTask submit fail, jobId: {}, nodeTaskId: {}", jobId, nodeTaskId, e);
            this.cancelTaskSchedule(jobId, TaskStatus.fail);
        }
    }
}