本文主要研究一下PowerJob的ContainerController
tech/powerjob/server/web/controller/ContainerController.java
@Slf4j
@RestController
@RequestMapping("/container")
public class ContainerController {
private final int port;
private final ContainerService containerService;
private final AppInfoRepository appInfoRepository;
private final ContainerInfoRepository containerInfoRepository;
public ContainerController(@Value("${server.port}") int port, ContainerService containerService, AppInfoRepository appInfoRepository, ContainerInfoRepository containerInfoRepository) {
this.port = port;
this.containerService = containerService;
this.appInfoRepository = appInfoRepository;
this.containerInfoRepository = containerInfoRepository;
}
@GetMapping("/downloadJar")
public void downloadJar(String version, HttpServletResponse response) throws IOException {
File file = containerService.fetchContainerJarFile(version);
if (file.exists()) {
OmsFileUtils.file2HttpResponse(file, response);
} else {
log.error("[Container] can't find container by version[{}], please deploy first!", version);
}
}
@PostMapping("/downloadContainerTemplate")
public void downloadContainerTemplate(@RequestBody GenerateContainerTemplateRequest req, HttpServletResponse response) throws IOException {
File zipFile = ContainerTemplateGenerator.generate(req.getGroup(), req.getArtifact(), req.getName(), req.getPackageName(), req.getJavaVersion());
OmsFileUtils.file2HttpResponse(zipFile, response);
}
@PostMapping("/jarUpload")
public ResultDTO fileUpload(@RequestParam("file") MultipartFile file) throws Exception {
if (file == null || file.isEmpty()) {
return ResultDTO.failed("empty file");
}
return ResultDTO.success(containerService.uploadContainerJarFile(file));
}
@PostMapping("/save")
public ResultDTO saveContainer(@RequestBody SaveContainerInfoRequest request) {
request.valid();
ContainerInfoDO container = new ContainerInfoDO();
BeanUtils.copyProperties(request, container);
container.setSourceType(request.getSourceType().getV());
container.setStatus(request.getStatus().getV());
containerService.save(container);
return ResultDTO.success(null);
}
@GetMapping("/delete")
public ResultDTO deleteContainer(Long appId, Long containerId) {
containerService.delete(appId, containerId);
return ResultDTO.success(null);
}
@GetMapping("/list")
public ResultDTO> listContainers(Long appId) {
List res = containerInfoRepository.findByAppIdAndStatusNot(appId, SwitchableStatus.DELETED.getV())
.stream().map(ContainerController::convert).collect(Collectors.toList());
return ResultDTO.success(res);
}
@GetMapping("/listDeployedWorker")
public ResultDTO listDeployedWorker(Long appId, Long containerId, HttpServletResponse response) {
AppInfoDO appInfoDO = appInfoRepository.findById(appId).orElseThrow(() -> new IllegalArgumentException("can't find app by id:" + appId));
String targetServer = appInfoDO.getCurrentServer();
if (StringUtils.isEmpty(targetServer)) {
return ResultDTO.failed("No workers have even registered!");
}
return ResultDTO.success(containerService.fetchDeployedInfo(appId, containerId));
}
private static ContainerInfoVO convert(ContainerInfoDO containerInfoDO) {
ContainerInfoVO vo = new ContainerInfoVO();
BeanUtils.copyProperties(containerInfoDO, vo);
if (containerInfoDO.getLastDeployTime() == null) {
vo.setLastDeployTime("N/A");
}else {
vo.setLastDeployTime(DateFormatUtils.format(containerInfoDO.getLastDeployTime(), OmsConstant.TIME_PATTERN));
}
SwitchableStatus status = SwitchableStatus.of(containerInfoDO.getStatus());
vo.setStatus(status.name());
ContainerSourceType sourceType = ContainerSourceType.of(containerInfoDO.getSourceType());
vo.setSourceType(sourceType.name());
return vo;
}
}
ContainerController提供了downloadJar、downloadContainerTemplate、jarUpload、save、delete、list、listDeployedWorker接口,其中downloadJar、jarUpload、save、delete委托给了containerService
tech/powerjob/server/core/container/ContainerService.java
@Slf4j
@Service
public class ContainerService {
@Resource
private Environment environment;
@Resource
private LockService lockService;
@Resource
private ContainerInfoRepository containerInfoRepository;
@Resource
private GridFsManager gridFsManager;
@Resource
private TransportService transportService;
@Resource
private WorkerClusterQueryService workerClusterQueryService;
// 下载用的分段锁
private final SegmentLock segmentLock = new SegmentLock(4);
// 并发部署的机器数量
private static final int DEPLOY_BATCH_NUM = 50;
// 部署间隔
private static final long DEPLOY_MIN_INTERVAL = 10 * 60 * 1000L;
// 最长部署时间
private static final long DEPLOY_MAX_COST_TIME = 10 * 60 * 1000L;
//......
}
ContainerService提供了save、delete、uploadContainerJarFile、fetchContainerJarFile、deploy、fetchDeployedInfo方法
public void save(ContainerInfoDO container) {
Long originId = container.getId();
if (originId != null) {
// just validate
containerInfoRepository.findById(originId).orElseThrow(() -> new IllegalArgumentException("can't find container by id: " + originId));
} else {
container.setGmtCreate(new Date());
}
container.setGmtModified(new Date());
// 文件上传形式的 sourceInfo 为该文件的 md5 值,Git形式的 md5 在部署阶段生成
if (container.getSourceType() == ContainerSourceType.FatJar.getV()) {
container.setVersion(container.getSourceInfo());
}else {
container.setVersion("init");
}
containerInfoRepository.saveAndFlush(container);
}
save方法通过containerInfoRepository.saveAndFlush进行保存
public void delete(Long appId, Long containerId) {
ContainerInfoDO container = containerInfoRepository.findById(containerId).orElseThrow(() -> new IllegalArgumentException("can't find container by id: " + containerId));
if (!Objects.equals(appId, container.getAppId())) {
throw new RuntimeException("Permission Denied!");
}
ServerDestroyContainerRequest destroyRequest = new ServerDestroyContainerRequest(container.getId());
workerClusterQueryService.getAllAliveWorkers(container.getAppId()).forEach(workerInfo -> {
final URL url = ServerURLFactory.destroyContainer2Worker(workerInfo.getAddress());
transportService.tell(workerInfo.getProtocol(), url, destroyRequest);
});
log.info("[ContainerService] delete container: {}.", container);
// 软删除
container.setStatus(SwitchableStatus.DELETED.getV());
container.setGmtModified(new Date());
containerInfoRepository.saveAndFlush(container);
}
delete方法构造ServerDestroyContainerRequest请求,然后遍历workerClusterQueryService.getAllAliveWorkers,挨个发送destroyRequest,最后更新container状态为DELETED,保存到数据库
public String uploadContainerJarFile(MultipartFile file) throws IOException {
String workerDirStr = OmsFileUtils.genTemporaryWorkPath();
String tmpFileStr = workerDirStr + "tmp.jar";
File workerDir = new File(workerDirStr);
File tmpFile = new File(tmpFileStr);
try {
// 下载到本地
FileUtils.forceMkdirParent(tmpFile);
file.transferTo(tmpFile);
// 生成MD5,这兄弟耗时有点小严重
String md5 = OmsFileUtils.md5(tmpFile);
String fileName = genContainerJarName(md5);
// 上传到 mongoDB,这兄弟耗时也有点小严重,导致这个接口整体比较慢...不过也没必要开线程去处理
gridFsManager.store(tmpFile, GridFsManager.CONTAINER_BUCKET, fileName);
// 将文件拷贝到正确的路径
String finalFileStr = OmsFileUtils.genContainerJarPath() + fileName;
File finalFile = new File(finalFileStr);
if (finalFile.exists()) {
FileUtils.forceDelete(finalFile);
}
FileUtils.moveFile(tmpFile, finalFile);
return md5;
}finally {
CommonUtils.executeIgnoreException(() -> FileUtils.forceDelete(workerDir));
}
}
uploadContainerJarFile先将MultipartFile保存到本地,然后通过gridFsManager.store保存到gridFs,然后将文件移动到指定目录,最后删除临时目录
public File fetchContainerJarFile(String version) {
String fileName = genContainerJarName(version);
String filePath = OmsFileUtils.genContainerJarPath() + fileName;
File localFile = new File(filePath);
if (localFile.exists()) {
return localFile;
}
if (gridFsManager.available()) {
downloadJarFromGridFS(fileName, localFile);
}
return localFile;
}
fetchContainerJarFile根据版本生成文件名,然后去本地查找,找到则返回,找不到则判断gridFsManager是否可用,可用则downloadJarFromGridFS
public void deploy(Long containerId, Session session) throws Exception {
String deployLock = "containerDeployLock-" + containerId;
RemoteEndpoint.Async remote = session.getAsyncRemote();
// 最长部署时间:10分钟
boolean lock = lockService.tryLock(deployLock, DEPLOY_MAX_COST_TIME);
if (!lock) {
remote.sendText("SYSTEM: acquire deploy lock failed, maybe other user is deploying, please wait until the running deploy task finished.");
return;
}
try {
Optional containerInfoOpt = containerInfoRepository.findById(containerId);
if (!containerInfoOpt.isPresent()) {
remote.sendText("SYSTEM: can't find container by id: " + containerId);
return;
}
ContainerInfoDO container = containerInfoOpt.get();
Date lastDeployTime = container.getLastDeployTime();
if (lastDeployTime != null) {
if ((System.currentTimeMillis() - lastDeployTime.getTime()) < DEPLOY_MIN_INTERVAL) {
remote.sendText("SYSTEM: [warn] deploy too frequent, last deploy time is: " + DateFormatUtils.format(lastDeployTime, OmsConstant.TIME_PATTERN));
}
}
// 准备文件
File jarFile = prepareJarFile(container, session);
if (jarFile == null) {
return;
}
double sizeMB = 1.0 * jarFile.length() / FileUtils.ONE_MB;
remote.sendText(String.format("SYSTEM: the jarFile(size=%fMB) is prepared and ready to be deployed to the worker.", sizeMB));
// 修改数据库,更新 MD5和最新部署时间
Date now = new Date();
container.setGmtModified(now);
container.setLastDeployTime(now);
containerInfoRepository.saveAndFlush(container);
remote.sendText(String.format("SYSTEM: update current container version=%s successfully!", container.getVersion()));
// 开始部署(需要分批进行)
final List allAliveWorkers = workerClusterQueryService.getAllAliveWorkers(container.getAppId());
if (allAliveWorkers.isEmpty()) {
remote.sendText("SYSTEM: there is no worker available now, deploy failed!");
return;
}
String port = environment.getProperty("local.server.port");
String downloadURL = String.format("http://%s:%s/container/downloadJar?version=%s", NetUtils.getLocalHost(), port, container.getVersion());
ServerDeployContainerRequest req = new ServerDeployContainerRequest(containerId, container.getContainerName(), container.getVersion(), downloadURL);
long sleepTime = calculateSleepTime(jarFile.length());
AtomicInteger count = new AtomicInteger();
allAliveWorkers.forEach(workerInfo -> {
final URL url = ServerURLFactory.deployContainer2Worker(workerInfo.getAddress());
transportService.tell(workerInfo.getProtocol(), url, req);
remote.sendText("SYSTEM: send deploy request to " + url.getAddress());
if (count.incrementAndGet() % DEPLOY_BATCH_NUM == 0) {
CommonUtils.executeIgnoreException(() -> Thread.sleep(sleepTime));
}
});
remote.sendText("SYSTEM: deploy finished, congratulations!");
}finally {
lockService.unlock(deployLock);
}
}
deploy先加锁,然后通过prepareJarFile准备jar,更新container的最近部署时间,然后通过workerClusterQueryService.getAllAliveWorkers(container.getAppId())获取存活的workers,遍历挨个执行ServerDeployContainerRequest,最后释放锁
@DesignateServer
public String fetchDeployedInfo(Long appId, Long containerId) {
List infoList = workerClusterQueryService.getDeployedContainerInfos(appId, containerId);
Set aliveWorkers = workerClusterQueryService.getAllAliveWorkers(appId)
.stream()
.map(WorkerInfo::getAddress)
.collect(Collectors.toSet());
Set deployedList = Sets.newLinkedHashSet();
List unDeployedList = Lists.newLinkedList();
Multimap version2Address = ArrayListMultimap.create();
infoList.forEach(info -> {
String targetWorkerAddress = info.getWorkerAddress();
if (aliveWorkers.contains(targetWorkerAddress)) {
deployedList.add(targetWorkerAddress);
version2Address.put(info.getVersion(), targetWorkerAddress);
}else {
unDeployedList.add(targetWorkerAddress);
}
});
StringBuilder sb = new StringBuilder("========== DeployedInfo ==========").append(System.lineSeparator());
// 集群分裂,各worker版本不统一,问题很大
if (version2Address.keySet().size() > 1) {
sb.append("WARN: there exists multi version container now, please redeploy to fix this problem").append(System.lineSeparator());
sb.append("divisive version ==> ").append(System.lineSeparator());
version2Address.forEach((v, addressList) -> {
sb.append("version: ").append(v).append(System.lineSeparator());
sb.append(addressList);
});
sb.append(System.lineSeparator());
}
// 当前在线未部署机器
if (!CollectionUtils.isEmpty(unDeployedList)) {
sb.append("WARN: there exists unDeployed worker(OhMyScheduler will auto fix when some job need to process)").append(System.lineSeparator());
sb.append("unDeployed worker list ==> ").append(System.lineSeparator());
}
// 当前部署机器
sb.append("deployed worker list ==> ").append(System.lineSeparator());
if (CollectionUtils.isEmpty(deployedList)) {
sb.append("no worker deployed now~");
}else {
sb.append(deployedList);
}
return sb.toString();
}
fetchDeployedInfo先通过workerClusterQueryService.getDeployedContainerInfos获取DeployedContainerInfo,再通过workerClusterQueryService.getAllAliveWorkers(appId)获取aliveWorkers,最后遍历DeployedContainerInfo,挨个判断是否已经部署,最后构建DeployedInfo输出
private File prepareJarFile(ContainerInfoDO container, Session session) throws Exception {
RemoteEndpoint.Async remote = session.getAsyncRemote();
// 获取Jar,Git需要先 clone成Jar计算MD5,JarFile则直接下载
ContainerSourceType sourceType = ContainerSourceType.of(container.getSourceType());
if (sourceType == ContainerSourceType.Git) {
String workerDirStr = OmsFileUtils.genTemporaryWorkPath();
File workerDir = new File(workerDirStr);
FileUtils.forceMkdir(workerDir);
try {
// git clone
remote.sendText("SYSTEM: start to git clone the code repo, using config: " + container.getSourceInfo());
GitRepoInfo gitRepoInfo = JsonUtils.parseObject(container.getSourceInfo(), GitRepoInfo.class);
CloneCommand cloneCommand = Git.cloneRepository()
.setDirectory(workerDir)
.setURI(gitRepoInfo.getRepo())
.setBranch(gitRepoInfo.getBranch());
if (!StringUtils.isEmpty(gitRepoInfo.getUsername())) {
CredentialsProvider credentialsProvider = new UsernamePasswordCredentialsProvider(gitRepoInfo.getUsername(), gitRepoInfo.getPassword());
cloneCommand.setCredentialsProvider(credentialsProvider);
}
cloneCommand.call();
// 获取最新的 commitId 作为版本
String oldVersion = container.getVersion();
try (Repository repository = Git.open(workerDir).getRepository()) {
Ref head = repository.getRefDatabase().findRef("HEAD");
container.setVersion(head.getObjectId().getName());
}
if (container.getVersion().equals(oldVersion)) {
remote.sendText(String.format("SYSTEM: this commitId(%s) is the same as the last.", oldVersion));
}else {
remote.sendText(String.format("SYSTEM: new version detected, from %s to %s.", oldVersion, container.getVersion()));
}
remote.sendText("SYSTEM: git clone successfully, star to compile the project.");
// mvn clean package -DskipTests -U
Invoker mvnInvoker = new DefaultInvoker();
InvocationRequest ivkReq = new DefaultInvocationRequest();
// -U:强制让Maven检查所有SNAPSHOT依赖更新,确保集成基于最新的状态
// -e:如果构建出现异常,该参数能让Maven打印完整的stack trace
// -B:让Maven使用批处理模式构建项目,能够避免一些需要人工参与交互而造成的挂起状态
ivkReq.setGoals(Lists.newArrayList("clean", "package", "-DskipTests", "-U", "-e", "-B"));
ivkReq.setBaseDirectory(workerDir);
ivkReq.setOutputHandler(remote::sendText);
ivkReq.setBatchMode(true);
mvnInvoker.execute(ivkReq);
String targetDirStr = workerDirStr + "/target";
File targetDir = new File(targetDirStr);
IOFileFilter fileFilter = FileFilterUtils.asFileFilter((dir, name) -> name.endsWith("jar-with-dependencies.jar"));
Collection jarFile = FileUtils.listFiles(targetDir, fileFilter, null);
if (CollectionUtils.isEmpty(jarFile)) {
remote.sendText("SYSTEM: can't find packaged jar(maybe maven build failed), so deploy failed.");
return null;
}
File jarWithDependency = jarFile.iterator().next();
String jarFileName = genContainerJarName(container.getVersion());
if (!gridFsManager.exists(GridFsManager.CONTAINER_BUCKET, jarFileName)) {
remote.sendText("SYSTEM: can't find the jar resource in remote, maybe this is a new version, start to upload new version.");
gridFsManager.store(jarWithDependency, GridFsManager.CONTAINER_BUCKET, jarFileName);
remote.sendText("SYSTEM: upload to GridFS successfully~");
}else {
remote.sendText("SYSTEM: find the jar resource in remote successfully, so it's no need to upload anymore.");
}
// 将文件从临时工作目录移动到正式目录
String localFileStr = OmsFileUtils.genContainerJarPath() + jarFileName;
File localFile = new File(localFileStr);
if (localFile.exists()) {
FileUtils.forceDelete(localFile);
}
FileUtils.copyFile(jarWithDependency, localFile);
return localFile;
} catch (Throwable t) {
log.error("[ContainerService] prepareJarFile failed for container: {}", container, t);
remote.sendText("SYSTEM: [ERROR] prepare jar file failed: " + ExceptionUtils.getStackTrace(t));
} finally {
// 删除工作区数据
FileUtils.forceDelete(workerDir);
}
}
// 先查询本地是否存在目标 Jar 文件
String jarFileName = genContainerJarName(container.getVersion());
String localFileStr = OmsFileUtils.genContainerJarPath() + jarFileName;
File localFile = new File(localFileStr);
if (localFile.exists()) {
remote.sendText("SYSTEM: find the jar file in local disk.");
return localFile;
}
// 从 MongoDB 下载
remote.sendText(String.format("SYSTEM: try to find the jarFile(%s) in GridFS", jarFileName));
downloadJarFromGridFS(jarFileName, localFile);
remote.sendText("SYSTEM: download jar file from GridFS successfully~");
return localFile;
}
prepareJarFile针对git来源的先执行clone,然后执行mvn打包,最后找出jar-with-dependencies.jar结尾的文件;非git来源的则先判断本地是否目标jar,没有的话,则从gridFs下载
ContainerController提供了downloadJar、downloadContainerTemplate、jarUpload、save、delete、list、listDeployedWorker接口,其中downloadJar、jarUpload、save、delete委托给了containerService。