Integrating Spark Job Submission with a Spring Boot Project


Principle

Use the SparkLauncher Java API provided by Spark to submit jobs to a Spark cluster from Java code.
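As a minimal sketch of that idea (all paths and class names below are placeholders, not the values used later in this article), the core flow is: build a SparkLauncher, point it at an application jar and main class, launch, and wait for the exit code. The full, configurable version appears in the controller further down.

// Minimal SparkLauncher sketch; paths and class names are placeholders.
import org.apache.spark.launcher.SparkLauncher;

public class MinimalLauncher {
    public static void main(String[] args) throws Exception {
        Process spark = new SparkLauncher()
                .setSparkHome("/opt/spark")                  // local Spark installation
                .setAppResource("/path/to/your-app.jar")     // jar to submit
                .setMainClass("com.example.YourMainClass")   // entry point inside the jar
                .setMaster("yarn")
                .launch();
        int exitCode = spark.waitFor();                      // 0 means the job succeeded
        System.out.println("Spark job finished with exit code " + exitCode);
    }
}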

Implementation

  • A SparkPi computation demo, packaged as a jar (a hedged sketch of such a demo follows the dependency snippet below)
  • Create a new Spring Boot project and add the spark-core dependency to it; otherwise SparkLauncher cannot be found
    <properties>
        <java.version>1.8</java.version>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <scala-version>2.11</scala-version>
        <spark-version>2.4.4</spark-version>
        <scope-type>compile</scope-type>
    </properties>
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-core_${scala-version}</artifactId>
        <version>${spark-version}</version>
        <scope>${scope-type}</scope>
    </dependency>
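A hedged sketch of what such a SparkPi demo jar could contain is shown below. It is the standard Monte Carlo estimate of pi written against the Java RDD API; the package, class name and sample count are illustrative assumptions, not taken from the jar actually used later in this article (whose main class is com.sinovatio.spark.JobStarter).

// Illustrative SparkPi demo (assumed package/class names); package this as the demo jar.
package com.example.spark;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;

import java.util.ArrayList;
import java.util.List;

public class SparkPi {
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setAppName("SparkPi");
        JavaSparkContext jsc = new JavaSparkContext(conf);

        int slices = args.length > 0 ? Integer.parseInt(args[0]) : 2;
        int n = 100000 * slices;
        List<Integer> samples = new ArrayList<>(n);
        for (int i = 0; i < n; i++) {
            samples.add(i);
        }

        // count how many random points fall inside the unit circle
        long inside = jsc.parallelize(samples, slices).filter(i -> {
            double x = Math.random() * 2 - 1;
            double y = Math.random() * 2 - 1;
            return x * x + y * y <= 1;
        }).count();

        System.out.println("Pi is roughly " + 4.0 * inside / n);
        jsc.stop();
    }
}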
  • Create a controller (SparkLauncherController) with the following code:
package com.ly.spark.oneline.controller;

import org.apache.spark.launcher.SparkLauncher;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;

import java.util.HashMap;

@RestController
public class SparkLauncherController {

    @GetMapping(value = "/sparkPI")
    public String submitTask() {
        HashMap<String, String> env = new HashMap<>();
        // read the Hadoop and Spark environment variables (fall back to local defaults)
        env.put("HADOOP_CONF_DIR" ,  System.getenv().getOrDefault("HADOOP_CONF_DIR","/usr/local/hadoop/etc/overriterHaoopConf"));
        env.put("JAVA_HOME", System.getenv().getOrDefault("JAVA_HOME","/usr/local/java/jdk1.8.0_151"));
        // create and configure the SparkLauncher
        SparkLauncher handle = new SparkLauncher(env)
                .setSparkHome("/home/spark/spark-2.4.4-bin-hadoop2.7")
                .setAppResource("/home/sino/spark-model-1.0/lib/spark-model-1.0.jar")
                .setMainClass("com.sinovatio.spark.JobStarter")
                .setMaster("yarn")
                .setDeployMode("client")
                .setConf("spark.yarn.queue","sino")
                .setConf("spark.app.id", "luncher-test")
                .setConf("spark.driver.memory", "1g")
                .setConf("spark.executor.memory", "1g")
                .setConf("spark.executor.instances", "2")
                .setConf("spark.executor.cores", "2")
                .setConf("spark.default.parallelism", "12")
                .setConf("spark.driver.allowMultipleContexts","true")
                .setVerbose(true);

        try {
            // submit the job
            Process process = handle.launch();
            // child threads that stream the launcher process output
            InputStreamReaderRunnable inputStreamReaderRunnable = new InputStreamReaderRunnable(process.getInputStream(), "input");
            Thread inputThread = new Thread(inputStreamReaderRunnable, "LogStreamReader input");
            inputThread.start();

            InputStreamReaderRunnable errorStreamReaderRunnable = new InputStreamReaderRunnable(process.getErrorStream(), "error");
            Thread errorThread = new Thread(errorStreamReaderRunnable, "LogStreamReader error");
            errorThread.start();

            System.out.println("Waiting for finish...");
            int exitCode = process.waitFor();
            System.out.println("Finished! Exit code:" + exitCode);
            return "status: "+exitCode;

        }catch (Exception e){
            e.printStackTrace();
            return "status: "+1;
        }

    }
    @GetMapping(value = "/hello")
    public String hello(){
        return "this is hello page!";
    }

}
    

The child thread class that monitors the process logs:

package com.ly.spark.oneline.controller;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;

public class InputStreamReaderRunnable implements Runnable {
 
    private BufferedReader reader;
 
    private String name;
 
    public InputStreamReaderRunnable(InputStream is, String name) {
        this.reader = new BufferedReader(new InputStreamReader(is));
        this.name = name;
    }
 
    public void run() {
        System.out.println("InputStream " + name + ":");
        try {
            String line = reader.readLine();
            while (line != null) {
                System.out.println(line);
                line = reader.readLine();
            }
            reader.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
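A design note on the controller above: launch() hands back a plain Process, so the HTTP request thread blocks in waitFor() until the Spark job finishes. If you prefer a non-blocking submission, SparkLauncher also provides startApplication(), which returns a SparkAppHandle you can poll or attach listeners to. Below is a hedged sketch of that alternative; the paths, class names and log messages are illustrative assumptions, not part of this article's project.

// Non-blocking alternative using SparkAppHandle; paths and class names are placeholders.
import org.apache.spark.launcher.SparkAppHandle;
import org.apache.spark.launcher.SparkLauncher;

public class AsyncSubmitExample {
    public static void main(String[] args) throws Exception {
        SparkAppHandle handle = new SparkLauncher()
                .setSparkHome("/opt/spark")
                .setAppResource("/path/to/your-app.jar")
                .setMainClass("com.example.YourMainClass")
                .setMaster("yarn")
                .setDeployMode("client")
                .startApplication(new SparkAppHandle.Listener() {
                    @Override
                    public void stateChanged(SparkAppHandle h) {
                        System.out.println("state -> " + h.getState());
                    }

                    @Override
                    public void infoChanged(SparkAppHandle h) {
                        System.out.println("appId -> " + h.getAppId());
                    }
                });

        // the calling thread is free; poll until the application reaches a final state
        while (!handle.getState().isFinal()) {
            Thread.sleep(1000);
        }
        System.out.println("Final state: " + handle.getState());
    }
}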
  • Package the Spring Boot project and place it, together with the SparkPi jar, on a machine in the Hadoop cluster
  • Start the Spring Boot project and call the REST endpoint http://ip:port/sparkPI to submit the job; on success it returns status code 0, otherwise the job failed

The above is based on the official documentation and https://blog.csdn.net/u011244682/article/details/79170134
