Spark调试

  1. 编写代码
// pom.xml



    4.0.0

    org.learn.example
    spark-example
    1.0-SNAPSHOT

    
        
            org.apache.spark
            spark-core_2.11
            2.2.0
            provided
        

        
            org.apache.spark
            spark-sql_2.11
            2.2.0
            provided
        

        
            org.apache.spark
            spark-streaming_2.11
            2.2.0
            provided
        

        
            org.apache.spark
            spark-streaming-kafka-0-10_2.11
            2.2.0
            provided
        

        
            org.apache.commons
            commons-lang3
            3.1
            provided
        

        
            org.apache.kafka
            kafka-clients
            1.1.0
        
    

    
        
            
                org.apache.maven.plugins
                maven-compiler-plugin
                3.7.0
                
                    1.8
                    1.8
                
            
        
    

// 入口

public class SparkJob {

    public static void main(String[] args) {
        String appName = args[0];
        SparkSession session = SparkSession.builder().appName(appName).getOrCreate();
        try {
            String fullClassName = "org.learn.example.jobs." + appName;
            Class clazz = Class.forName(fullClassName);
            Method method = clazz.getDeclaredMethod("run", SparkSession.class);
            method.invoke(clazz.newInstance(), session);
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            session.close();
        }
    }
}


// 各统计
public class WordCount implements ISparkJob {

    @Override
    public void run(SparkSession session) {
        JavaRDD javaRDD = session.createDataset(Arrays.asList("aaa", "bbb", "aaa"), Encoders.STRING()).javaRDD();
        javaRDD.flatMap((line) -> Arrays.asList(line.split(" ")).iterator())
                .mapToPair((word) -> new Tuple2<>(word, 1))
                .reduceByKey((count1, count2) -> count1 + count2)
                .foreach(pair -> System.out.println(pair._1 + " - " + pair._2));
    }

}

  2. 配置IDEA

pom.xml文件中,provided表示打包的时候不打进jar包。

但是用IDEA调试的时候需要使用被标记为provided的jar包,因此需要配置IDEA。

Image 1.png
  3. 点击debug进行调试

另外:将代码打包后可以提交到本地运行(使用如下脚本)

:: submit-local.cmd

spark-submit --master local[*] --class org.learn.example.SparkJob target/spark-example-1.0-SNAPSHOT.jar WordCount


你可能感兴趣的:(Spark调试)