spark学习记录(八、广播变量和累加器)

一、广播变量

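[图片:spark学习记录(八、广播变量和累加器)_第1张图片]

广播变量在 Driver 端创建,以只读方式分发到各个 Executor;每个 Executor 只保存一份副本,供其上运行的所有 task 共享,从而避免变量随每个 task 重复传输。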

public class JavaExample {
    public static void main(String[] args) {
        SparkConf conf = new SparkConf();
        conf.setMaster("local").setAppName("JavaExample");
        JavaSparkContext sc = new JavaSparkContext(conf);

        final List list = Arrays.asList("hello world", "hello spark");
        //广播变量
        final Broadcast> broadcast = sc.broadcast(list);
        JavaRDD rdd1 = sc.parallelize(Arrays.asList("hello world", "hello spark", "hello java"));
        JavaRDD rdd2 = rdd1.filter(new Function() {
            public Boolean call(String line) throws Exception {
                return !broadcast.value().contains(line);
            }
        });
        rdd2.foreach(new VoidFunction() {
            public void call(String s) throws Exception {
                System.out.println(s);
            }
        });
    }
}

二、累加器

[图片:spark学习记录(八、广播变量和累加器)_第2张图片]

累加器在 Driver 端定义并初始化,Executor 端的 task 只能对它做累加操作,最终结果只能在 Driver 端通过 value 读取。

/** Demonstrates a Spark accumulator by counting the lines of a text file. */
object ScalaExample {

  /**
   * Reads `C://words.txt`, adds 1 to an accumulator for every line and prints
   * the final accumulated value on the driver.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local").setAppName("ScalaExample")
    val sc = new SparkContext(conf)
    try {
      val rdd1 = sc.textFile("C://words.txt")
      // Accumulator starting at 0.
      // NOTE(review): `sc.accumulator` is the legacy API; on Spark 2.x+ the
      // documented replacement is `sc.longAccumulator` -- confirm the Spark
      // version this project targets before switching.
      val accumulator = sc.accumulator(0)
      // Update the accumulator inside an action (`foreach`) rather than inside a
      // `map` whose collected result is thrown away: Spark only guarantees
      // exactly-once accumulator updates for actions, and nothing needs to be
      // shipped back to the driver here.
      rdd1.foreach { _ =>
        accumulator.add(1)
        // Prints the accumulator as seen by the running task (progress output).
        println(accumulator)
      }
      println(s" i = ${accumulator.value}")
    } finally {
      // Release the context even if the job throws.
      sc.stop()
    }
  }
}

 

你可能感兴趣的:(spark)