The Correct Way to Use Spark Accumulators

The following example, written against the Spark 1.6 Java API, probes where an Accumulator can be updated and where its value can actually be read.

import java.util.Arrays;
import java.util.List;

import org.apache.spark.Accumulator;
import org.apache.spark.Partition;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.VoidFunction;
import org.apache.spark.broadcast.Broadcast;
import org.apache.spark.sql.SQLContext;

import scala.Tuple2;

public class AccumulatorTest {

    public static void main(String[] args) {

        SparkConf conf = new SparkConf()
                .setMaster("local[3]")
                .setAppName("CoalesceTest");
        JavaSparkContext sc = new JavaSparkContext(conf);
        SQLContext sqlContext = new SQLContext(sc.sc());

        List<String> grades = Arrays.asList("85", "90", "60", null);

        List<Tuple2<String, Integer>> nameAndGrades = Arrays.asList(
                new Tuple2<String, Integer>("jack", 88),
                new Tuple2<String, Integer>("lucy", 90),
                new Tuple2<String, Integer>("marry", 66),
                new Tuple2<String, Integer>(null, 0));
        JavaPairRDD<String, Integer> parallelizePairs = sc.parallelizePairs(nameAndGrades, 3);
        List<Partition> partitions = parallelizePairs.partitions();

        final Accumulator<Integer> accumulator = sc.accumulator(0);
        final Accumulator<Integer> accumulatorB = sc.accumulator(0);
        final Broadcast<Accumulator<Integer>> broadcast = sc.broadcast(accumulator);

        parallelizePairs.foreach(new VoidFunction<Tuple2<String, Integer>>() {

            private static final long serialVersionUID = 1L;

            @Override
            public void call(Tuple2<String, Integer> t) throws Exception {

                System.out.println("foreach-------" + t._1 + "---" + t._2);
                // Incrementing a plain variable captured in this closure has no effect
                // on the driver; only an Accumulator's add operation accumulates across tasks.

                Accumulator<Integer> value = broadcast.value();

                // value.add(1); // this would be a second add on the same accumulator,
                //               // since the broadcast hands back the current accumulator
                accumulator.add(1);
                accumulatorB.add(1);

                // Going through the broadcast reference correctly exposes the
                // accumulator's initialValue as well as its localValue.
                // System.out.println("-------accumulator init value is " + value.initialValue());

                // initialValue cannot be read from the closure-captured copy.
                // System.out.println("-------accumulatorB init value is " + accumulatorB.initialValue());

                // A plain counter captured in the closure does not yield the correct total.
                // System.out.println("foreach----------count is " + count++);

                System.out.println("----accumulator localValue is " + value.localValue());

                // System.out.println("----accumulatorB localValue is " + accumulatorB.localValue());
            }
        });
        // An accumulator's value can only be read in the driver. The Spark docs put it this way:
        // "Create an [[org.apache.spark.Accumulator]] variable of a given type, which tasks can 'add'
        // values to using the add method. Only the master can access the accumulator's value."
        // Tasks running on the cluster may accumulate via add or "+=", but they cannot read
        // the value; only the driver program can, through the accumulator's value method.

        System.out.println("----accumulator localValue is " + accumulator.localValue()
                + " accumulatorB.value : " + accumulatorB.value());

        sc.stop();
    }
}
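
To distill the comments above into the recommended pattern, here is a minimal sketch: tasks only ever call add(), and the driver reads value() once, after the action has completed. The class name AccumulatorPattern and the null-counting dataset are illustrative, assuming the same Spark 1.6 Java API as the example above.

import java.util.Arrays;

import org.apache.spark.Accumulator;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.VoidFunction;

public class AccumulatorPattern {

    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setMaster("local[3]").setAppName("AccumulatorPattern");
        JavaSparkContext sc = new JavaSparkContext(conf);

        // Illustrative data: count how many grades are null.
        JavaRDD<String> grades = sc.parallelize(Arrays.asList("85", "90", null, "60"), 3);
        final Accumulator<Integer> nullCount = sc.accumulator(0);

        // Tasks only call add(); they never read the accumulator's value.
        grades.foreach(new VoidFunction<String>() {
            private static final long serialVersionUID = 1L;

            @Override
            public void call(String grade) throws Exception {
                if (grade == null) {
                    nullCount.add(1);
                }
            }
        });

        // Only after the action has finished does the driver read value().
        System.out.println("null grades: " + nullCount.value()); // prints 1

        sc.stop();
    }
}

Note that updating an accumulator inside an action such as foreach is what gives the clean once-per-record semantics; updates made inside transformations such as map may be reapplied if a stage is retried, so exact counts should be accumulated in actions.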

