Using Broadcast Variables and Accumulators

package com.ws.spark

import org.apache.spark.broadcast.Broadcast
import org.apache.spark.util.LongAccumulator
import org.apache.spark.{SparkConf, SparkContext}

/**
  * Demonstrates the use of broadcast variables and accumulators.
  */
object AutoIncre {

  def main(args: Array[String]): Unit = {

    val sparkConf = new SparkConf().setAppName("AutoIncre").setMaster("local[*]")

    val sparkContext = new SparkContext(sparkConf)

    // bd(sparkContext)  // uncomment to run the broadcast variable demo
    auto(sparkContext)

    sparkContext.stop()
  }

  // Broadcast variable test.
  // Without a broadcast variable: every task holds its own copy of the variable, which costs
  // extra memory and network IO (the larger the variable, the worse it gets).
  // With a broadcast variable: each executor process holds a single copy, shared by all task
  // threads in that process (less memory, lower overhead, less network IO).
  def bd(sparkContext: SparkContext): Unit ={
    val numberRdd = sparkContext.parallelize(1 to 10)

    val total = 10

    // Ship total to every executor once, instead of once per task closure
    val bd: Broadcast[Int] = sparkContext.broadcast(total)

    numberRdd.map(x => {
      // Read the executor-local broadcast copy instead of closing over the driver variable
      val value: Int = bd.value
      println(s"x = $x , value = $value")
    }).collect()
  }
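
  // A minimal sketch (not part of the original post) of a common broadcast use case:
  // shipping a small lookup Map to every executor once so that each task can enrich
  // records locally instead of performing a shuffle join. The method name and the
  // sample data are hypothetical.
  def bdLookup(sparkContext: SparkContext): Unit = {
    val countryNames: Map[Int, String] = Map(1 -> "CN", 2 -> "US", 3 -> "JP")
    val bdNames: Broadcast[Map[Int, String]] = sparkContext.broadcast(countryNames)

    val idRdd = sparkContext.parallelize(Seq(1, 2, 3, 2, 1))

    // Each task reads the executor-local copy of the map; no shuffle is needed
    val named = idRdd.map(id => (id, bdNames.value.getOrElse(id, "UNKNOWN")))

    named.collect().foreach(println)
  }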

  // Accumulator test
  def auto(sc: SparkContext): Unit ={
    val numRdd = sc.parallelize(1 to 6)

    // Driver-defined long accumulator; tasks add to it, the driver reads the result
    val accumulator: LongAccumulator = sc.longAccumulator

    // The accumulator is updated inside a transformation, so the updates only
    // take effect when an action (collect) forces evaluation
    numRdd.map(x => {
      accumulator.add(x)
    }).collect()

    println(accumulator.count) // number of add() calls: 6
    println(accumulator.sum)   // 1 + 2 + ... + 6 = 21
    println(accumulator.value) // for LongAccumulator, value returns the sum: 21
  }
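
  // A minimal sketch (not part of the original post) of a well-known accumulator caveat:
  // updates made inside a transformation such as map() may be applied more than once if
  // the stage is recomputed, whereas updates made inside an action such as foreach() are
  // applied exactly once per successful run. The method name is hypothetical.
  def autoInAction(sc: SparkContext): Unit = {
    val numRdd = sc.parallelize(1 to 6)

    val acc: LongAccumulator = sc.longAccumulator("sumAcc")

    // foreach is an action, so the updates are applied when this line runs
    numRdd.foreach(x => acc.add(x))

    println(acc.value) // expected: 21
  }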

}
