spark大数据分析:spark core(10)广播变量

文章目录

      • 缘由
      • 案例

缘由

广播变量允许开发者将一份只读数据缓存在每台机器(Executor)上,而不是随每个任务各发送一份副本,从而避免机器之间频繁的网络IO,减少网络开销以及CPU序列化、反序列化的开销。需要注意的是,广播变量本身是只读(不可变)的;Spark的共享变量分为两类:只读的广播变量,以及只能做累加操作的累加器。

案例

通过城市id补全用户城市信息

import org.apache.spark.{SparkConf, SparkContext}

/**
 * Broadcast-variable example: groups telephone numbers by city code and
 * enriches each group with city and user details looked up from two
 * broadcast lookup maps.
 */
object UserCityBrocast {
  /**
   * Runs the example on a local Spark master and prints the collected result.
   *
   * @param args command-line arguments (not used)
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
      .setMaster("local[*]")
      .setAppName("UserCityBrocast")
    val sc = new SparkContext(conf)

    // Always stop the SparkContext, even if the job fails, so that its
    // resources are released.
    try {
      // Lookup table: city code -> city name.
      val cityDetailMap = Map(
        "010" -> "北京",
        "021" -> "上海",
        "020" -> "广州",
        "0755" -> "深圳")

      // Lookup table: telephone -> (user id, user name).
      val userDetailMap = Map(
        "15837312345" -> ("userID_001", "Alice"),
        "15837322331" -> ("userID_002", "Bob"),
        "13637316666" -> ("userID_003", "Thomas"),
        "18537312399" -> ("userID_004", "Karen"),
        "13637312376" -> ("userID_005", "Tom"),
        "13737312908" -> ("userID_006", "Kotlin"))

      // Broadcast both lookup tables so the closures below read them through
      // the broadcast handles instead of capturing the maps directly.
      val cdmBroadcast = sc.broadcast(cityDetailMap)
      val udmBroadcast = sc.broadcast(userDetailMap)

      // Input records: (city code, telephone).
      val userArray = Array(
        ("010", "15837322331"),
        ("010", "18537312399"),
        ("0755", "13737312908"),
        ("020", "13637312376"),
        ("020", "15837312345"))

      val userRDD = sc.parallelize(userArray, 2)

      // Collect the distinct telephones of every city code into one set.
      val aggregateRDD = userRDD.aggregateByKey(collection.mutable.Set[String]())(
        (telephoneSet, telephone) => telephoneSet += telephone,
        (telephoneSet1, telephoneSet2) => telephoneSet1 ++= telephoneSet2)

      // Resolve codes and telephones against the broadcast maps.
      // Map.apply throws NoSuchElementException for a key that is not present.
      val resultRDD = aggregateRDD.map { case (cityCode, telephones) =>
        val cityInfo = CityInfo(cityCode, cdmBroadcast.value(cityCode))
        val userInfoSet = telephones.map { telephone =>
          val (userID, userName) = udmBroadcast.value(telephone)
          UserInfo(userID, telephone, userName)
        }
        (cityInfo, userInfoSet)
      }
      println(resultRDD.collect().mkString(","))

      // Release the broadcast variables; they are no longer used.
      cdmBroadcast.unpersist()
      udmBroadcast.unpersist()
    } finally {
      sc.stop()
    }
  }
}
/**
 * City details attached to each result group.
 *
 * @param cityCode code identifying the city
 * @param cityName display name of the city
 */
case class CityInfo(
  cityCode: String,
  cityName: String
)

/**
 * User details resolved for a telephone number.
 *
 * @param userID    identifier of the user
 * @param telephone telephone number of the user
 * @param userName  name of the user
 */
case class UserInfo(
  userID: String,
  telephone: String,
  userName: String
)

你可能感兴趣的:(spark-鲨鱼)