Spark广播变量的使用

// landscape_zb has type mutable.HashMap[String, ArrayBuffer[Any]].
// A single landscape entry has the shape (new_key, (ls_id, lat, lon)); the ArrayBuffer
// groups the multiple lat/lon values that belong to the same key.
// Wrap the lookup table in a Spark broadcast variable so tasks read it through
// `.value` instead of capturing the raw map in the closure.
// NOTE(review): this could be a `val` if nothing outside this snippet reassigns it.
var bc_landscape_zb = sc.broadcast(landscape_zb)

// Map-side lookup: keep only the records whose new_key is present in the broadcast
// table and attach the matching landscape buffer to each of them.
val bc_landscape_new =
  no_home_new_key.mapPartitions { iter =>
    // Dereference the broadcast handle once per partition, not once per record.
    val landscapeByKey = bc_landscape_zb.value
    // Each record is (new_key, (phone_no, travel_time, lat, lon)).
    iter.flatMap { case (newKey, fields) =>
      // A single `get` replaces the former `contains` + `get(...).get` pair:
      // keys that are absent yield None and are dropped by flatMap.
      landscapeByKey
        .get(newKey)
        .map(landscape => (fields._1, fields._2, fields._3, landscape))
    }
    // Emits (fields._1, fields._2, fields._3, <buffer stored under new_key>).
    // NOTE(review): the original note described the output as
    // (phone_no, travel_time, ls_id, (Double, Double), (Double, Double)), but per the
    // record shape above the third element is `lat` and the fourth is the whole
    // looked-up buffer — confirm which shape downstream code expects.
  }

你可能感兴趣的:(Spark)