spark--actions算子--collect

import org.apache.spark.{SparkConf, SparkContext}

/**
  * Created by liupeng on 2017/6/16.
  */
object A_collect {
  // Point Hadoop at a local install so winutils can be located when running on Windows.
  // NOTE(review): hard-coded path — prefer setting HADOOP_HOME in the environment.
  System.setProperty("hadoop.home.dir", "F:\\hadoop-2.6.5")

  /**
    * Demonstrates the `collect` action: doubles a small list of integers on a
    * local Spark cluster and prints the result on the driver.
    */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("collect_test").setMaster("local")
    val sc = new SparkContext(conf)
    try {
      // Prepare some sample data.
      val numberList: List[Int] = List(1, 2, 3, 4, 5)
      val numbers = sc.parallelize(numberList)
        .map(_ * 2)
      // collect() serializes every element of the distributed RDD back to the
      // driver. Prefer the foreach action for large data sets: collect() pulls
      // all partitions over the network and can OOM the driver process.
      val doubleNumbers: Array[Int] = numbers.collect()
      doubleNumbers.foreach(println)
    } finally {
      // Always release the SparkContext, even if the job throws.
      sc.stop()
    }
  }
}
运行结果:

2
4
6
8
10

你可能感兴趣的:(spark)