spark scala-基础action操作

本文主要通过 Scala 代码演示 Spark 的基础 action 操作（reduce、collect、count、take、countByKey）。

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext

/**
 * Demonstrates Spark's basic action operations — reduce, collect, count,
 * take, and countByKey — using the Scala API against small in-memory RDDs.
 *
 * @author jhp
 */
object ActionOperation {

  def main(args: Array[String]): Unit = {
    reduce()
    collect()
    count()
    take()
    countByKey()
  }

  /**
   * Runs `body` with a freshly created local-mode SparkContext named
   * `appName`, and always stops the context afterwards.
   *
   * Stopping is essential here: only one active SparkContext is allowed per
   * JVM, so without `sc.stop()` the second demo method invoked from `main`
   * would fail when it tries to create its own context.
   *
   * @param appName the Spark application name shown in the UI/logs
   * @param body    the work to perform with the context
   */
  private def withSparkContext(appName: String)(body: SparkContext => Unit): Unit = {
    val conf = new SparkConf()
      .setAppName(appName)
      .setMaster("local")
    val sc = new SparkContext(conf)
    try body(sc)
    finally sc.stop() // release the context even if body throws
  }

  /**
   * Sums the numbers 1..10 with the `reduce` action and prints the total.
   */
  def reduce(): Unit = withSparkContext("reduce") { sc =>
    val numbers = sc.parallelize(Array(1, 2, 3, 4, 5, 6, 7, 8, 9, 10), 1)
    // reduce is an action: it aggregates all elements down to a single value
    // on the driver.
    val sum = numbers.reduce(_ + _)
    println(sum)
  }

  /**
   * Doubles each element, then brings the results back to the driver with
   * the `collect` action and prints them one per line.
   */
  def collect(): Unit = withSparkContext("collect") { sc =>
    val numbers = sc.parallelize(Array(1, 2, 3, 4, 5, 6, 7, 8, 9, 10), 1)
    val doubledNumbers = numbers.map(_ * 2)
    // collect materializes the entire RDD on the driver — only safe for
    // small data sets like this demo.
    doubledNumbers.collect().foreach(println)
  }

  /**
   * Counts the elements of the RDD with the `count` action and prints it.
   */
  def count(): Unit = withSparkContext("count") { sc =>
    val numbers = sc.parallelize(Array(1, 2, 3, 4, 5, 6, 7, 8, 9, 10), 1)
    val total = numbers.count()
    println(total)
  }

  /**
   * Fetches the first three elements with the `take` action and prints them.
   */
  def take(): Unit = withSparkContext("take") { sc =>
    val numbers = sc.parallelize(Array(1, 2, 3, 4, 5, 6, 7, 8, 9, 10), 1)
    // take(n) returns the first n elements to the driver without scanning
    // the whole RDD.
    val firstThree = numbers.take(3)
    firstThree.foreach(println)
  }

  /**
   * Counts students per class with the `countByKey` action on a pair RDD
   * and prints the resulting Map(class -> count).
   */
  def countByKey(): Unit = withSparkContext("countByKey") { sc =>
    val studentList = Array(
      ("class1", "leo"), ("class2", "jack"),
      ("class1", "tom"), ("class2", "jen"), ("class2", "marry"))
    val students = sc.parallelize(studentList, 1)
    // countByKey returns a local Map[K, Long] to the driver.
    val studentCounts = students.countByKey()
    println(studentCounts)
  }

  /**
   * Sums scores per class with `reduceByKey` and prints each total.
   *
   * NOTE: reduceByKey itself is a transformation, not an action — the
   * `foreach` below is what actually triggers the computation. Kept here
   * because the original file included it (it is not invoked from `main`).
   */
  def reduceByKey(): Unit = withSparkContext("reduceByKey") { sc =>
    // appName fixed: the original said "groupByKey", a copy-paste slip.
    val scoreList = Array(
      ("class1", 80), ("class2", 75),
      ("class1", 90), ("class2", 60))
    val scores = sc.parallelize(scoreList, 1)
    val totalScores = scores.reduceByKey(_ + _)
    totalScores.foreach(classScore => println(classScore._1 + ": " + classScore._2))
  }
}

你可能感兴趣的:(Spark,Action系列)