Building a Scala program from scratch to implement wordcount

[Images 1–6: project setup screenshots omitted]

Write the pom.xml file


<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <groupId>portrait_conf_parse</groupId>
  <artifactId>portrait_conf_parse</artifactId>
  <version>1.0-SNAPSHOT</version>
  <name>${project.artifactId}</name>
  <description>My wonderfull scala app</description>
  <inceptionYear>2015</inceptionYear>

  <properties>
    <maven.compiler.source>1.7</maven.compiler.source>
    <maven.compiler.target>1.7</maven.compiler.target>
    <encoding>UTF-8</encoding>
    <scala.tools.version>2.10</scala.tools.version>
    <scala.version>2.11.8</scala.version>
    <spark.version>2.2.0</spark.version>
  </properties>

  <dependencies>

    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-client</artifactId>
      <version>2.7.1</version>
    </dependency>

    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-core_2.11</artifactId>
      <version>${spark.version}</version>
    </dependency>

    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-hive_2.11</artifactId>
      <version>${spark.version}</version>
    </dependency>

    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-sql_2.11</artifactId>
      <version>${spark.version}</version>
    </dependency>

    <dependency>
      <groupId>org.scala-lang</groupId>
      <artifactId>scala-library</artifactId>
      <version>${scala.version}</version>
    </dependency>

    <!-- Test -->
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>4.11</version>
      <scope>test</scope>
    </dependency>
    <dependency>
      <groupId>org.specs2</groupId>
      <artifactId>specs2_${scala.tools.version}</artifactId>
      <version>1.13</version>
      <scope>test</scope>
    </dependency>
    <dependency>
      <groupId>org.scalatest</groupId>
      <artifactId>scalatest_${scala.tools.version}</artifactId>
      <version>2.0.M6-SNAP8</version>
      <scope>test</scope>
    </dependency>
  </dependencies>

  <build>
    <sourceDirectory>src/main/scala</sourceDirectory>
    <testSourceDirectory>src/test/scala</testSourceDirectory>
    <plugins>
      <plugin>
        <groupId>org.scala-tools</groupId>
        <artifactId>maven-scala-plugin</artifactId>
        <executions>
          <execution>
            <goals>
              <goal>compile</goal>
              <goal>testCompile</goal>
            </goals>
          </execution>
        </executions>
        <configuration>
          <scalaVersion>${scala.version}</scalaVersion>
          <args>
            <arg>-target:jvm-1.5</arg>
          </args>
        </configuration>
      </plugin>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-eclipse-plugin</artifactId>
        <configuration>
          <downloadSources>true</downloadSources>
          <buildcommands>
            <buildcommand>ch.epfl.lamp.sdt.core.scalabuilder</buildcommand>
          </buildcommands>
          <additionalProjectnatures>
            <projectnature>ch.epfl.lamp.sdt.core.scalanature</projectnature>
          </additionalProjectnatures>
          <classpathContainers>
            <classpathContainer>org.eclipse.jdt.launching.JRE_CONTAINER</classpathContainer>
            <classpathContainer>ch.epfl.lamp.sdt.launching.SCALA_CONTAINER</classpathContainer>
          </classpathContainers>
        </configuration>
      </plugin>
    </plugins>
  </build>
  <reporting>
    <plugins>
      <plugin>
        <groupId>org.scala-tools</groupId>
        <artifactId>maven-scala-plugin</artifactId>
        <configuration>
          <scalaVersion>${scala.version}</scalaVersion>
        </configuration>
      </plugin>
    </plugins>
  </reporting>
</project>
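This pom looks like it was generated from the standard Scala Maven archetype and then extended with the Hadoop and Spark dependencies. One thing worth checking: scala.tools.version is still 2.10 while scala.version and the Spark artifacts are on 2.11, so if the specs2/scalatest test dependencies are ever exercised they should probably be switched to the _2.11 variants. With the file in place, a plain mvn clean package compiles the Scala sources through the maven-scala-plugin executions (compile and testCompile) declared above.
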
WordCount.scala

[Image 7 omitted]

import org.apache.spark.rdd.RDD
import org.apache.spark.sql.SparkSession

object WordCount {

  def main(args: Array[String]): Unit = {
    // Build a local SparkSession; enableHiveSupport() works here because
    // spark-hive is on the classpath (see the pom.xml above).
    val sparkSession = SparkSession.builder()
      .appName("HelloWorld")
      .master("local")
      .enableHiveSupport()
      .getOrCreate()

    val sc = sparkSession.sparkContext
    // Read the input file as an RDD of lines.
    val txt = sc.textFile("/Users/workspace/portrait_conf_parse/conf_json")

    // Split each line into words, map each word to (word, 1), then sum the counts per word.
    val words = txt.flatMap(item => item.split(" "))
    val counts: RDD[(String, Int)] = words.map(word => (word, 1)).reduceByKey(_ + _)
    counts.foreach { case (word, count) => println(s"$word: $count") }

    sparkSession.stop()
  }
}
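
Because spark-sql is already a dependency, the same count can also be expressed with the Dataset API. The snippet below is only an illustrative sketch, not part of the original program; it assumes the sparkSession and input path from the listing above.

    // Sketch only: Dataset-API word count, reusing the sparkSession and input path above.
    import sparkSession.implicits._

    val wordsDs = sparkSession.read
      .textFile("/Users/workspace/portrait_conf_parse/conf_json") // Dataset[String], one row per line
      .flatMap(_.split(" "))

    // A Dataset[String] exposes its single column as "value".
    val countsDf = wordsDs.groupBy("value").count()
    countsDf.show(20, truncate = false)

When running inside the IDE the .master("local") setting is enough; if the jar is later submitted to a cluster with spark-submit, that call is usually dropped so the master can be chosen at submit time.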
