Spark getting-started example code: wordcount

// load the file as an RDD of lines
val rawFile = sc.textFile("README.md")
// split each line into words
val words = rawFile.flatMap(line => line.split(" "))
// pair every word with an initial count of 1
val wordNumber = words.map(w => (w, 1))
// sum the counts per word
val wordCounts = wordNumber.reduceByKey(_ + _)
wordCounts.foreach(println)
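
In spark-shell this prints on the driver console, but on a cluster foreach(println) runs inside the executors, so the output lands in executor logs rather than on the driver. A minimal sketch for bringing a small result back to the driver and printing only the most frequent words (the cutoff of 10 is purely illustrative):

// sort by count on the cluster, then collect just the top 10 to the driver
wordCounts
  .sortBy({ case (_, count) => count }, ascending = false)
  .take(10)
  .foreach(println)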


sc.textFile("README.md").flatMap(line => line.split(" ")).map(w => (w,1)).reduceByKey(_ + _).foreach(println)


sc.textFile("README.md").flatMap(line => line.split(" ")).map(w => (w,1)).groupByKey().map((p:(String, Iterable[Int])) => (p._1,p._2.sum)).collect


SimpleApp.scala
import org.apache.spark.SparkContext
import org.apache.spark.SparkContext._
import org.apache.spark.SparkConf

object SimpleApp {
  def main(args: Array[String]) {
    val logFile = "README.md"
    val conf = new SparkConf().setAppName("Simple Application")
    val sc = new SparkContext(conf)
    // cache the file since it is scanned twice below
    val logData = sc.textFile(logFile, 2).cache()
    val numAs = logData.filter(line => line.contains("a")).count()
    val numBs = logData.filter(line => line.contains("b")).count()
    println("Lines with a: %s, Lines with b: %s".format(numAs, numBs))
    sc.stop()
  }
}


simple.sbt

name := "Simple Project"

version := "1.0"

scalaVersion := "2.11.4"

libraryDependencies += "org.apache.spark" %% "spark-core" % "1.4.0"

// an sbt resolver needs both a name and a URL; this one is not strictly
// required for spark-core 1.4.0, which is available from Maven Central
resolvers += "Akka Repository" at "http://repo.akka.io/releases/"
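
With SimpleApp.scala placed under src/main/scala/ and simple.sbt at the project root (the standard sbt layout), the application can be packaged and submitted along these lines; the jar path is what sbt derives from the name, version and Scala version above, and local[4] is just an example master URL:

sbt package
spark-submit --class "SimpleApp" --master local[4] target/scala-2.11/simple-project_2.11-1.0.jar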

