


// sbt build definition for the SparkPi example.
// Project name; sbt reports it as the current project when the build is loaded.
name := "SparkPi"

// Version of this example project.
version := "1.0"

// Scala compiler version used to build the project.
scalaVersion := "2.10.3"

// Spark core library; `%%` makes sbt append the Scala binary version to the artifact name.
libraryDependencies += "org.apache.spark" %% "spark-core" % "0.9.1"

// Additional repository for dependency resolution.
// NOTE(review): this resolver uses plain HTTP — confirm whether an HTTPS URL should be used instead.
resolvers += "Akka Repository" at "http://repo.akka.io/releases/"


import scala.math.random

import org.apache.spark._

/**
 * Computes an approximation to pi with a Monte Carlo simulation run as a Spark job.
 *
 * Random points are drawn from the square [-1, 1) x [-1, 1); the fraction of them
 * that lands inside the unit circle approximates pi / 4.
 */
object SparkPi {
  /**
   * Program entry point.
   *
   * @param args `args(0)` is the Spark master URL (required, e.g. `local[3]`);
   *             `args(1)` is the optional number of slices and defaults to 2.
   */
  def main(args: Array[String]): Unit = {
    if (args.length == 0) {
      System.err.println("Usage: SparkPi <master> [<slices>]")
      // Without a master URL there is nothing to connect to; exit here rather
      // than falling through to args(0) below.
      System.exit(1)
    }
    val spark = new SparkContext(args(0), "SparkPi",
      System.getenv("SPARK_HOME"), SparkContext.jarOfClass(this.getClass))
    try {
      val slices = if (args.length > 1) args(1).toInt else 2
      // 100000 samples per slice.
      val n = 100000 * slices
      val count = spark.parallelize(1 to n, slices).map { i =>
        // Scale a random number from [0, 1) to [-1, 1) for each coordinate.
        val x = random * 2 - 1
        val y = random * 2 - 1
        // Count 1 for a point inside the unit circle, 0 otherwise.
        if (x*x + y*y < 1) 1 else 0
      }.reduce(_ + _)
      // (points inside circle) / (all points) ~ pi / 4.
      println("Pi is roughly " + 4.0 * count / n)
    } finally {
      // Always release the context, even when the job or argument parsing fails.
      spark.stop()
    }
  }
}

$ find .


$ sbt "project sparkpi" "run local[3]"
[info] Set current project to SparkPi (in build file:/home/jpan/Mywork/spark-example/exspark/SparkPi/)
[info] Set current project to SparkPi (in build file:/home/jpan/Mywork/spark-example/exspark/SparkPi/)
[info] Updating {file:/home/jpan/Mywork/spark-example/exspark/SparkPi/}sparkpi...
[info] Resolving org.fusesource.jansi#jansi;1.4 ...
[info] Done updating.
[info] Compiling 1 Scala source to /home/jpan/Mywork/spark-example/exspark/SparkPi/target/scala-2.10/classes...
[info] Running SparkPi local[3]
Pi is roughly 3.14652
14/05/09 16:10:46 INFO handler.ContextHandler: stopped o.e.j.s.h.ContextHandler{/,null}
14/05/09 16:10:46 INFO handler.ContextHandler: stopped o.e.j.s.h.ContextHandler{/static,null}
14/05/09 16:10:46 INFO handler.ContextHandler: stopped o.e.j.s.h.ContextHandler{/metrics/json,null}
14/05/09 16:10:46 INFO handler.ContextHandler: stopped o.e.j.s.h.ContextHandler{/executors,null}
14/05/09 16:10:46 INFO handler.ContextHandler: stopped o.e.j.s.h.ContextHandler{/environment,null}
14/05/09 16:10:46 INFO handler.ContextHandler: stopped o.e.j.s.h.ContextHandler{/stages,null}
14/05/09 16:10:46 INFO handler.ContextHandler: stopped o.e.j.s.h.ContextHandler{/stages/pool,null}
14/05/09 16:10:46 INFO handler.ContextHandler: stopped o.e.j.s.h.ContextHandler{/stages/stage,null}
14/05/09 16:10:46 INFO handler.ContextHandler: stopped o.e.j.s.h.ContextHandler{/storage,null}
14/05/09 16:10:46 INFO handler.ContextHandler: stopped o.e.j.s.h.ContextHandler{/storage/rdd,null}
14/05/09 16:10:48 INFO spark.MapOutputTrackerMasterActor: MapOutputTrackerActor stopped!
14/05/09 16:10:48 INFO network.ConnectionManager: Selector thread was interrupted!
14/05/09 16:10:48 INFO network.ConnectionManager: ConnectionManager stopped
14/05/09 16:10:48 INFO storage.MemoryStore: MemoryStore cleared
14/05/09 16:10:48 INFO storage.BlockManager: BlockManager stopped
14/05/09 16:10:48 INFO storage.BlockManagerMasterActor: Stopping BlockManagerMaster
14/05/09 16:10:48 INFO storage.BlockManagerMaster: BlockManagerMaster stopped
14/05/09 16:10:48 INFO spark.SparkContext: Successfully stopped SparkContext
14/05/09 16:10:48 INFO remote.RemoteActorRefProvider$RemotingTerminator: Shutting down remote daemon.
14/05/09 16:10:48 INFO remote.RemoteActorRefProvider$RemotingTerminator: Remote daemon shut down; proceeding with flushing remote transports.
[success] Total time: 12 s, completed May 9, 2014 4:10:48 PM

 if (args.length == 0) {
      System.err.println("Usage: SparkPi <master> [<slices>]")


master 参数 local[N] 中的 [N] 是可选输入，表示本地运行时使用的线程数。我输入的是 local[3]，即用 3 个线程运行。

    val spark = new SparkContext(args(0), "SparkPi",
      System.getenv("SPARK_HOME"), SparkContext.jarOfClass(this.getClass))


Main entry point for Spark functionality. A SparkContext represents the connection to a Spark cluster, and can be used to create RDDs, accumulators and broadcast variables on that cluster.


    val count = spark.parallelize(1 to n, slices).map { i =>
      val x = random * 2 - 1
      val y = random * 2 - 1
      if (x*x + y*y < 1) 1 else 0
    }.reduce(_ + _)

def parallelize[T](seq: Seq[T], numSlices: Int = defaultParallelism)(implicit arg0: ClassTag[T]): RDD[T]

Distribute a local Scala collection to form an RDD.

