Spark代码3之Action:reduce,reduceByKey,sortBy,lookup,take,saveAsTextFile
代码:
package LocalSpark

/**
 * Created by xubo on 2016/3/3.
 */

import java.text.SimpleDateFormat
import java.util.Date

import org.apache.spark._

/**
 * Small local-mode demo of common RDD operations: `lookup`, `reduce`,
 * `reduceByKey`, `sortBy`, `take` and `saveAsTextFile`.
 *
 * Usage: `Action1 [outputRoot]`. When `outputRoot` is omitted the placeholder
 * [[Action1.DefaultOutputRoot]] is used; replace `MasterIP` in it with the real
 * NameNode address (or pass a real location as the first argument), otherwise
 * the final `saveAsTextFile` step cannot reach HDFS.
 */
object Action1 {

  /** Output root used when no command-line argument is given; `MasterIP` is a placeholder. */
  private val DefaultOutputRoot = "hdfs://MasterIP:9000/output/"

  /** Prints every pair as `label:(key,value)`, one per line, on the driver. */
  private def printPairs(label: String, pairs: Array[(Char, Int)]): Unit =
    pairs.foreach { case (key, value) => println(s"$label:($key,$value)") }

  /**
   * Runs the demo jobs in order and writes the sorted per-key sums to a
   * timestamped directory under the output root.
   *
   * @param args optional; `args(0)` overrides the output root directory/URI
   */
  def main(args: Array[String]): Unit = {
    // NOTE(review): the app name looks carried over from another example — confirm before renaming.
    val conf = new SparkConf().setAppName("Transformation1").setMaster("local")
    val sc = new SparkContext(conf)

    // Stop the context even if one of the jobs below throws.
    try {
      val a4 = sc.parallelize(
        List(('c', 1), ('d', 1), ('b', 1), ('b', 2), ('b', 3), ('a', 1), ('a', 2)))

      // lookup: all values stored under key 'a'
      a4.lookup('a').foreach(println)

      // reduce: sum of all elements
      val a5 = sc.parallelize(List(1, 2, 4, 3, 5))
      println(a5.reduce(_ + _))

      // reduceByKey: per-key sums. The data is tiny, so collect() it and print
      // on the driver instead of printing from inside the executors.
      val r4 = a4.reduceByKey(_ + _)
      printPairs("a4", a4.collect())
      printPairs("r4", r4.collect())

      // sortBy: order the per-key sums by value, ascending
      val s4 = r4.sortBy(_._2)
      printPairs("s4", s4.collect())

      // take: first element only
      printPairs("t4", a4.take(1))

      // saveAsTextFile: a millisecond timestamp keeps every run in a fresh directory,
      // since saveAsTextFile refuses to overwrite an existing one.
      val timestamp = new SimpleDateFormat("yyyyMMddHHmmssSSS").format(new Date())
      val outputRoot = args.headOption.getOrElse(DefaultOutputRoot)
      s4.saveAsTextFile(outputRoot.stripSuffix("/") + "/" + timestamp)
    } finally {
      sc.stop()
    }
  }
}
// 运行结果: run output follows below.
D:\1win7\java\jdk\bin\java -Didea.launcher.port=7533 "-Didea.launcher.bin.path=D:\1win7\idea\IntelliJ IDEA Community Edition 15.0.4\bin" -Dfile.encoding=UTF-8 -classpath "D:\1win7\java\jdk\jre\lib\charsets.jar;D:\1win7\java\jdk\jre\lib\deploy.jar;D:\1win7\java\jdk\jre\lib\ext\access-bridge-64.jar;D:\1win7\java\jdk\jre\lib\ext\dnsns.jar;D:\1win7\java\jdk\jre\lib\ext\jaccess.jar;D:\1win7\java\jdk\jre\lib\ext\localedata.jar;D:\1win7\java\jdk\jre\lib\ext\sunec.jar;D:\1win7\java\jdk\jre\lib\ext\sunjce_provider.jar;D:\1win7\java\jdk\jre\lib\ext\sunmscapi.jar;D:\1win7\java\jdk\jre\lib\ext\zipfs.jar;D:\1win7\java\jdk\jre\lib\javaws.jar;D:\1win7\java\jdk\jre\lib\jce.jar;D:\1win7\java\jdk\jre\lib\jfr.jar;D:\1win7\java\jdk\jre\lib\jfxrt.jar;D:\1win7\java\jdk\jre\lib\jsse.jar;D:\1win7\java\jdk\jre\lib\management-agent.jar;D:\1win7\java\jdk\jre\lib\plugin.jar;D:\1win7\java\jdk\jre\lib\resources.jar;D:\1win7\java\jdk\jre\lib\rt.jar;D:\1win7\scala;D:\1win7\scala\lib;D:\all\idea\scala2\out\production\scala2;G:\149\spark-assembly-1.5.2-hadoop2.6.0.jar;D:\1win7\scala\lib\scala-actors-migration.jar;D:\1win7\scala\lib\scala-actors.jar;D:\1win7\scala\lib\scala-library.jar;D:\1win7\scala\lib\scala-reflect.jar;D:\1win7\scala\lib\scala-swing.jar;D:\1win7\idea\IntelliJ IDEA Community Edition 15.0.4\lib\idea_rt.jar" com.intellij.rt.execution.application.AppMain LocalSpark.Action1 Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties 16/03/04 10:50:15 INFO SparkContext: Running Spark version 1.5.2 16/03/04 10:50:15 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... 
using builtin-java classes where applicable 16/03/04 10:50:16 INFO SecurityManager: Changing view acls to: xubo 16/03/04 10:50:16 INFO SecurityManager: Changing modify acls to: xubo 16/03/04 10:50:16 INFO SecurityManager: SecurityManager: authentication disabled; ui acls disabled; users with view permissions: Set(xubo); users with modify permissions: Set(xubo) 16/03/04 10:50:16 INFO Slf4jLogger: Slf4jLogger started 16/03/04 10:50:16 INFO Remoting: Starting remoting 16/03/04 10:50:17 INFO Remoting: Remoting started; listening on addresses :[akka.tcp://sparkDriver@219.219.220.162:62811] 16/03/04 10:50:17 INFO Utils: Successfully started service 'sparkDriver' on port 62811. 16/03/04 10:50:17 INFO SparkEnv: Registering MapOutputTracker 16/03/04 10:50:17 INFO SparkEnv: Registering BlockManagerMaster 16/03/04 10:50:17 INFO DiskBlockManager: Created local directory at C:\Users\xubo\AppData\Local\Temp\blockmgr-f729c5d8-48fd-4bba-80d6-48f0cc86692c 16/03/04 10:50:17 INFO MemoryStore: MemoryStore started with capacity 730.6 MB 16/03/04 10:50:17 INFO HttpFileServer: HTTP File server directory is C:\Users\xubo\AppData\Local\Temp\spark-52f1804d-6099-46a6-b52e-473d64044637\httpd-525f3e90-1ce9-451a-9abb-f2cbd979599c 16/03/04 10:50:17 INFO HttpServer: Starting HTTP Server 16/03/04 10:50:17 INFO Utils: Successfully started service 'HTTP file server' on port 62812. 16/03/04 10:50:17 INFO SparkEnv: Registering OutputCommitCoordinator 16/03/04 10:50:17 INFO Utils: Successfully started service 'SparkUI' on port 4040. 16/03/04 10:50:17 INFO SparkUI: Started SparkUI at http://219.219.220.162:4040 16/03/04 10:50:18 WARN MetricsSystem: Using default name DAGScheduler for source because spark.app.id is not set. 16/03/04 10:50:18 INFO Executor: Starting executor ID driver on host localhost 16/03/04 10:50:18 INFO Utils: Successfully started service 'org.apache.spark.network.netty.NettyBlockTransferService' on port 62819. 
16/03/04 10:50:18 INFO NettyBlockTransferService: Server created on 62819 16/03/04 10:50:18 INFO BlockManagerMaster: Trying to register BlockManager 16/03/04 10:50:18 INFO BlockManagerMasterEndpoint: Registering block manager localhost:62819 with 730.6 MB RAM, BlockManagerId(driver, localhost, 62819) 16/03/04 10:50:18 INFO BlockManagerMaster: Registered BlockManager 16/03/04 10:50:18 INFO SparkContext: Starting job: lookup at Action1.scala:20 16/03/04 10:50:18 INFO DAGScheduler: Got job 0 (lookup at Action1.scala:20) with 1 output partitions 16/03/04 10:50:18 INFO DAGScheduler: Final stage: ResultStage 0(lookup at Action1.scala:20) 16/03/04 10:50:18 INFO DAGScheduler: Parents of final stage: List() 16/03/04 10:50:18 INFO DAGScheduler: Missing parents: List() 16/03/04 10:50:19 INFO DAGScheduler: Submitting ResultStage 0 (MapPartitionsRDD[5] at lookup at Action1.scala:20), which has no missing parents 16/03/04 10:50:19 INFO MemoryStore: ensureFreeSpace(2536) called with curMem=0, maxMem=766075207 16/03/04 10:50:19 INFO MemoryStore: Block broadcast_0 stored as values in memory (estimated size 2.5 KB, free 730.6 MB) 16/03/04 10:50:19 INFO MemoryStore: ensureFreeSpace(1430) called with curMem=2536, maxMem=766075207 16/03/04 10:50:19 INFO MemoryStore: Block broadcast_0_piece0 stored as bytes in memory (estimated size 1430.0 B, free 730.6 MB) 16/03/04 10:50:19 INFO BlockManagerInfo: Added broadcast_0_piece0 in memory on localhost:62819 (size: 1430.0 B, free: 730.6 MB) 16/03/04 10:50:19 INFO SparkContext: Created broadcast 0 from broadcast at DAGScheduler.scala:861 16/03/04 10:50:19 INFO DAGScheduler: Submitting 1 missing tasks from ResultStage 0 (MapPartitionsRDD[5] at lookup at Action1.scala:20) 16/03/04 10:50:19 INFO TaskSchedulerImpl: Adding task set 0.0 with 1 tasks 16/03/04 10:50:19 INFO TaskSetManager: Starting task 0.0 in stage 0.0 (TID 0, localhost, PROCESS_LOCAL, 2283 bytes) 16/03/04 10:50:19 INFO Executor: Running task 0.0 in stage 0.0 (TID 0) 16/03/04 10:50:19 
INFO Executor: Finished task 0.0 in stage 0.0 (TID 0). 906 bytes result sent to driver 16/03/04 10:50:19 INFO TaskSetManager: Finished task 0.0 in stage 0.0 (TID 0) in 95 ms on localhost (1/1) 16/03/04 10:50:19 INFO DAGScheduler: ResultStage 0 (lookup at Action1.scala:20) finished in 0.116 s 16/03/04 10:50:19 INFO TaskSchedulerImpl: Removed TaskSet 0.0, whose tasks have all completed, from pool 16/03/04 10:50:19 INFO DAGScheduler: Job 0 finished: lookup at Action1.scala:20, took 0.503116 s 1 2 16/03/04 10:50:19 INFO SparkContext: Starting job: reduce at Action1.scala:25 16/03/04 10:50:19 INFO DAGScheduler: Got job 1 (reduce at Action1.scala:25) with 1 output partitions 16/03/04 10:50:19 INFO DAGScheduler: Final stage: ResultStage 1(reduce at Action1.scala:25) 16/03/04 10:50:19 INFO DAGScheduler: Parents of final stage: List() 16/03/04 10:50:19 INFO DAGScheduler: Missing parents: List() 16/03/04 10:50:19 INFO DAGScheduler: Submitting ResultStage 1 (ParallelCollectionRDD[6] at parallelize at Action1.scala:24), which has no missing parents 16/03/04 10:50:19 INFO MemoryStore: ensureFreeSpace(1192) called with curMem=3966, maxMem=766075207 16/03/04 10:50:19 INFO MemoryStore: Block broadcast_1 stored as values in memory (estimated size 1192.0 B, free 730.6 MB) 16/03/04 10:50:19 INFO MemoryStore: ensureFreeSpace(854) called with curMem=5158, maxMem=766075207 16/03/04 10:50:19 INFO MemoryStore: Block broadcast_1_piece0 stored as bytes in memory (estimated size 854.0 B, free 730.6 MB) 16/03/04 10:50:19 INFO BlockManagerInfo: Added broadcast_1_piece0 in memory on localhost:62819 (size: 854.0 B, free: 730.6 MB) 16/03/04 10:50:19 INFO SparkContext: Created broadcast 1 from broadcast at DAGScheduler.scala:861 16/03/04 10:50:19 INFO DAGScheduler: Submitting 1 missing tasks from ResultStage 1 (ParallelCollectionRDD[6] at parallelize at Action1.scala:24) 16/03/04 10:50:19 INFO TaskSchedulerImpl: Adding task set 1.0 with 1 tasks 16/03/04 10:50:19 INFO TaskSetManager: Starting task 
0.0 in stage 1.0 (TID 1, localhost, PROCESS_LOCAL, 2045 bytes) 16/03/04 10:50:19 INFO Executor: Running task 0.0 in stage 1.0 (TID 1) 16/03/04 10:50:19 INFO Executor: Finished task 0.0 in stage 1.0 (TID 1). 1031 bytes result sent to driver 16/03/04 10:50:19 INFO DAGScheduler: ResultStage 1 (reduce at Action1.scala:25) finished in 0.031 s 16/03/04 10:50:19 INFO TaskSetManager: Finished task 0.0 in stage 1.0 (TID 1) in 31 ms on localhost (1/1) 16/03/04 10:50:19 INFO TaskSchedulerImpl: Removed TaskSet 1.0, whose tasks have all completed, from pool 16/03/04 10:50:19 INFO DAGScheduler: Job 1 finished: reduce at Action1.scala:25, took 0.053935 s 15 16/03/04 10:50:19 INFO SparkContext: Starting job: foreach at Action1.scala:29 16/03/04 10:50:19 INFO DAGScheduler: Got job 2 (foreach at Action1.scala:29) with 1 output partitions 16/03/04 10:50:19 INFO DAGScheduler: Final stage: ResultStage 2(foreach at Action1.scala:29) 16/03/04 10:50:19 INFO DAGScheduler: Parents of final stage: List() 16/03/04 10:50:19 INFO DAGScheduler: Missing parents: List() 16/03/04 10:50:19 INFO DAGScheduler: Submitting ResultStage 2 (MapPartitionsRDD[8] at filter at Action1.scala:29), which has no missing parents 16/03/04 10:50:19 INFO MemoryStore: ensureFreeSpace(1800) called with curMem=6012, maxMem=766075207 16/03/04 10:50:19 INFO MemoryStore: Block broadcast_2 stored as values in memory (estimated size 1800.0 B, free 730.6 MB) 16/03/04 10:50:19 INFO MemoryStore: ensureFreeSpace(1137) called with curMem=7812, maxMem=766075207 16/03/04 10:50:19 INFO MemoryStore: Block broadcast_2_piece0 stored as bytes in memory (estimated size 1137.0 B, free 730.6 MB) 16/03/04 10:50:19 INFO BlockManagerInfo: Added broadcast_2_piece0 in memory on localhost:62819 (size: 1137.0 B, free: 730.6 MB) 16/03/04 10:50:19 INFO SparkContext: Created broadcast 2 from broadcast at DAGScheduler.scala:861 16/03/04 10:50:19 INFO DAGScheduler: Submitting 1 missing tasks from ResultStage 2 (MapPartitionsRDD[8] at filter at 
Action1.scala:29) 16/03/04 10:50:19 INFO TaskSchedulerImpl: Adding task set 2.0 with 1 tasks 16/03/04 10:50:19 INFO TaskSetManager: Starting task 0.0 in stage 2.0 (TID 2, localhost, PROCESS_LOCAL, 2283 bytes) 16/03/04 10:50:19 INFO Executor: Running task 0.0 in stage 2.0 (TID 2) 16/03/04 10:50:19 INFO Executor: Finished task 0.0 in stage 2.0 (TID 2). 915 bytes result sent to driver a4:(c,1) a4:(d,1) a4:(b,1) a4:(b,2) a4:(b,3) a4:(a,1) a4:(a,2) 16/03/04 10:50:19 INFO TaskSetManager: Finished task 0.0 in stage 2.0 (TID 2) in 10 ms on localhost (1/1) 16/03/04 10:50:19 INFO TaskSchedulerImpl: Removed TaskSet 2.0, whose tasks have all completed, from pool 16/03/04 10:50:19 INFO DAGScheduler: ResultStage 2 (foreach at Action1.scala:29) finished in 0.011 s 16/03/04 10:50:19 INFO DAGScheduler: Job 2 finished: foreach at Action1.scala:29, took 0.021940 s 16/03/04 10:50:19 INFO SparkContext: Starting job: foreach at Action1.scala:30 16/03/04 10:50:19 INFO DAGScheduler: Registering RDD 3 (parallelize at Action1.scala:19) 16/03/04 10:50:19 INFO DAGScheduler: Got job 3 (foreach at Action1.scala:30) with 1 output partitions 16/03/04 10:50:19 INFO DAGScheduler: Final stage: ResultStage 4(foreach at Action1.scala:30) 16/03/04 10:50:19 INFO DAGScheduler: Parents of final stage: List(ShuffleMapStage 3) 16/03/04 10:50:19 INFO DAGScheduler: Missing parents: List(ShuffleMapStage 3) 16/03/04 10:50:19 INFO DAGScheduler: Submitting ShuffleMapStage 3 (ParallelCollectionRDD[3] at parallelize at Action1.scala:19), which has no missing parents 16/03/04 10:50:19 INFO MemoryStore: ensureFreeSpace(1856) called with curMem=8949, maxMem=766075207 16/03/04 10:50:19 INFO MemoryStore: Block broadcast_3 stored as values in memory (estimated size 1856.0 B, free 730.6 MB) 16/03/04 10:50:19 INFO MemoryStore: ensureFreeSpace(1189) called with curMem=10805, maxMem=766075207 16/03/04 10:50:19 INFO MemoryStore: Block broadcast_3_piece0 stored as bytes in memory (estimated size 1189.0 B, free 730.6 MB) 
16/03/04 10:50:19 INFO BlockManagerInfo: Added broadcast_3_piece0 in memory on localhost:62819 (size: 1189.0 B, free: 730.6 MB) 16/03/04 10:50:19 INFO SparkContext: Created broadcast 3 from broadcast at DAGScheduler.scala:861 16/03/04 10:50:19 INFO DAGScheduler: Submitting 1 missing tasks from ShuffleMapStage 3 (ParallelCollectionRDD[3] at parallelize at Action1.scala:19) 16/03/04 10:50:19 INFO TaskSchedulerImpl: Adding task set 3.0 with 1 tasks 16/03/04 10:50:19 INFO TaskSetManager: Starting task 0.0 in stage 3.0 (TID 3, localhost, PROCESS_LOCAL, 2272 bytes) 16/03/04 10:50:19 INFO Executor: Running task 0.0 in stage 3.0 (TID 3) 16/03/04 10:50:19 INFO Executor: Finished task 0.0 in stage 3.0 (TID 3). 1158 bytes result sent to driver 16/03/04 10:50:19 INFO TaskSetManager: Finished task 0.0 in stage 3.0 (TID 3) in 54 ms on localhost (1/1) 16/03/04 10:50:19 INFO TaskSchedulerImpl: Removed TaskSet 3.0, whose tasks have all completed, from pool 16/03/04 10:50:19 INFO DAGScheduler: ShuffleMapStage 3 (parallelize at Action1.scala:19) finished in 0.055 s 16/03/04 10:50:19 INFO DAGScheduler: looking for newly runnable stages 16/03/04 10:50:19 INFO DAGScheduler: running: Set() 16/03/04 10:50:19 INFO DAGScheduler: waiting: Set(ResultStage 4) 16/03/04 10:50:19 INFO DAGScheduler: failed: Set() 16/03/04 10:50:19 INFO DAGScheduler: Missing parents for ResultStage 4: List() 16/03/04 10:50:19 INFO DAGScheduler: Submitting ResultStage 4 (MapPartitionsRDD[9] at filter at Action1.scala:30), which is now runnable 16/03/04 10:50:19 INFO MemoryStore: ensureFreeSpace(2648) called with curMem=11994, maxMem=766075207 16/03/04 10:50:19 INFO MemoryStore: Block broadcast_4 stored as values in memory (estimated size 2.6 KB, free 730.6 MB) 16/03/04 10:50:19 INFO MemoryStore: ensureFreeSpace(1571) called with curMem=14642, maxMem=766075207 16/03/04 10:50:19 INFO MemoryStore: Block broadcast_4_piece0 stored as bytes in memory (estimated size 1571.0 B, free 730.6 MB) 16/03/04 10:50:19 INFO 
BlockManagerInfo: Added broadcast_4_piece0 in memory on localhost:62819 (size: 1571.0 B, free: 730.6 MB) 16/03/04 10:50:19 INFO SparkContext: Created broadcast 4 from broadcast at DAGScheduler.scala:861 16/03/04 10:50:19 INFO DAGScheduler: Submitting 1 missing tasks from ResultStage 4 (MapPartitionsRDD[9] at filter at Action1.scala:30) 16/03/04 10:50:19 INFO TaskSchedulerImpl: Adding task set 4.0 with 1 tasks 16/03/04 10:50:19 INFO TaskSetManager: Starting task 0.0 in stage 4.0 (TID 4, localhost, PROCESS_LOCAL, 1901 bytes) 16/03/04 10:50:19 INFO Executor: Running task 0.0 in stage 4.0 (TID 4) 16/03/04 10:50:19 INFO ShuffleBlockFetcherIterator: Getting 1 non-empty blocks out of 1 blocks 16/03/04 10:50:19 INFO ShuffleBlockFetcherIterator: Started 0 remote fetches in 5 ms r4:(d,1) 16/03/04 10:50:19 INFO Executor: Finished task 0.0 in stage 4.0 (TID 4). 1165 bytes result sent to driver r4:(a,3) 16/03/04 10:50:19 INFO TaskSetManager: Finished task 0.0 in stage 4.0 (TID 4) in 47 ms on localhost (1/1) r4:(b,6) 16/03/04 10:50:19 INFO DAGScheduler: ResultStage 4 (foreach at Action1.scala:30) finished in 0.048 s r4:(c,1) 16/03/04 10:50:19 INFO TaskSchedulerImpl: Removed TaskSet 4.0, whose tasks have all completed, from pool 16/03/04 10:50:19 INFO DAGScheduler: Job 3 finished: foreach at Action1.scala:30, took 0.152494 s 16/03/04 10:50:19 INFO SparkContext: Starting job: foreach at Action1.scala:34 16/03/04 10:50:19 INFO MapOutputTrackerMaster: Size of output statuses for shuffle 0 is 143 bytes 16/03/04 10:50:19 INFO DAGScheduler: Registering RDD 10 (sortBy at Action1.scala:33) 16/03/04 10:50:19 INFO DAGScheduler: Got job 4 (foreach at Action1.scala:34) with 1 output partitions 16/03/04 10:50:19 INFO DAGScheduler: Final stage: ResultStage 7(foreach at Action1.scala:34) 16/03/04 10:50:19 INFO DAGScheduler: Parents of final stage: List(ShuffleMapStage 6) 16/03/04 10:50:19 INFO DAGScheduler: Missing parents: List(ShuffleMapStage 6) 16/03/04 10:50:19 INFO DAGScheduler: Submitting 
ShuffleMapStage 6 (MapPartitionsRDD[10] at sortBy at Action1.scala:33), which has no missing parents 16/03/04 10:50:19 INFO MemoryStore: ensureFreeSpace(3192) called with curMem=16213, maxMem=766075207 16/03/04 10:50:19 INFO MemoryStore: Block broadcast_5 stored as values in memory (estimated size 3.1 KB, free 730.6 MB) 16/03/04 10:50:19 INFO MemoryStore: ensureFreeSpace(1860) called with curMem=19405, maxMem=766075207 16/03/04 10:50:19 INFO MemoryStore: Block broadcast_5_piece0 stored as bytes in memory (estimated size 1860.0 B, free 730.6 MB) 16/03/04 10:50:19 INFO BlockManagerInfo: Added broadcast_5_piece0 in memory on localhost:62819 (size: 1860.0 B, free: 730.6 MB) 16/03/04 10:50:19 INFO SparkContext: Created broadcast 5 from broadcast at DAGScheduler.scala:861 16/03/04 10:50:19 INFO DAGScheduler: Submitting 1 missing tasks from ShuffleMapStage 6 (MapPartitionsRDD[10] at sortBy at Action1.scala:33) 16/03/04 10:50:19 INFO TaskSchedulerImpl: Adding task set 6.0 with 1 tasks 16/03/04 10:50:19 INFO TaskSetManager: Starting task 0.0 in stage 6.0 (TID 5, localhost, PROCESS_LOCAL, 1890 bytes) 16/03/04 10:50:19 INFO Executor: Running task 0.0 in stage 6.0 (TID 5) 16/03/04 10:50:19 INFO ShuffleBlockFetcherIterator: Getting 1 non-empty blocks out of 1 blocks 16/03/04 10:50:19 INFO ShuffleBlockFetcherIterator: Started 0 remote fetches in 1 ms 16/03/04 10:50:19 INFO Executor: Finished task 0.0 in stage 6.0 (TID 5). 
1374 bytes result sent to driver 16/03/04 10:50:19 INFO TaskSetManager: Finished task 0.0 in stage 6.0 (TID 5) in 25 ms on localhost (1/1) 16/03/04 10:50:19 INFO DAGScheduler: ShuffleMapStage 6 (sortBy at Action1.scala:33) finished in 0.025 s 16/03/04 10:50:19 INFO TaskSchedulerImpl: Removed TaskSet 6.0, whose tasks have all completed, from pool 16/03/04 10:50:19 INFO DAGScheduler: looking for newly runnable stages 16/03/04 10:50:19 INFO DAGScheduler: running: Set() 16/03/04 10:50:19 INFO DAGScheduler: waiting: Set(ResultStage 7) 16/03/04 10:50:19 INFO DAGScheduler: failed: Set() 16/03/04 10:50:19 INFO DAGScheduler: Missing parents for ResultStage 7: List() 16/03/04 10:50:19 INFO DAGScheduler: Submitting ResultStage 7 (MapPartitionsRDD[13] at filter at Action1.scala:34), which is now runnable 16/03/04 10:50:19 INFO MemoryStore: ensureFreeSpace(3128) called with curMem=21265, maxMem=766075207 16/03/04 10:50:19 INFO MemoryStore: Block broadcast_6 stored as values in memory (estimated size 3.1 KB, free 730.6 MB) 16/03/04 10:50:19 INFO MemoryStore: ensureFreeSpace(1805) called with curMem=24393, maxMem=766075207 16/03/04 10:50:19 INFO MemoryStore: Block broadcast_6_piece0 stored as bytes in memory (estimated size 1805.0 B, free 730.6 MB) 16/03/04 10:50:19 INFO BlockManagerInfo: Added broadcast_6_piece0 in memory on localhost:62819 (size: 1805.0 B, free: 730.6 MB) 16/03/04 10:50:19 INFO SparkContext: Created broadcast 6 from broadcast at DAGScheduler.scala:861 16/03/04 10:50:19 INFO DAGScheduler: Submitting 1 missing tasks from ResultStage 7 (MapPartitionsRDD[13] at filter at Action1.scala:34) 16/03/04 10:50:19 INFO TaskSchedulerImpl: Adding task set 7.0 with 1 tasks 16/03/04 10:50:19 INFO TaskSetManager: Starting task 0.0 in stage 7.0 (TID 6, localhost, PROCESS_LOCAL, 1901 bytes) 16/03/04 10:50:19 INFO Executor: Running task 0.0 in stage 7.0 (TID 6) 16/03/04 10:50:19 INFO ShuffleBlockFetcherIterator: Getting 1 non-empty blocks out of 1 blocks 16/03/04 10:50:19 INFO 
ShuffleBlockFetcherIterator: Started 0 remote fetches in 1 ms 16/03/04 10:50:19 INFO Executor: Finished task 0.0 in stage 7.0 (TID 6). 1165 bytes result sent to driver s4:(d,1) s4:(c,1) s4:(a,3) s4:(b,6) 16/03/04 10:50:19 INFO TaskSetManager: Finished task 0.0 in stage 7.0 (TID 6) in 19 ms on localhost (1/1) 16/03/04 10:50:19 INFO DAGScheduler: ResultStage 7 (foreach at Action1.scala:34) finished in 0.019 s 16/03/04 10:50:19 INFO TaskSchedulerImpl: Removed TaskSet 7.0, whose tasks have all completed, from pool 16/03/04 10:50:19 INFO DAGScheduler: Job 4 finished: foreach at Action1.scala:34, took 0.073994 s 16/03/04 10:50:19 INFO SparkContext: Starting job: take at Action1.scala:39 16/03/04 10:50:19 INFO DAGScheduler: Got job 5 (take at Action1.scala:39) with 1 output partitions 16/03/04 10:50:19 INFO DAGScheduler: Final stage: ResultStage 8(take at Action1.scala:39) 16/03/04 10:50:19 INFO DAGScheduler: Parents of final stage: List() 16/03/04 10:50:19 INFO DAGScheduler: Missing parents: List() 16/03/04 10:50:19 INFO DAGScheduler: Submitting ResultStage 8 (ParallelCollectionRDD[3] at parallelize at Action1.scala:19), which has no missing parents 16/03/04 10:50:19 INFO MemoryStore: ensureFreeSpace(1224) called with curMem=26198, maxMem=766075207 16/03/04 10:50:19 INFO MemoryStore: Block broadcast_7 stored as values in memory (estimated size 1224.0 B, free 730.6 MB) 16/03/04 10:50:19 INFO MemoryStore: ensureFreeSpace(804) called with curMem=27422, maxMem=766075207 16/03/04 10:50:19 INFO MemoryStore: Block broadcast_7_piece0 stored as bytes in memory (estimated size 804.0 B, free 730.6 MB) 16/03/04 10:50:19 INFO BlockManagerInfo: Added broadcast_7_piece0 in memory on localhost:62819 (size: 804.0 B, free: 730.6 MB) 16/03/04 10:50:19 INFO SparkContext: Created broadcast 7 from broadcast at DAGScheduler.scala:861 16/03/04 10:50:19 INFO DAGScheduler: Submitting 1 missing tasks from ResultStage 8 (ParallelCollectionRDD[3] at parallelize at Action1.scala:19) 16/03/04 10:50:19 
INFO TaskSchedulerImpl: Adding task set 8.0 with 1 tasks 16/03/04 10:50:19 INFO TaskSetManager: Starting task 0.0 in stage 8.0 (TID 7, localhost, PROCESS_LOCAL, 2283 bytes) 16/03/04 10:50:19 INFO Executor: Running task 0.0 in stage 8.0 (TID 7) 16/03/04 10:50:19 INFO Executor: Finished task 0.0 in stage 8.0 (TID 7). 1043 bytes result sent to driver 16/03/04 10:50:19 INFO TaskSetManager: Finished task 0.0 in stage 8.0 (TID 7) in 13 ms on localhost (1/1) 16/03/04 10:50:19 INFO TaskSchedulerImpl: Removed TaskSet 8.0, whose tasks have all completed, from pool 16/03/04 10:50:19 INFO DAGScheduler: ResultStage 8 (take at Action1.scala:39) finished in 0.018 s 16/03/04 10:50:19 INFO DAGScheduler: Job 5 finished: take at Action1.scala:39, took 0.064104 s t4:(c,1) 16/03/04 10:50:20 INFO BlockManagerInfo: Removed broadcast_6_piece0 on localhost:62819 in memory (size: 1805.0 B, free: 730.6 MB) 16/03/04 10:50:20 INFO ContextCleaner: Cleaned accumulator 7 16/03/04 10:50:20 INFO BlockManagerInfo: Removed broadcast_5_piece0 on localhost:62819 in memory (size: 1860.0 B, free: 730.6 MB) 16/03/04 10:50:20 INFO ContextCleaner: Cleaned accumulator 6 16/03/04 10:50:20 INFO BlockManagerInfo: Removed broadcast_4_piece0 on localhost:62819 in memory (size: 1571.0 B, free: 730.6 MB) 16/03/04 10:50:20 INFO ContextCleaner: Cleaned accumulator 5 16/03/04 10:50:20 INFO BlockManagerInfo: Removed broadcast_3_piece0 on localhost:62819 in memory (size: 1189.0 B, free: 730.6 MB) 16/03/04 10:50:20 INFO ContextCleaner: Cleaned accumulator 4 16/03/04 10:50:20 INFO BlockManagerInfo: Removed broadcast_2_piece0 on localhost:62819 in memory (size: 1137.0 B, free: 730.6 MB) 16/03/04 10:50:20 INFO ContextCleaner: Cleaned accumulator 3 16/03/04 10:50:20 INFO BlockManagerInfo: Removed broadcast_1_piece0 on localhost:62819 in memory (size: 854.0 B, free: 730.6 MB) 16/03/04 10:50:20 INFO ContextCleaner: Cleaned accumulator 2 16/03/04 10:50:20 INFO BlockManagerInfo: Removed broadcast_0_piece0 on localhost:62819 in 
memory (size: 1430.0 B, free: 730.6 MB) 16/03/04 10:50:20 INFO ContextCleaner: Cleaned accumulator 1 16/03/04 10:50:25 WARN : Your hostname, xubo-PC resolves to a loopback/non-reachable address: fe80:0:0:0:0:5efe:c0a8:16c%19, but we couldn't find any external IP address! 16/03/04 10:50:44 INFO deprecation: mapred.tip.id is deprecated. Instead, use mapreduce.task.id 16/03/04 10:50:44 INFO deprecation: mapred.task.id is deprecated. Instead, use mapreduce.task.attempt.id 16/03/04 10:50:44 INFO deprecation: mapred.task.is.map is deprecated. Instead, use mapreduce.task.ismap 16/03/04 10:50:44 INFO deprecation: mapred.task.partition is deprecated. Instead, use mapreduce.task.partition 16/03/04 10:50:44 INFO deprecation: mapred.job.id is deprecated. Instead, use mapreduce.job.id 16/03/04 10:50:46 INFO SparkContext: Starting job: saveAsTextFile at Action1.scala:44 16/03/04 10:50:46 INFO MapOutputTrackerMaster: Size of output statuses for shuffle 0 is 143 bytes 16/03/04 10:50:46 INFO MapOutputTrackerMaster: Size of output statuses for shuffle 1 is 143 bytes 16/03/04 10:50:46 INFO DAGScheduler: Got job 6 (saveAsTextFile at Action1.scala:44) with 1 output partitions 16/03/04 10:50:46 INFO DAGScheduler: Final stage: ResultStage 11(saveAsTextFile at Action1.scala:44) 16/03/04 10:50:46 INFO DAGScheduler: Parents of final stage: List(ShuffleMapStage 10) 16/03/04 10:50:46 INFO DAGScheduler: Missing parents: List() 16/03/04 10:50:46 INFO DAGScheduler: Submitting ResultStage 11 (MapPartitionsRDD[14] at saveAsTextFile at Action1.scala:44), which has no missing parents 16/03/04 10:50:46 INFO MemoryStore: ensureFreeSpace(128432) called with curMem=2028, maxMem=766075207 16/03/04 10:50:46 INFO MemoryStore: Block broadcast_8 stored as values in memory (estimated size 125.4 KB, free 730.5 MB) 16/03/04 10:50:46 INFO MemoryStore: ensureFreeSpace(43159) called with curMem=130460, maxMem=766075207 16/03/04 10:50:46 INFO MemoryStore: Block broadcast_8_piece0 stored as bytes in memory 
(estimated size 42.1 KB, free 730.4 MB) 16/03/04 10:50:46 INFO BlockManagerInfo: Added broadcast_8_piece0 in memory on localhost:62819 (size: 42.1 KB, free: 730.5 MB) 16/03/04 10:50:46 INFO SparkContext: Created broadcast 8 from broadcast at DAGScheduler.scala:861 16/03/04 10:50:46 INFO DAGScheduler: Submitting 1 missing tasks from ResultStage 11 (MapPartitionsRDD[14] at saveAsTextFile at Action1.scala:44) 16/03/04 10:50:46 INFO TaskSchedulerImpl: Adding task set 11.0 with 1 tasks 16/03/04 10:50:46 INFO TaskSetManager: Starting task 0.0 in stage 11.0 (TID 8, localhost, PROCESS_LOCAL, 1901 bytes) 16/03/04 10:50:46 INFO Executor: Running task 0.0 in stage 11.0 (TID 8) 16/03/04 10:50:46 INFO ShuffleBlockFetcherIterator: Getting 1 non-empty blocks out of 1 blocks 16/03/04 10:50:46 INFO ShuffleBlockFetcherIterator: Started 0 remote fetches in 0 ms 16/03/04 10:50:56 INFO FileOutputCommitter: Saved output of task 'attempt_201603041050_0011_m_000000_8' to hdfs://219.219.220.149:9000/output/20160304105019961/_temporary/0/task_201603041050_0011_m_000000 16/03/04 10:50:56 INFO SparkHadoopMapRedUtil: attempt_201603041050_0011_m_000000_8: Committed 16/03/04 10:50:56 INFO Executor: Finished task 0.0 in stage 11.0 (TID 8). 2080 bytes result sent to driver 16/03/04 10:50:56 INFO TaskSetManager: Finished task 0.0 in stage 11.0 (TID 8) in 9642 ms on localhost (1/1) 16/03/04 10:50:56 INFO TaskSchedulerImpl: Removed TaskSet 11.0, whose tasks have all completed, from pool 16/03/04 10:50:56 INFO DAGScheduler: ResultStage 11 (saveAsTextFile at Action1.scala:44) finished in 9.643 s 16/03/04 10:50:56 INFO DAGScheduler: Job 6 finished: saveAsTextFile at Action1.scala:44, took 9.742452 s 16/03/04 10:50:56 INFO SparkUI: Stopped Spark web UI at http://219.219.220.162:4040 16/03/04 10:50:56 INFO DAGScheduler: Stopping DAGScheduler 16/03/04 10:50:56 INFO MapOutputTrackerMasterEndpoint: MapOutputTrackerMasterEndpoint stopped! 
16/03/04 10:50:57 INFO MemoryStore: MemoryStore cleared 16/03/04 10:50:57 INFO BlockManager: BlockManager stopped 16/03/04 10:50:57 INFO BlockManagerMaster: BlockManagerMaster stopped 16/03/04 10:50:57 INFO OutputCommitCoordinator$OutputCommitCoordinatorEndpoint: OutputCommitCoordinator stopped! 16/03/04 10:50:57 INFO SparkContext: Successfully stopped SparkContext 16/03/04 10:50:57 INFO ShutdownHookManager: Shutdown hook called 16/03/04 10:50:57 INFO ShutdownHookManager: Deleting directory C:\Users\xubo\AppData\Local\Temp\spark-52f1804d-6099-46a6-b52e-473d64044637 16/03/04 10:50:57 INFO RemoteActorRefProvider$RemotingTerminator: Shutting down remote daemon. 16/03/04 10:50:57 INFO RemoteActorRefProvider$RemotingTerminator: Remote daemon shut down; proceeding with flushing remote transports. Process finished with exit code 0