1. Input data:
[root@spark0 bigdata]# pwd
/usr/local/spark-1.5.2-bin-hadoop2.6/bigdata
[root@spark0 bigdata]# more wcDemo1.txt
hadoop hive solr redis kafka
hadoop storm flume sqoop docker
spark spark hadoop spark elasticsearch
hbase hadoop hive spark hive
hadoop spark
[root@spark0 bigdata]#

The words in wcDemo1.txt are separated by tabs (more simply renders them as whitespace); this is what the split("\t") in the next step relies on.
2. Word count:

scala> val rdd=sc.textFile("/usr/local/spark-1.5.2-bin-hadoop2.6/bigdata/wcDemo1.txt").flatMap(_.split("\t")).map(x=>(x,1)).reduceByKey(_+_).collect
rdd: Array[(String, Int)] = Array((spark,5), (hive,3), (hadoop,5), (docker,1), (flume,1), (solr,1), (storm,1), (elasticsearch,1), (kafka,1), (sqoop,1), (redis,1), (hbase,1))
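The one-liner chains four lazy transformations and finishes with the collect action, which is what actually triggers the computation. The same pipeline broken into named steps, as a minimal sketch (the vals lines, words, pairs and counts are illustrative names, not from the original session):

val lines  = sc.textFile("/usr/local/spark-1.5.2-bin-hadoop2.6/bigdata/wcDemo1.txt")
val words  = lines.flatMap(_.split("\t"))   // split each tab-delimited line into words
val pairs  = words.map(word => (word, 1))   // pair every word with an initial count of 1
val counts = pairs.reduceByKey(_ + _)       // sum the counts per distinct word
counts.collect()                            // action: materialize the Array[(String, Int)] on the driver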
3. Sort ascending by key:

scala> val rdd=sc.textFile("/usr/local/spark-1.5.2-bin-hadoop2.6/bigdata/wcDemo1.txt").flatMap(_.split("\t")).map(x=>(x,1)).reduceByKey(_+_).sortByKey().collect
rdd: Array[(String, Int)] = Array((docker,1), (elasticsearch,1), (flume,1), (hadoop,5), (hbase,1), (hive,3), (kafka,1), (redis,1), (solr,1), (spark,5), (sqoop,1), (storm,1))
4. Sort descending by key:

scala> val rdd=sc.textFile("/usr/local/spark-1.5.2-bin-hadoop2.6/bigdata/wcDemo1.txt").flatMap(_.split("\t")).map(x=>(x,1)).reduceByKey(_+_).sortByKey(false).collect
rdd: Array[(String, Int)] = Array((storm,1), (sqoop,1), (spark,5), (solr,1), (redis,1), (kafka,1), (hive,3), (hbase,1), (hadoop,5), (flume,1), (elasticsearch,1), (docker,1))
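sortByKey sorts by the key of each pair, i.e. alphabetically by word, and passing false flips the order to descending. To rank by count instead, one option is sortBy on the value. A sketch, reusing the counts val from the step-by-step version above (byCount and byCount2 are illustrative names):

val byCount = counts.sortBy(_._2, ascending = false).collect()   // highest counts first
// equivalent using only pair-RDD operations: make the count the key, sort, swap back
val byCount2 = counts.map(_.swap).sortByKey(false).map(_.swap).collect()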
5. Count the results:

scala> val rdd=sc.textFile("/usr/local/spark-1.5.2-bin-hadoop2.6/bigdata/wcDemo1.txt").flatMap(_.split("\t")).map(x=>(x,1)).reduceByKey(_+_).sortByKey(false).count
rdd: Long = 12

count is an action that returns the number of elements in the RDD; after reduceByKey there is one pair per distinct word, so it returns 12.
6. Save the result to a text file:

scala> val rdd=sc.textFile("/usr/local/spark-1.5.2-bin-hadoop2.6/bigdata/wcDemo1.txt").flatMap(_.split("\t")).map(x=>(x,1)).reduceByKey(_+_).sortByKey(false).saveAsTextFile("/usr/local/spark-1.5.2-bin-hadoop2.6/bigdata/wcDemo_out")
rdd: Unit = ()

scala>
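saveAsTextFile is an action returning Unit (hence rdd: Unit = () above). It writes the target as a directory containing one part-NNNNN file per partition plus a _SUCCESS marker, and it fails with a FileAlreadyExistsException if that directory already exists. To write the pairs as plain tab-separated lines in a single file, a sketch (wcDemo_out2 is a hypothetical output path; counts is the val from the sketch above):

counts.sortByKey(false)
  .map { case (word, n) => s"$word\t$n" }   // "word<TAB>count" lines instead of tuple syntax
  .coalesce(1)                              // one partition => a single part-00000 file
  .saveAsTextFile("/usr/local/spark-1.5.2-bin-hadoop2.6/bigdata/wcDemo_out2")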
7. Check the output:

[root@spark0 bigdata]# cd wcDemo_out/
[root@spark0 wcDemo_out]# pwd
/usr/local/spark-1.5.2-bin-hadoop2.6/bigdata/wcDemo_out
[root@spark0 wcDemo_out]# ll
total 4
-rw-r--r--. 1 root root 128 Dec 13 22:35 part-00000
-rw-r--r--. 1 root root   0 Dec 13 22:35 _SUCCESS
[root@spark0 wcDemo_out]# more part-00000
(storm,1)
(sqoop,1)
(spark,5)
(solr,1)
(redis,1)
(kafka,1)
(hive,3)
(hbase,1)
(hadoop,5)
(flume,1)
(elasticsearch,1)
(docker,1)
[root@spark0 wcDemo_out]#
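All of the above runs in spark-shell, where the SparkContext sc is created for you. Packaged as a standalone application, the same job looks roughly like this minimal sketch (the WordCount object name and the argument-based paths are illustrative, not from the original session):

import org.apache.spark.{SparkConf, SparkContext}

object WordCount {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("WordCount")  // master is supplied by spark-submit
    val sc   = new SparkContext(conf)
    sc.textFile(args(0))                 // input file, e.g. wcDemo1.txt above
      .flatMap(_.split("\t"))
      .map(word => (word, 1))
      .reduceByKey(_ + _)
      .sortByKey(false)
      .saveAsTextFile(args(1))           // output directory; must not already exist
    sc.stop()
  }
}

Built into a jar, it would be launched with something like spark-submit --class WordCount wordcount.jar <input> <output>.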