Running against YARN and using the new (mapreduce) Hadoop API. The following dependency needs to be added in SBT, since the hadoop-client pulled in by default is 1.0.4:
libraryDependencies += "org.apache.hadoop" % "hadoop-client" % "2.2.0"
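For context, a minimal build.sbt might look like the sketch below; the Spark version (0.9.0-incubating) and Scala version are assumptions from the article's time frame, so adjust them to match your environment.

name := "my-hadoop-file"

scalaVersion := "2.10.3"

// Spark core; version is an assumption, pick the one matching your cluster.
libraryDependencies += "org.apache.spark" %% "spark-core" % "0.9.0-incubating"

// Override the transitive 1.0.4 client with the Hadoop 2.2.0 (YARN) client.
libraryDependencies += "org.apache.hadoop" % "hadoop-client" % "2.2.0"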
package myclass

import org.apache.spark.SparkContext
import org.apache.hadoop.io.{Text, LongWritable, IntWritable}
import org.apache.hadoop.mapreduce.lib.input.{TextInputFormat, KeyValueTextInputFormat}

/**
 * Created by jack on 2/27/14.
 */
object MyHadoopFile {
  def main(args: Array[String]) {
    val spark = new SparkContext("local", "my hadoop file",
      System.getenv("SPARK_HOME"), SparkContext.jarOfClass(this.getClass))
    // Read the file with the new-API TextInputFormat: keys are byte offsets, values are lines.
    val file = spark.newAPIHadoopFile[LongWritable, Text, TextInputFormat]("hdfs://127.0.0.1:9000/user/jack/data/pagerank_data.txt")
    file.foreach(println)
    System.exit(0)
  }
}

Result:
(0,1 2)
(4,1 3)
(8,1 4)
(12,2 1)
(16,3 1)
(20,4 1)
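Since the pairs come back as Hadoop Writable objects (LongWritable offsets and Text lines), a common next step is to convert them to plain Scala types before further processing. A minimal sketch, continuing from the file RDD above:

// Convert (LongWritable, Text) pairs into plain (Long, String) tuples
// so they can be cached, shuffled, or collected safely.
val lines = file.map { case (offset, text) => (offset.get, text.toString) }
lines.collect().foreach(println)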