Spark Core读取SequenceFile/Protobuf,和推导公式

import org.apache.hadoop.io.BytesWritable
import org.apache.hadoop.io.Text
import org.apache.spark.{SparkConf, SparkContext}

/**
 * Spark driver that reads the SequenceFile at `/seq` (keys read as
 * `BytesWritable`, values as `Text`), splits every value on the tab
 * character and prints the first two fields of each record as a tuple.
 */
object ReadSeqenceFileApp {

  /**
   * Program entry point.
   *
   * @param args command-line arguments; not used
   */
  def main(args: Array[String]): Unit = {
    // App name and master are left unset in code; presumably they are
    // supplied externally (e.g. via spark-submit) -- confirm before running
    // this directly from an IDE.
    val conf = new SparkConf()
//      .setAppName("ReadSeqenceFileApp")
//        .setMaster("local[2]")
    val sc = new SparkContext(conf)

    try {
      val file = sc.sequenceFile[BytesWritable, Text]("/seq")

      // Hadoop Writables (BytesWritable/Text) are not java.io.Serializable and
      // the record reader reuses the same instances, so they must be turned
      // into plain Scala values *before* collect() brings data to the driver.
      // `Text` has no split method, hence the explicit toString.
      // A limit of -1 keeps trailing empty fields, so a value such as "a\t"
      // yields ("a", "") instead of failing on fields(1). A value without any
      // tab still fails, exactly as it did before.
      file
        .map { case (_, value) => value.toString.split("\t", -1) }
        .map(fields => (fields(0), fields(1)))
        .collect()
        .foreach(println)
    } finally {
      // Always release the context, even when the job above throws.
      sc.stop()
    }
  }
}

你可能感兴趣的:(Spark)