更多 Spark 学习示例代码请见:https://github.com/xubo245/SparkLearning
1.本例主要演示如何用 partitionBy 按 year、month 分区存储 avro 文件,并把写出的数据重新读回
2.代码:
/**
 * @author xubo
 * @time 20160502
 * ref https://github.com/databricks/spark-avro
 */
package org.apache.spark.avro.learning

import org.apache.spark.sql.SQLContext
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import java.text.SimpleDateFormat
import java.util.Date
// import needed for the .avro method to be added
import com.databricks.spark.avro._

/**
 * Partitioned Avro records.
 *
 * Builds a four-row DataFrame with the columns "year", "month", "title" and
 * "rating", writes it as Avro partitioned by "year" and "month", then reads
 * the written directory back twice (once through the generic
 * `format(...).load(...)` API and once through the `.avro` shortcut) and
 * prints each result.
 */
object AvroWritePartitioned {

  /**
   * Runs the write/read round trip on a local Spark master.
   *
   * @param args command-line arguments; not used
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("AvroWritePartitioned").setMaster("local")
    val sc = new SparkContext(conf)
    try {
      val sqlContext = new SQLContext(sc)
      // Brings the implicit conversions into scope that provide .toDF on a Seq of tuples.
      import sqlContext.implicits._

      val df = Seq(
        (2012, 8, "Batman", 9.8),
        (2012, 8, "Hero", 8.7),
        (2012, 7, "Robot", 5.5),
        (2011, 7, "Git", 2.0))
        .toDF("year", "month", "title", "rating")
      df.show

      // A millisecond-resolution timestamp suffix gives every run its own output directory.
      val iString = new SimpleDateFormat("yyyyMMddHHmmssSSS").format(new Date())
      val outputPath = s"file/data/avro/output/episodes/WriteAvro$iString"

      df.write.partitionBy("year", "month").avro(outputPath)

      // Read back through the generic data source API.
      val dfread = sqlContext.read
        .format("com.databricks.spark.avro")
        .load(outputPath)
      dfread.show

      // Read back through the .avro shortcut added by the spark-avro import.
      val dfread2 = sqlContext.read.avro(outputPath)
      dfread2.show
    } finally {
      // Always release the SparkContext, even when the job fails part-way.
      sc.stop()
    }
  }
}
3.结果:
+----+-----+------+------+
|year|month| title|rating|
+----+-----+------+------+
|2012|    8|Batman|   9.8|
|2012|    8|  Hero|   8.7|
|2012|    7| Robot|   5.5|
|2011|    7|   Git|   2.0|
+----+-----+------+------+

2016-05-02 11:25:15 WARN  :139 - Your hostname, xubo-PC resolves to a loopback/non-reachable address: fe80:0:0:0:200:5efe:ca26:54d2%20, but we couldn't find any external IP address!
+------+------+----+-----+
| title|rating|year|month|
+------+------+----+-----+
| Robot|   5.5|2012|    7|
|   Git|   2.0|2011|    7|
|Batman|   9.8|2012|    8|
|  Hero|   8.7|2012|    8|
+------+------+----+-----+

+------+------+----+-----+
| title|rating|year|month|
+------+------+----+-----+
| Robot|   5.5|2012|    7|
|   Git|   2.0|2011|    7|
|Batman|   9.8|2012|    8|
|  Hero|   8.7|2012|    8|
+------+------+----+-----+