更多Spark学习examples代码请见:https://github.com/xubo245/SparkLearning
1.安装:
从 https://repo.maven.apache.org/maven2/com/databricks/spark-avro_2.10/2.0.1/ 下载 spark-avro 的 jar 包,并导入到 Spark 项目里。
文件:
https://github.com/databricks/spark-avro/raw/master/src/test/resources/episodes.avro
2.使用
代码:
/**
 * @author xubo
 * @time 20160502
 * ref https://github.com/databricks/spark-avro
 */
package org.apache.spark.avro.learning

import org.apache.spark.sql.SQLContext
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import java.text.SimpleDateFormat
import java.util.Date

/**
 * Minimal spark-avro example: reads an Avro file into a DataFrame, prints it,
 * keeps the rows matching `doctor > 5`, writes them back out as Avro and
 * prints the filtered rows.
 */
object learning1 {

  /**
   * Program entry point.
   *
   * Reads `file/data/avro/input/episodes.avro`, shows it, then writes and shows
   * the rows where `doctor > 5`. The output path is
   * `file/data/avro/output/episodes/avro` followed by a
   * `yyyyMMddHHmmssSSS` timestamp taken at run time.
   *
   * @param args command-line arguments; not used
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("readFileFromFaFq").setMaster("local")
    val sc = new SparkContext(conf)
    try {
      // import needed for the .avro method to be added
      import com.databricks.spark.avro._
      val sqlContext = new SQLContext(sc)

      // The Avro records get converted to Spark types, filtered, and
      // then written back out as Avro records
      val df = sqlContext.read.avro("file/data/avro/input/episodes.avro")
      df.show()

      // Timestamp suffix (millisecond resolution) appended to the output path.
      val iString = new SimpleDateFormat("yyyyMMddHHmmssSSS").format(new Date())

      // Build the filtered DataFrame once and reuse it for both the write and the show.
      val filtered = df.filter("doctor > 5")
      filtered.write.avro("file/data/avro/output/episodes/avro" + iString)
      filtered.show()
    } finally {
      // Always release the SparkContext, even when the read or write fails.
      sc.stop()
    }
  }
}
Objavro.schema?{"type":"record","name":"episodes","namespace":"testing.hive.avro.serde","fields":[{"name":"title","type":"string","doc":"episode title"},{"name":"air_date","type":"string","doc":"initial date"},{"name":"doctor","type":"int","doc":"main actor playing the Doctor in episode"}]} 巏RLS|]Z^{??"The Eleventh Hour3 April 2010"The Doctor's Wife14 May 2011&Horror of Fang Rock 3 September 1977$An Unearthly Child 23 November 1963*The Mysterious Planet 6 September 1986Rose26 March 2005.The Power of the Daleks5 November 1966Castrolava4 January 1982 巏RLS|]Z^{?
+--------------------+----------------+------+
|               title|        air_date|doctor|
+--------------------+----------------+------+
|   The Eleventh Hour|    3 April 2010|    11|
|   The Doctor's Wife|     14 May 2011|    11|
| Horror of Fang Rock|3 September 1977|     4|
|  An Unearthly Child|23 November 1963|     1|
|The Mysterious Pl...|6 September 1986|     6|
|                Rose|   26 March 2005|     9|
|The Power of the ...| 5 November 1966|     2|
|          Castrolava|  4 January 1982|     5|
+--------------------+----------------+------+

+--------------------+----------------+------+
|               title|        air_date|doctor|
+--------------------+----------------+------+
|   The Eleventh Hour|    3 April 2010|    11|
|   The Doctor's Wife|     14 May 2011|    11|
|The Mysterious Pl...|6 September 1986|     6|
|                Rose|   26 March 2005|     9|
+--------------------+----------------+------+
Objavro.codecsnappyavro.schema�{"type":"record","name":"topLevelRecord","fields":[{"name":"title","type":["string","null"]},{"name":"air_date","type":["string","null"]},{"name":"doctor","type":["int","null"]}]}
参考:https://github.com/databricks/spark-avro