Spark: create an external table from a Parquet file's schema (Scala code)

Run the following in a Zeppelin notebook paragraph (Livy Spark interpreter):

%livy2.spark

// In Zeppelin with the Livy interpreter the SparkSession is already available
// as `spark`, so there is no need to build a SQLContext (or import the Hadoop
// Parquet input-format classes) by hand.
val df = spark.read.parquet("/secure/projects/ndata/warehouse/edw/supply_model/dt=20211011/part-99476-f44490c6-cf7e-4411-972b-7b14e769ad11-c000.snappy.parquet")

// Derive the column list from the Parquet file's schema.
val columns = df.schema.fields

// Render each field as "name type" using Spark's SQL type names.
val cols = columns.map(c => c.name + " " + c.dataType.sql).mkString(", ")

val ddl = "CREATE EXTERNAL TABLE supply_model (" + cols +
  ") STORED AS PARQUET LOCATION '/secure/projects/ndata/warehouse/edw/supply_model/'"

println(ddl)

spark.sql(ddl)
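Note that the sample file sits under a dt=20211011 partition directory, while the DDL above registers the table without any partition columns, so every dt directory is treated as one flat dataset. Below is a minimal sketch of a partition-aware variant; it is not from the original post. It assumes the only partition column is dt (a STRING, inferred from the path layout here), uses the hypothetical table name supply_model_p, and relies on MSCK REPAIR TABLE to discover the partition directories already on disk:

// Sketch under the assumptions above, not the original post's code.
// Quote column names with backticks so reserved words survive in the DDL.
val quotedCols = columns.map(c => "`" + c.name + "` " + c.dataType.sql).mkString(", ")

// Declare dt as a partition column instead of baking it into the data columns;
// dt STRING is an assumption inferred from the dt=20211011 directory name.
val partitionedDdl = "CREATE EXTERNAL TABLE supply_model_p (" + quotedCols +
  ") PARTITIONED BY (dt STRING)" +
  " STORED AS PARQUET LOCATION '/secure/projects/ndata/warehouse/edw/supply_model/'"

spark.sql(partitionedDdl)

// Let the metastore discover the dt=... directories already on disk.
spark.sql("MSCK REPAIR TABLE supply_model_p")

After the repair, a query such as SELECT count(*) FROM supply_model_p WHERE dt = '20211011' can prune down to a single partition directory instead of scanning the whole table location.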

[Reference] https://stackoverflow.com/questions/33625617/creating-hive-table-using-parquet-file-metadata
