// Scala Spark: manually building a DataFrame with complex types (ArrayType, StructType)
package biReportJob.zt
import org.apache.log4j.{Level, Logger}
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.functions._
import org.apache.spark.sql.types.{IntegerType, StringType, StructType, _}
import org.apache.spark.sql.{Row, SparkSession}
/**
 * Example: build a DataFrame by hand from an `RDD[Row]` plus an explicit schema, attach a
 * JSON string column through a UDF, and parse that column with `from_json` into an
 * array-of-struct column (including a nested struct).
 */
object TestMain {

  /**
   * Runs the example on a local Spark session and prints the schema and contents of the
   * intermediate and final DataFrames.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[*]")
      .config("hive.metastore.uris", "thrift://hdp02:9083")
      .getOrCreate()

    try {
      import spark.implicits._

      // Explicit schema for the hand-built rows: name (string), age (int).
      val fields = Array(
        StructField("name", StringType, nullable = true),
        StructField("age", IntegerType, nullable = true)
      )
      // val schemaString = "name age"
      // val fields = schemaString.split(" ").map(fieldName => StructField(fieldName, StringType, nullable = true))
      val schema = StructType(fields)

      val rowsRdd: RDD[Row] = spark.sparkContext.parallelize(
        Seq(
          Row("xiaoming", 30),
          Row("ling", 28)
        )
      )

      // Pair the RDD of Rows with the schema to obtain a DataFrame.
      val peopleDataFrame = spark.createDataFrame(rowsRdd, schema)
      // val peopleDataFrame = Seq(Person("Andy", 32)).toDS()
      // val udf_null = udf((s: Any) => null)

      // Constant JSON payload; it matches the `jsonstr` column in the sample output below.
      val sampleJson =
        """[{"k":"kkk","v":100,"animal_interpretation":{"is_large_animal":true,"is_mammal":false}}]"""

      // The UDF ignores its input and returns the same JSON string for every row.
      val udf_2 = udf((_: Any) => sampleJson)

      // Target type for from_json: array<struct<k, v, animal_interpretation: struct<...>>>.
      val arrayStruct = ArrayType(StructType(Seq(
        StructField("k", StringType, nullable = true),
        StructField("v", DoubleType, nullable = true),
        StructField("animal_interpretation", StructType(Seq(
          StructField("is_large_animal", BooleanType, nullable = true),
          StructField("is_mammal", BooleanType, nullable = true)
        )), nullable = true)
      )))

      val df1 = peopleDataFrame.withColumn("jsonstr", udf_2(col("age")))
      df1.printSchema()
      df1.show()

      val df2 = df1.withColumn("jsonData", from_json($"jsonstr", arrayStruct))
      df2.printSchema()
      // Do not truncate, so the full JSON string and the parsed structure are visible.
      df2.show(truncate = false)
    } finally {
      // Release the local Spark context even if one of the steps above fails.
      spark.stop()
    }

    /*
     * output
     * root
     * |-- name: string (nullable = true)
     * |-- age: integer (nullable = true)
     * |-- jsonstr: string (nullable = true)
     * +--------+---+--------------------+
     * | name|age| jsonstr|
     * +--------+---+--------------------+
     * |xiaoming| 30|[{"k":"kkk","v":1...|
     * | ling| 28|[{"k":"kkk","v":1...|
     * +--------+---+--------------------+
     * root
     * |-- name: string (nullable = true)
     * |-- age: integer (nullable = true)
     * |-- jsonstr: string (nullable = true)
     * |-- jsonData: array (nullable = true)
     * | |-- element: struct (containsNull = true)
     * | | |-- k: string (nullable = true)
     * | | |-- v: double (nullable = true)
     * | | |-- animal_interpretation: struct (nullable = true)
     * | | | |-- is_large_animal: boolean (nullable = true)
     * | | | |-- is_mammal: boolean (nullable = true)
     * +--------+---+----------------------------------------------------------------------------------------+-----------------------------+
     * |name |age|jsonstr |jsonData |
     * +--------+---+----------------------------------------------------------------------------------------+-----------------------------+
     * |xiaoming|30 |[{"k":"kkk","v":100,"animal_interpretation":{"is_large_animal":true,"is_mammal":false}}]|[[ kkk, 100.0, [true, false]]]|
     * |ling |28 |[{"k":"kkk","v":100,"animal_interpretation":{"is_large_animal":true,"is_mammal":false}}]|[[ kkk, 100.0, [true, false]]]|
     * +--------+---+----------------------------------------------------------------------------------------+-----------------------------+
     */
  }
}