Spark SQL 之通过 StructType 创建 DataFrame(编程接口)

import org.apache.avro.generic.GenericData.StringType
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.types.{IntegerType, StructField, StructType}
import org.apache.spark.sql.{DataFrame, Row, SQLContext}
import org.apache.spark.{SparkConf, SparkContext}
import org.spark_project.dmg.pmml.True

object TestDataFrame2 {
  /**
    * Builds a DataFrame programmatically: read a text file into an RDD,
    * map each line to a Row, attach an explicit StructType schema, then
    * register a temp view and query it with SQL.
    */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("TestDataFrame2").setMaster("local")
    val sc = new SparkContext(conf)
    // NOTE: renamed from `SQLContext` — a val that shadows the class name
    // `org.apache.spark.sql.SQLContext` is confusing and error-prone.
    val sqlContext = new SQLContext(sc)
    // Read the local data file into an RDD of lines.
    val fileRDD = sc.textFile("D:\\Users\\shashahu\\Desktop\\work\\spark-2.4.4\\examples\\src\\main\\resources\\people.txt")
    // Map each comma-separated line to a Row (requires import org.apache.spark.sql.Row).
    // Each Row carries two values: (name: String, age: Int).
    val rowRDD: RDD[Row] = fileRDD.map(line => {
      val fields = line.split(",")
      Row(fields(0), fields(1).trim.toInt)
    })

    // Define the schema with a StructType: (field name, field type, nullable).
    // BUG FIX: the schema must declare BOTH fields — the original declared only
    // "age", so createDataFrame failed at query time because each Row has two
    // values but the schema had one. The name field's type must be the fully
    // qualified Spark SQL StringType: the file's L3 import pulls in the wrong
    // StringType (from Avro's GenericData), so we qualify it explicitly here.
    val structType: StructType = StructType(
      StructField("name", org.apache.spark.sql.types.StringType, true) ::
        StructField("age", IntegerType, true) :: Nil
    )
    /**
      * createDataFrame(rowRDD: RDD[Row], schema: StructType)
      */
    val df: DataFrame = sqlContext.createDataFrame(rowRDD, structType)
    df.createOrReplaceTempView("people")
    sqlContext.sql("select * from people").show()
    // Release cluster resources when done.
    sc.stop()
  }
}

以上示例展示了如何使用 StructType(结构体)以编程方式定义表结构并创建 DataFrame。

你可能感兴趣的:(大数据组件)