Spark: full and incremental extraction from MySQL into Hive with static and dynamic partitions

Three small jobs follow: a static-partition load, a dynamic-partition load, and an incremental (append-only) load, all pulling t_score from MySQL over JDBC into the Hive database wushiyou.

import org.apache.spark.sql.{DataFrame, DataFrameReader, SaveMode, SparkSession}

object readmysqlstatic {
  def main(args: Array[String]): Unit = {
    System.setProperty("HADOOP_USER_NAME", "root")
    val spark = SparkSession.builder()
      .master("local[*]")
      .appName("抽取数据")
      .enableHiveSupport()
      .config("spark.sql.warehouse.dir", "hdfs://hadoop1:50070/user/hive/warehouse")
      .config("hive.metastore.uris", "thrift://hadoop1:9083")
      .getOrCreate()

    spark.read
      .format("jdbc")
      .option("driver", "com.mysql.jdbc.Driver")
      .option("url", "jdbc:mysql://localhost:3306/courseDB")
      .option("user", "root")
      .option("password", "root")
      .option("dbtable", "t_score")
      .load()
      .createTempView("data")
//      spark.sql("select * from data").show()
      spark.sql("use wushiyou")
    //静态分区
      spark.sql(
      """
        |create table if not exists b(
        |course_id string,
        |stud_id string,
        |score double
        |)
        |partitioned by(score_id string)
        |row format delimited fields terminated by '\t'
        |""".stripMargin)
    spark.sql(
      """
        |insert overwrite table b partition (score_id='01')
        |select course_id, stud_id, score
        |from data
        |where score_id = '01'
        |""".stripMargin)
    println("-------------------------------------------------------------")



    spark.stop()
  }

}
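
A quick sanity check after the static load, run in the same session (a minimal sketch; it assumes only the table b created above):

    spark.sql("show partitions b").show()                            // expect a single partition: score_id=01
    spark.sql("select count(*) from b where score_id = '01'").show() // rows landed in that partition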

import org.apache.spark.sql.SparkSession

object mysqlauto {
  def main(args: Array[String]): Unit = {
    System.setProperty("HADOOP_USER_NAME", "root")
    val spark = SparkSession.builder()
      .master("local[*]")
      .appName("抽取数据")
      .enableHiveSupport()
      .config("spark.sql.warehouse.dir", "hdfs://hadoop1:50070/user/hive/warehouse")
      .config("hive.metastore.uris", "thrift://hadoop1:9083")
      .getOrCreate()

    spark.read
      .format("jdbc")
      .option("driver", "com.mysql.jdbc.Driver")
      .option("url", "jdbc:mysql://localhost:3306/courseDB")
      .option("user", "root")
      .option("password", "root")
      .option("dbtable", "t_score")
      .load()
      .createTempView("data")
    //      spark.sql("select * from data").show()
    spark.sql("use wushiyou")
    // dynamic partition: partition values are taken from the data itself
    spark.sql(
      """
        |set hive.exec.dynamic.partition=true
        |""".stripMargin)
    spark.sql(
      """
        |set hive.exec.dynamic.partition.mode=nonstrict
        |""".stripMargin)
    // course_id moved out of the column list: partition columns must not repeat in the table schema
    spark.sql(
      """
        |create table if not exists auto(
        |stud_id string,
        |score double
        |)
        |partitioned by(score_id string, course_id string)
        |row format delimited fields terminated by '\t'
        |""".stripMargin)

    spark.sql(
      """
        |insert overwrite table auto partition(score_id, course_id)
        |select stud_id, score, score_id, course_id
        |from data
        |""".stripMargin)

    spark.stop()
  }

}
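
The two set statements can instead be supplied once when the session is built: with enableHiveSupport(), Hive properties passed to the builder are forwarded to the Hive client. A sketch of the same builder as above:

    val spark = SparkSession.builder()
      .master("local[*]")
      .appName("ExtractData")
      .enableHiveSupport()
      .config("hive.metastore.uris", "thrift://hadoop1:9083")
      .config("hive.exec.dynamic.partition", "true")           // allow dynamic partition values
      .config("hive.exec.dynamic.partition.mode", "nonstrict") // no leading static partition required
      .getOrCreate()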

import org.apache.spark.sql.SparkSession

object zengliang {
  def main(args: Array[String]): Unit = {
//    System.setProperty("HADOOP_USER_NAME", "root")
    val spark = SparkSession.builder()
      .master("yarn")
      .appName("zengliang")
      .enableHiveSupport()
//      .config("spark.sql.warehouse.dir", "hdfs://hadoop1:50070/user/hive/warehouse")
//      .config("hive.metastore.uris", "thrift://hadoop1:9083")
      .getOrCreate()

    spark.read
      .format("jdbc")
      .option("driver", "com.mysql.jdbc.Driver")
      .option("url", "jdbc:mysql://192.168.202.134:3306/CourseDB")
      .option("user", "root")
      .option("password", "123456")
      .option("dbtable", "T_score")
      .load()
      .createTempView("data")
    spark.sql("use wushiyou")

    spark.sql(
      """
        |set hive.exec.dynamic.partition=true
        |""".stripMargin)
    spark.sql(
      """
        |set hive.exec.dynamic.partition.mode=nonstrict
        |""".stripMargin)

    // incremental load: insert into appends to existing partitions,
    // whereas insert overwrite (used above) would replace them
    spark.sql(
      """
        |insert into table auto partition(score_id, course_id)
        |select stud_id, score, score_id, course_id
        |from data
        |""".stripMargin)


    spark.stop()
  }

}
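
As written, this insert into re-appends every row of t_score on each run, so repeated runs duplicate data. One way to make the load genuinely incremental without touching the MySQL schema is an anti-join against what Hive already holds; a sketch, assuming (stud_id, course_id, score_id) uniquely identifies a score:

    spark.sql(
      """
        |insert into table auto partition(score_id, course_id)
        |select d.stud_id, d.score, d.score_id, d.course_id
        |from data d
        |left anti join auto a
        |  on  d.stud_id   = a.stud_id
        |  and d.score_id  = a.score_id
        |  and d.course_id = a.course_id
        |""".stripMargin)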
