黑猴子的家:Spark SQL 写入数据到 Hive

1、Constants 常量

/** Names of the tables used by the mock data warehouse. */
object Constants {

  /** Table that stores user records. */
  val TABLE_USER_INFO: String = "user_info"

  /** Table that stores product records. */
  val TABLE_PRODUCT_INFO: String = "product_info"

  /** Table that stores user visit actions. */
  val TABLE_USER_VISIT_ACTION: String = "user_visit_action"

}

2、DataModel

/**
  * One row of mock user data (the mock data set is intended to hold 100 users).
  *
  * The ranges below describe the intended contents of the mock data; the
  * case class itself performs no validation.
  *
  * @param user_id      user ID, intended range 1 to 100
  * @param username     login name, formed as "user" + id
  * @param name         display name, formed as "name" + id
  * @param age          age as text, intended range 1 to 60
  * @param professional occupation, formed as "profess" + a number from 1 to 100
  * @param city         city identifier as text, intended range 1 to 10
  * @param sex          gender, either "male" or "female"
  */
case class UserInfo(
    user_id: Int,
    username: String,
    name: String,
    age: String,
    professional: String,
    city: String,
    sex: String
)

3、MockDataWareHouse

import java.util.UUID

import org.apache.commons.lang3.time.DateFormatUtils
import org.apache.spark.SparkConf
import org.apache.spark.sql.{DataFrame, SparkSession}
import org.slf4j.{Logger, LoggerFactory}

import scala.collection.mutable.ArrayBuffer
import scala.util.Random

/** Generates mock user records and writes them to a Hive table through Spark SQL. */
object MockDataWareHouse {

  // Number of mock users to generate; user IDs run from 1 to this value.
  private val UserCount = 100

  /**
    * Builds the mock user records.
    *
    * Produces exactly 100 users, one per user ID:
    *  - user_id:      1 to 100
    *  - username:     "user" + user_id
    *  - name:         "name" + user_id
    *  - age:          random value in 1 to 60, stored as text
    *  - professional: "profess" + random value in 1 to 100
    *  - city:         random value in 1 to 10, stored as text
    *  - sex:          "male" or "female", chosen at random
    *
    * @return the generated users, ordered by ascending user_id
    */
  def userInfoGenerate(): Array[UserInfo] = {
    val sexes = Array("male", "female")
    val random = new Random()

    (1 to UserCount).map { userId =>
      // Random.nextInt(n) yields 0 until n, so shift by one to obtain 1 to n.
      val age = random.nextInt(60) + 1
      val professional = "profess" + (random.nextInt(100) + 1)
      val city = random.nextInt(10) + 1
      val sex = sexes(random.nextInt(sexes.length))

      UserInfo(userId, "user" + userId, "name" + userId, age.toString, professional, city.toString, sex)
    }.toArray
  }

  /**
    * Replaces a Hive table with the given data.
    *
    * Issues `DROP TABLE IF EXISTS` for the table and then saves the
    * DataFrame under the same name.
    *
    * @param spark session used to run the DROP statement
    * @param table name of the table to replace; it is concatenated into the
    *              SQL text unescaped, so it must be a trusted identifier
    * @param data  rows to store in the table
    */
  def saveInDataWarehouse(spark: SparkSession, table: String, data: DataFrame): Unit = {
    spark.sql("DROP TABLE IF EXISTS " + table)
    data.write.saveAsTable(table)
  }

  /**
    * Entry point: generates the mock users and saves them to the user-info table.
    *
    * @param args command-line arguments (unused)
    */
  def main(args: Array[String]): Unit = {
    // Runs Spark locally using all available cores.
    val sparkConf = new SparkConf().setAppName("mock").setMaster("local[*]")

    // Hive support is required so that saveAsTable writes to the Hive warehouse.
    val spark = SparkSession.builder().config(sparkConf).enableHiveSupport().getOrCreate()

    try {
      val userInfoData = userInfoGenerate()

      // Convert the mock data to an RDD and then to a DataFrame.
      import spark.implicits._
      val userInfoDF = spark.sparkContext.makeRDD(userInfoData).toDF

      saveInDataWarehouse(spark, Constants.TABLE_USER_INFO, userInfoDF)
    } finally {
      // Always release the session, even when generation or the write fails.
      spark.stop()
    }
  }
}

4、pom.xml



    
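<dependencies>
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-core_2.11</artifactId>
    </dependency>

    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-hive_2.11</artifactId>
    </dependency>

    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-sql_2.11</artifactId>
    </dependency>
</dependencies>

<build>
    <plugins>
        <plugin>
            <groupId>net.alchim31.maven</groupId>
            <artifactId>scala-maven-plugin</artifactId>
        </plugin>
    </plugins>
</build>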

你可能感兴趣的:(Spark)