黑猴子的家: Spark SQL Reading Hive Data

1、HiveAnalysis

import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession

object HiveAnalysis {

  def main(args: Array[String]): Unit = {

    // Create the SparkConf
    val sparkConf = new SparkConf().setAppName("session").setMaster("local[*]")

    // Create the SparkSession with Hive support enabled
    val spark = SparkSession.builder().config(sparkConf).enableHiveSupport().getOrCreate()
    val sc = spark.sparkContext

    // Date range for the query; hard-coded here as an example,
    // in practice these would come from the task configuration
    val startDate = "2019-01-01"
    val endDate = "2019-01-31"

    // Fetch the user behavior data for the date range from Hive
    import spark.implicits._
    val userVisitActionDF = spark.sql("select * from " + Constants.TABLE_USER_VISIT_ACTION +
      " where date >= '" + startDate + "' and date <= '" + endDate + "'").as[UserVisitAction]

    val userVisitActionRDD = userVisitActionDF.rdd

    // Stop Spark
    spark.close()
  }
}
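
The snippet above references a Constants object that is not shown here. A minimal sketch, assuming the Hive table is simply named user_visit_action (the table name is an assumption):

object Constants {
  // Name of the Hive table holding the user behavior data
  // (assumed here; adjust to match the actual table)
  val TABLE_USER_VISIT_ACTION = "user_visit_action"
}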

2、DataModel

/**
  * 100 users => 10 sessions per user => up to 100 random actions per session.
  * The actions ("search", "click", "order", "pay") never appear in the
  * same record.
  *
  * @param date               Date the session occurred, yyyy-MM-dd
  * @param user_id            User ID [1 - 100]
  * @param session_id         Unique identifier of the session (UUID)
  * @param page_id            Page the user clicked [1 - 10]
  * @param action_time        Exact time of the action, yyyy-MM-dd hh:mm:ss
  * @param search_keyword     Search keyword, drawn from
  * ("火锅", "蛋糕", "重庆辣子鸡", "重庆小面", "呷哺呷哺",
  * "新辣道鱼火锅", "国贸大厦", "太古商场", "日本料理", "温泉")
  * @param click_category_id  Category ID of the clicked item [1 - 100]
  * @param click_product_id   ID of the clicked item [1 - 100]
  * @param order_categroy_ids Category IDs of the ordered items [1 - 100]
  * @param order_product_ids  IDs of the ordered items [1 - 100]
  * @param pay_product_ids    IDs of the paid items [1 - 100]
  * @param pay_categroy_ids   Category IDs of the paid items [1 - 100]
  * @param city_id            City where the action occurred [1 - 10]
  */
case class UserVisitAction(date: String,
                           user_id: Int,
                           session_id: String,
                           page_id: String,
                           action_time: String,
                           search_keyword: String,
                           click_category_id: String,
                           click_product_id: String,
                           order_categroy_ids: String,
                           order_product_ids: String,
                           pay_product_ids: String,
                           pay_categroy_ids: String,
                           city_id: String)
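
The doc comment above doubles as a spec for generating the mock data. A minimal sketch of such a generator, following those rules; the MockDataGenerator name is hypothetical, and date/action_time are fixed to a single day for brevity:

import java.util.UUID
import scala.util.Random

object MockDataGenerator {

  private val random = new Random()
  private val keywords = Array("火锅", "蛋糕", "重庆辣子鸡", "重庆小面", "呷哺呷哺",
    "新辣道鱼火锅", "国贸大厦", "太古商场", "日本料理", "温泉")

  // 100 users => 10 sessions per user => up to 100 random actions per session
  def generate(): Seq[UserVisitAction] = {
    for {
      userId <- 1 to 100
      _ <- 1 to 10
      sessionId = UUID.randomUUID().toString
      _ <- 1 to (random.nextInt(100) + 1)
    } yield {
      // Pick exactly one action type so that search/click/order/pay
      // never appear in the same record
      val action = Array("search", "click", "order", "pay")(random.nextInt(4))
      def id100() = (random.nextInt(100) + 1).toString
      UserVisitAction(
        date = "2019-01-01",
        user_id = userId,
        session_id = sessionId,
        page_id = (random.nextInt(10) + 1).toString,
        action_time = "2019-01-01 00:00:00",
        search_keyword = if (action == "search") keywords(random.nextInt(keywords.length)) else null,
        click_category_id = if (action == "click") id100() else null,
        click_product_id = if (action == "click") id100() else null,
        order_categroy_ids = if (action == "order") id100() else null,
        order_product_ids = if (action == "order") id100() else null,
        pay_product_ids = if (action == "pay") id100() else null,
        pay_categroy_ids = if (action == "pay") id100() else null,
        city_id = (random.nextInt(10) + 1).toString
      )
    }
  }
}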

3、pom.xml

<dependencies>

    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-core_2.11</artifactId>
    </dependency>

    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-hive_2.11</artifactId>
    </dependency>

    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-sql_2.11</artifactId>
    </dependency>

</dependencies>
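
No <version> elements appear above, so the versions are presumably managed in a parent POM's dependencyManagement section. Note also that for Spark to see the existing Hive tables, hive-site.xml must be on the classpath, e.g. under src/main/resources. Analysis results are often written back to Hive as well; a minimal sketch using SaveMode (the result_table name and the toy data are assumptions):

import org.apache.spark.sql.{SaveMode, SparkSession}

object HiveWriteBack {

  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("write-back")
      .master("local[*]")
      .enableHiveSupport()
      .getOrCreate()

    import spark.implicits._

    // A toy result; in practice this would be the output of the analysis
    val resultDF = Seq(("session-1", 42L), ("session-2", 7L))
      .toDF("session_id", "action_count")

    // SaveMode.Overwrite replaces the table contents on each run;
    // "result_table" is a hypothetical Hive table name
    resultDF.write.mode(SaveMode.Overwrite).saveAsTable("result_table")

    spark.close()
  }
}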
