Two ways to connect Spark to Hive

Add the required dependencies to the pom.xml:

 
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>emg</groupId>
    <artifactId>emg.spark</artifactId>
    <version>1.0-SNAPSHOT</version>

    <dependencies>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-core_2.11</artifactId>
            <version>2.1.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-sql_2.11</artifactId>
            <version>2.1.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-hive_2.11</artifactId>
            <version>2.1.1</version>
        </dependency>
        <dependency>
            <groupId>org.scala-lang</groupId>
            <artifactId>scala-library</artifactId>
            <version>2.11.8</version>
        </dependency>
        <dependency>
            <groupId>org.scala-lang</groupId>
            <artifactId>scala-compiler</artifactId>
            <version>2.11.8</version>
        </dependency>
        <dependency>
            <groupId>org.scala-lang</groupId>
            <artifactId>scala-reflect</artifactId>
            <version>2.11.8</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>2.7.2</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
            <version>2.7.2</version>
        </dependency>
        <dependency>
            <groupId>log4j</groupId>
            <artifactId>log4j</artifactId>
            <version>1.2.17</version>
        </dependency>
        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>5.1.38</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hive</groupId>
            <artifactId>hive-jdbc</artifactId>
            <version>1.1.0</version>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <plugin>
                <groupId>net.alchim31.maven</groupId>
                <artifactId>scala-maven-plugin</artifactId>
                <version>3.2.2</version>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.5.1</version>
            </plugin>
            <plugin>
                <groupId>net.alchim31.maven</groupId>
                <artifactId>scala-maven-plugin</artifactId>
                <executions>
                    <execution>
                        <id>scala-compile-first</id>
                        <phase>process-resources</phase>
                        <goals>
                            <goal>add-source</goal>
                            <goal>compile</goal>
                        </goals>
                    </execution>
                    <execution>
                        <id>scala-test-compile</id>
                        <phase>process-test-resources</phase>
                        <goals>
                            <goal>testCompile</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <executions>
                    <execution>
                        <phase>compile</phase>
                        <goals>
                            <goal>compile</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-shade-plugin</artifactId>
                <version>2.4.3</version>
                <executions>
                    <execution>
                        <phase>package</phase>
                        <goals>
                            <goal>shade</goal>
                        </goals>
                        <configuration>
                            <filters>
                                <filter>
                                    <artifact>*:*</artifact>
                                    <excludes>
                                        <exclude>META-INF/*.SF</exclude>
                                        <exclude>META-INF/*.DSA</exclude>
                                        <exclude>META-INF/*.RSA</exclude>
                                    </excludes>
                                </filter>
                            </filters>
                            <transformers>
                                <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
                                    <mainClass>emg.branchs.EmgFilterDemo</mainClass>
                                </transformer>
                            </transformers>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
</project>

 

Method 1: Connect to Hive directly with Spark SQL

    In my own testing, the Hive metastore service would not start and only HiveServer2 was running; in that situation this approach kept failing with errors about not being able to find the Hive metastore database.

 
import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession

def main(args: Array[String]): Unit = {

  val Array(inpath, dt, hour) = args

  val conf = new SparkConf().setAppName(this.getClass.getSimpleName)
    //.setMaster("local[*]")
    .setMaster("spark://192.168.40.52:7077")

  val session = SparkSession.builder()
    .config(conf)
    // Hive metastore URI; the default port is 9083, see hive-site.xml
    .config("hive.metastore.uris", "thrift://192.168.40.51:9083")
    // Hive warehouse directory
    .config("spark.sql.warehouse.dir", "hdfs://192.168.40.51:9000/user/hive/warehouse")
    // connect to Hive directly through the metastore
    .enableHiveSupport()
    .getOrCreate()

  import session.implicits._

  val df1 = session.read.parquet(inpath)
  //df1.write.saveAsTable(s"tmp.tmp_app_log_1")
  df1.createOrReplaceTempView("tmp_app_log_test")

  // the real SQL logic is omitted here
  val sql1 =
    s"""
       |select *
       |from tmp_app_log_test
     """.stripMargin

  val hive_table = "dwb2.fact_mbk_offline_log_mbk_app_action_event_v2_i_h"
  val sql2 = s"alter table $hive_table add if not exists partition (dt='$dt', hour='$hour')"
  session.sql(sql2)

  val tmp_table = s"tmp.app_log_${dt}_${hour}"
  val sql3 = s"drop table if exists $tmp_table"
  session.sql(sql3)

  val df2 = session.sql(sql1)
  // write the result to a temporary table first
  df2.write.saveAsTable(tmp_table)

  // then load the result from the temporary table into the target partition
  val sql4 =
    s"""INSERT OVERWRITE TABLE $hive_table
       |PARTITION (dt='$dt', hour='$hour')
       |select * from $tmp_table""".stripMargin
  session.sql(sql4)

  val sql5 = s"drop table if exists $tmp_table"
  session.sql(sql5)
}
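For reference, once enableHiveSupport() can reach the metastore, the intermediate temporary table is not strictly required: the target partition can be overwritten straight from the temp view. The sketch below is a minimal, untested variant of Method 1; the object name is made up, the table and view names are taken from the listing above, and it assumes the columns selected from the view match the target table's non-partition columns.

import org.apache.spark.sql.SparkSession

object DirectPartitionInsertSketch {
  def main(args: Array[String]): Unit = {
    val Array(inpath, dt, hour) = args

    // Assumes hive-site.xml is on the classpath, or the same .config(...) calls as above are added
    val spark = SparkSession.builder()
      .appName("DirectPartitionInsertSketch")
      .enableHiveSupport()
      .getOrCreate()

    // Register the parquet input as a temporary view, as in Method 1
    spark.read.parquet(inpath).createOrReplaceTempView("tmp_app_log_test")

    val hiveTable = "dwb2.fact_mbk_offline_log_mbk_app_action_event_v2_i_h"

    // Make sure the partition exists, then overwrite it directly from the view;
    // the select list must match the target table's non-partition columns
    spark.sql(s"alter table $hiveTable add if not exists partition (dt='$dt', hour='$hour')")
    spark.sql(
      s"""INSERT OVERWRITE TABLE $hiveTable
         |PARTITION (dt='$dt', hour='$hour')
         |select * from tmp_app_log_test""".stripMargin)

    spark.stop()
  }
}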

Method 2: Connect to Hive via JDBC

    In my own testing, with the Hive metastore down and only HiveServer2 running, the JDBC connection worked without problems.

 

 
import java.sql.{Connection, DriverManager}

import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession

def main(args: Array[String]): Unit = {

  // In my testing the Hive metastore would not start and only HiveServer2 was running,
  // so the data is loaded into the table over JDBC instead.
  val conf = new SparkConf().setAppName(this.getClass.getSimpleName)
    //.setMaster("local[*]")
    .setMaster("spark://192.168.40.**:7077")

  val session = SparkSession.builder()
    .config(conf)
    .getOrCreate()

  // Note the URL format here; it seems to have changed after Hive 1.3, check the docs for your version
  val url = "jdbc:hive2://192.168.40.**:10000/emg"
  val username = "root"
  val password = "123456"

  val driverName = "org.apache.hive.jdbc.HiveDriver"
  try {
    Class.forName(driverName)
  } catch {
    case e: ClassNotFoundException =>
      println(s"Missing class $driverName: $e")
  }

  val con: Connection = DriverManager.getConnection(url, username, password)
  val state = con.createStatement()

  import session.implicits._

  // CurrentTime is a project helper (not shown) that builds the date/hour path segments
  val paths = "/user/emg/cxb_out/" + CurrentTime.getMonthDate() + "/" + CurrentTime.getYesterday() + "/" + CurrentTime.getHourDate() + "/"

  // Because the metastore cannot be reached, load the result files into the Hive table over JDBC
  val sql2 = "load data inpath '" + paths + "' into table result01"

  try {
    state.execute(sql2)
    println("=============================== loaded into hive successfully ==========================")
  } catch {
    case e: Exception => e.printStackTrace()
  } finally {
    if (null != con) {
      con.close()
    }
  }

  /* Creating an external table over JDBC works the same way:
  val sql =
    """
      |create external table zz_result(id bigint, lat float, lon float, utc bigint, tags int)
      |row format delimited fields terminated by '\t' location '/user/hive/zz'
    """.stripMargin

  try {
    state.execute(sql)
    println("=============================== table created successfully ==========================")
  } catch {
    case e: Exception => e.printStackTrace()
  } finally {
    if (null != con) {
      con.close()
    }
  }
  */

  session.close()
}
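As a quick sanity check after the load, the same HiveServer2 connection can run a query and read the result back through a standard JDBC ResultSet. The sketch below is illustrative only: the object name is made up, the count query is an assumption about what you might want to verify, and the masked host from the listing above is kept as a placeholder.

import java.sql.{DriverManager, ResultSet}

object HiveJdbcCheck {
  def main(args: Array[String]): Unit = {
    Class.forName("org.apache.hive.jdbc.HiveDriver")

    // Same HiveServer2 endpoint and credentials as in Method 2
    val con = DriverManager.getConnection("jdbc:hive2://192.168.40.**:10000/emg", "root", "123456")
    val stmt = con.createStatement()
    try {
      // Count the rows that the "load data inpath" statement moved into the table
      val rs: ResultSet = stmt.executeQuery("select count(*) from result01")
      if (rs.next()) {
        println(s"result01 now contains ${rs.getLong(1)} rows")
      }
      rs.close()
    } finally {
      stmt.close()
      con.close()
    }
  }
}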

 
