Spark SQL: read data from HBase and save it to Hive

Create the SparkReadHbase object:

package sparksql

import java.util.Properties

import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.client.Result
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark.sql.SparkSession

import scala.collection.mutable.ArrayBuffer
/**
  * yang
  * 2019/3/27 10:37
  * 1.0
  */
object SparkReadHbase {

  def main(args: Array[String]): Unit = {
    // Load the table list, per-table column lists, and per-table SQL from hbase.properties on the classpath.
    val properties = new Properties()
    properties.load(Thread.currentThread().getContextClassLoader.getResourceAsStream("hbase.properties"))
    val tables: Array[String] = properties.getProperty("t1").split(",")
    tables.foreach(t => print(t + ' '))

    val spark = SparkSession
      .builder()
      .master("local[2]")
      .appName("Spark Read Hbase")
      .enableHiveSupport() // required to read from and write to Hive tables
      .getOrCreate()
    // A SparkSession already carries a SparkContext; creating a second
    // SparkConf/SparkContext/SQLContext here is unnecessary in Spark 2.x
    // (and the duplicate `spark` definition would not compile), so reuse it.
    val sc = spark.sparkContext
    import spark.implicits._

    // HBase connection settings, also read from hbase.properties.
    val conf = HBaseConfiguration.create()
    conf.set("hbase.zookeeper.quorum", properties.getProperty("zookeeper.quorum"))
    conf.set("hbase.zookeeper.property.clientPort", properties.getProperty("zookeeper.property.clientPort"))
    tables.foreach(table => {
      conf.set(TableInputFormat.INPUT_TABLE, table)
      // The property named after the table lists the HBase column qualifiers to read.
      val columns: Array[String] = properties.getProperty(table).split(",")
      columns.foreach(c => print(c + " "))

      val hbaseRDD = sc.newAPIHadoopRDD(conf, classOf[TableInputFormat],
        classOf[ImmutableBytesWritable], classOf[Result])
      // Map each HBase Result to a tuple of (row key, column values), i.e. turn the
      // RDD into a DataFrame. The column family is assumed to be "f".
      val shop = hbaseRDD.map(r => {
        val arr = new ArrayBuffer[String]()
        columns.foreach(c => {
          val fieldVal = Bytes.toString(r._2.getValue(Bytes.toBytes("f"), Bytes.toBytes(c)))
          arr.append(fieldVal)
        })
        (Bytes.toString(r._2.getRow), arr(0), arr(1))
      }).toDF()
      shop.createOrReplaceTempView("shop")

      // "<table>sql" in the properties file holds the INSERT ... SELECT prefix;
      // append " from shop" (note the leading space) to complete the statement.
      val sqlKey = table + "sql"
      spark.sql(properties.getProperty(sqlKey) + " from shop")
      spark.sql("select * from shop").show()
      print(table + ' ')
    })
    sc.stop()
    spark.stop()
  }
}
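
Two details are worth noting in the listing above: toDF() is called without column names, so the temporary view's columns are _1, _2 and _3 and the INSERT OVERWRITE matches purely by position; and the mapping hardcodes exactly two qualifiers (arr(0), arr(1)) plus the column family "f". As a minimal sketch, assuming the same columns, hbaseRDD and spark values are in scope, the RDD-to-DataFrame step could be generalized to any configured column list with an explicit schema (this generalization is not part of the original code):

import org.apache.spark.sql.Row
import org.apache.spark.sql.types.{StringType, StructField, StructType}

// Build the schema dynamically: one string field for the row key,
// then one per configured qualifier.
val schema = StructType(
  StructField("rowkey", StringType, nullable = false) +:
    columns.map(c => StructField(c, StringType, nullable = true))
)
// Emit one Row per HBase Result; the column family "f" is still assumed.
val rowRDD = hbaseRDD.map { case (_, result) =>
  val values = columns.map(c =>
    Bytes.toString(result.getValue(Bytes.toBytes("f"), Bytes.toBytes(c))))
  Row.fromSeq(Bytes.toString(result.getRow) +: values.toSeq)
}
val shopDF = spark.createDataFrame(rowRDD, schema)
shopDF.createOrReplaceTempView("shop")

With this variant the view's columns carry their HBase qualifier names, so the INSERT statements no longer depend on column order alone.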
The hbase.properties file:
zookeeper.quorum=cdh-node02,cdh-node03,cdh-node04
zookeeper.property.clientPort=2181

t1=CDR,CDR2
CDR=customer_id,create_id
CDR2=customer_id,create_id

CDRsql=insert overwrite table cdr select *
CDR2sql=insert overwrite table cdr2 select *
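
Note that each <table>sql value is only a statement prefix: the program appends " from shop" at runtime, so CDRsql expands to "insert overwrite table cdr select * from shop". INSERT OVERWRITE also requires the target Hive tables to exist beforehand. A minimal sketch of matching DDL, assuming all three projected columns are strings (the original post does not show the Hive schema):

// Hypothetical one-time setup; the column types are assumptions, not taken from the post.
spark.sql("create table if not exists cdr  (rowkey string, customer_id string, create_id string)")
spark.sql("create table if not exists cdr2 (rowkey string, customer_id string, create_id string)")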

The pom.xml file (key properties and dependencies):


<properties>
    <hbase.version>1.2.6</hbase.version>
    <spark.version>2.0.0</spark.version>
    <hadoop.version>2.7.7</hadoop.version>
    <hive.version>1.2.2</hive.version><!-- property name assumed; only the value 1.2.2 survived extraction -->
    <scala.version>2.10.6</scala.version>
    <scala.binary.version>2.10</scala.binary.version>
</properties>

<dependencies>
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-web</artifactId>
    </dependency>
    <dependency>
        <groupId>com.alibaba</groupId>
        <artifactId>fastjson</artifactId>
        <version>1.2.50</version>
    </dependency>

    <dependency>
        <groupId>org.testng</groupId>
        <artifactId>testng</artifactId>
        <version>6.9.9</version>
        <scope>test</scope>
    </dependency>

    <!-- Scala / Spark -->
    <dependency>
        <groupId>org.scala-lang</groupId>
        <artifactId>scala-library</artifactId>
        <version>${scala.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-sql_2.10</artifactId>
        <version>${spark.version}</version>
        <exclusions>
            <exclusion>
                <groupId>org.codehaus.janino</groupId>
                <artifactId>commons-compiler</artifactId>
            </exclusion>
        </exclusions>
    </dependency>
    <dependency>
        <groupId>com.fasterxml.jackson.core</groupId>
        <artifactId>jackson-core</artifactId>
        <version>2.6.5</version>
    </dependency>
    <dependency>
        <groupId>com.fasterxml.jackson.core</groupId>
        <artifactId>jackson-annotations</artifactId>
        <version>2.6.5</version>
    </dependency>
    <dependency>
        <groupId>com.fasterxml.jackson.core</groupId>
        <artifactId>jackson-databind</artifactId>
        <version>2.6.5</version>
    </dependency>
    <dependency>
        <groupId>org.codehaus.janino</groupId>
        <artifactId>commons-compiler</artifactId>
        <version>2.6.1</version>
    </dependency>
    <dependency>
        <groupId>org.codehaus.janino</groupId>
        <artifactId>janino</artifactId>
        <version>2.6.1</version>
    </dependency>

    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-hive_2.10</artifactId>
        <version>${spark.version}</version>
    </dependency>
    <dependency>
        <groupId>mysql</groupId>
        <artifactId>mysql-connector-java</artifactId>
        <version>5.1.27</version>
    </dependency>

    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-client</artifactId>
        <version>${hadoop.version}</version>
    </dependency>
    <!-- HBase -->
    <dependency>
        <groupId>org.apache.hbase</groupId>
        <artifactId>hbase-client</artifactId>
        <version>${hbase.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hbase</groupId>
        <artifactId>hbase-server</artifactId>
        <version>${hbase.version}</version>
    </dependency>
</dependencies>