Spark: reading HBase data directly into an RDD

The snippet below uses newAPIHadoopRDD with HBase's TableInputFormat to scan a table into an RDD of (ImmutableBytesWritable, Result) pairs, which is then mapped into a DataFrame.

package com.lenovo.hbaseToMysql

import java.util.Properties

import com.lenovo.Utils.MysqlSession
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.client._
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark.sql.{SaveMode, SparkSession}

object HbaseToMysql {
  def main(args: Array[String]): Unit = {
    val ss = SparkSession
      .builder()
      .appName("HbaseToMysql")
      .master("yarn")
      .getOrCreate()

    // HBase production table
    val tableName = "upp:upp_user_profile"
    // HBase client configuration parameters
    val masterPort = "60000"
    val zookeeperClientPort = "2181"
    val zookeeperQuorum = "node73.it.leap.com,node75.it.leap.com,node76.it.leap.com,node81.it.leap.com,node82.it.leap.com"
    val zookeeperZnodeParent = "/hbase-unsecure"

    // Build the HBase configuration and point TableInputFormat at the source table
    val conf = HBaseConfiguration.create()
    conf.set("hbase.master.port", masterPort)
    conf.set("hbase.zookeeper.property.clientport", zookeeperClientPort)
    conf.set("hbase.zookeeper.quorum", zookeeperQuorum)
    conf.set("zookeeper.znode.parent", zookeeperZnodeParent)
    conf.set(TableInputFormat.INPUT_TABLE, tableName)

    val sc = ss.sparkContext
    import ss.implicits._ // required for the toDF call below

    // Scan the HBase table into an RDD of (row key, Result) pairs
    val hbase_data = sc.newAPIHadoopRDD(conf, classOf[TableInputFormat], classOf[ImmutableBytesWritable], classOf[Result])

    // For each row, read the row key plus three qualifiers from the
    // "attitude" column family (missing cells come back as null)
    val tb_upp_Attitude = hbase_data.map(line => {
      val rowkey = Bytes.toString(line._2.getRow)
      val complaint = Bytes.toString(line._2.getValue(Bytes.toBytes("attitude"), Bytes.toBytes("complaint")))
      val praise = Bytes.toString(line._2.getValue(Bytes.toBytes("attitude"), Bytes.toBytes("praise")))
      val urgency = Bytes.toString(line._2.getValue(Bytes.toBytes("attitude"), Bytes.toBytes("urgency")))

      (rowkey, complaint, praise, urgency)
    }).toDF("user_name", "complaint", "praise", "urgency")
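
    // The original snippet stopped here without persisting the DataFrame.
    // A minimal sketch of the MySQL write using Spark's built-in JDBC sink;
    // the URL, table name, and credentials are placeholders, not values from
    // the original post.
    val props = new Properties()
    props.setProperty("user", "mysql_user")         // placeholder
    props.setProperty("password", "mysql_password") // placeholder
    tb_upp_Attitude.write
      .mode(SaveMode.Append)
      .jdbc("jdbc:mysql://mysql-host:3306/upp", "tb_upp_attitude", props)

    ss.stop()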
  }
}
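
The write sketched above uses Spark's generic JDBC sink rather than the project's own MysqlSession helper, which the original imports but never calls. SaveMode.Append assumes the target MySQL table already exists; SaveMode.Overwrite would drop and recreate it on each run. For a YARN deployment like this one, the MySQL JDBC driver also has to be on the executor classpath (e.g. via --jars).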
