Reading HBase data with Spark

package actions

import java.util
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.hbase.{Cell, CellUtil, HBaseConfiguration}
import org.apache.hadoop.hbase.client.Result
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}
import org.json4s.jackson.Serialization
import scala.collection.mutable

object example {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("Practice").setMaster("local[2]")
    val sc = new SparkContext(conf)

    // HBase connection configuration
    val hbaseConf: Configuration = HBaseConfiguration.create()
    // ZooKeeper quorum of the HBase cluster
    hbaseConf.set("hbase.zookeeper.quorum", "hadoop102,hadoop103,hadoop104")
    // HBase table to read
    hbaseConf.set(TableInputFormat.INPUT_TABLE, "student")

    val rdd: RDD[(ImmutableBytesWritable, Result)] = sc.newAPIHadoopRDD(
      hbaseConf,
      classOf[TableInputFormat],        // input format, analogous to TextInputFormat for plain files
      classOf[ImmutableBytesWritable],  // key type: wraps the row key
      classOf[Result]                   // value type: wraps all cells of one row
    )
    val rdd2 = rdd.map {
      case (iw, result) =>
        val map: mutable.Map[String, Any] = mutable.Map[String, Any]()
        // store the row key in the map
        map += "rowkey" -> Bytes.toString(iw.get())
        // then store every column of the row
        val cells: util.List[Cell] = result.listCells()
        import scala.collection.JavaConverters._
        for (cell <- cells.asScala) {
          // column qualifier -> column value
          val key: String = Bytes.toString(CellUtil.cloneQualifier(cell))
          val value: String = Bytes.toString(CellUtil.cloneValue(cell))
          map += key -> value
        }
        // either return the map directly
        // map
        // or serialize the map to a JSON string
        implicit val formats = org.json4s.DefaultFormats
        Serialization.write(map)
    }
    rdd2.collect.foreach(println)
    // final output: one JSON string per row, e.g.
    // {"rowkey":"1001","name":"lisi","age":"20"}
    sc.stop()
  }
}
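
To compile this example, the project needs Spark core plus the HBase client and MapReduce integration on the classpath. Below is a minimal build.sbt sketch; the artifact versions are assumptions and should be aligned with your own cluster (json4s-jackson usually comes in transitively with spark-core, so that line may be unnecessary):

// build.sbt -- minimal sketch; versions are assumptions, match them to your cluster
scalaVersion := "2.11.12"

libraryDependencies ++= Seq(
  "org.apache.spark" %% "spark-core"     % "2.4.8",
  "org.apache.hbase" %  "hbase-client"   % "1.4.13",
  "org.apache.hbase" %  "hbase-server"   % "1.4.13", // provides org.apache.hadoop.hbase.mapreduce.TableInputFormat
  "org.json4s"       %% "json4s-jackson" % "3.5.3"   // usually provided transitively by spark-core
)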
