Reading HBase table records with Spark: an example for IntelliJ IDEA

Libraries added to the project

all *.jar files from the lib directory of HBase 2.0.1
scala-sdk-2.11.6
all *.jar files from the spark\jars directory of Spark 2.3.1
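
If you would rather manage these dependencies with sbt than add the raw jar files, a minimal build.sbt sketch could look like the following. The Maven coordinates and the hbase-mapreduce module are assumptions based on the versions listed above, not part of the original setup:

scalaVersion := "2.11.6"

libraryDependencies ++= Seq(
  // Spark core, matching the Spark 2.3.1 jars listed above
  "org.apache.spark" %% "spark-core" % "2.3.1",
  // HBase client and MapReduce integration; in HBase 2.x TableInputFormat lives in hbase-mapreduce
  "org.apache.hbase" % "hbase-client" % "2.0.1",
  "org.apache.hbase" % "hbase-mapreduce" % "2.0.1"
)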

Contents of SparkWordCount.scala:

import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.client.Result
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark.{SparkConf, SparkContext}

object SparkWordCount {
  def main(args: Array[String]): Unit = {
    // HBase connection settings
    val hbaseConf = HBaseConfiguration.create()
    hbaseConf.set("hbase.zookeeper.quorum", "master,slave1,slave2")
    hbaseConf.set("hbase.zookeeper.property.clientPort", "2181")
    // table to scan
    hbaseConf.set(TableInputFormat.INPUT_TABLE, "t1")

    val sparkConf = new SparkConf()
    sparkConf.setAppName("SparkWordCount")
    // run Spark locally
    sparkConf.setMaster("local[*]")
    sparkConf.set("spark.driver.allowMultipleContexts", "true")
    // to run on a Spark cluster instead:
    //sparkConf.setMaster("spark://master:7077")
    //sparkConf.set("spark.executor.memory", "1024m")

    val sc = new SparkContext(sparkConf)

    // read the HBase table as an RDD of (row key, Result) pairs
    val stuRDD = sc.newAPIHadoopRDD(hbaseConf, classOf[TableInputFormat],
      classOf[ImmutableBytesWritable],
      classOf[Result])
    // cache before the first action so HBase is scanned only once
    stuRDD.cache()

    // count the records in HBase table t1
    val count = stuRDD.count()
    println("t1 RDD Count:" + count)

    // iterate over the records of t1 and print the row key and the content1:info1 value
    stuRDD.foreach { case (_, result) =>
      val key = Bytes.toString(result.getRow)
      val info1 = Bytes.toString(result.getValue("content1".getBytes, "info1".getBytes))
      println("Row key:" + key + " content1:info1=" + info1)
    }

    sc.stop()
  }
}
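
For reference, the t1 table read above can be created and filled through the HBase client API. The sketch below is only an assumption about how the test data might have been loaded; it mirrors the row keys and values shown in the execution result that follows:

import org.apache.hadoop.hbase.{HBaseConfiguration, TableName}
import org.apache.hadoop.hbase.client.{ColumnFamilyDescriptorBuilder, ConnectionFactory, Put, TableDescriptorBuilder}
import org.apache.hadoop.hbase.util.Bytes

object CreateT1 {
  def main(args: Array[String]): Unit = {
    val conf = HBaseConfiguration.create()
    conf.set("hbase.zookeeper.quorum", "master,slave1,slave2")
    conf.set("hbase.zookeeper.property.clientPort", "2181")

    val connection = ConnectionFactory.createConnection(conf)
    try {
      val admin = connection.getAdmin
      val tableName = TableName.valueOf("t1")
      // create table t1 with a single column family "content1"
      val desc = TableDescriptorBuilder.newBuilder(tableName)
        .setColumnFamily(ColumnFamilyDescriptorBuilder.of("content1"))
        .build()
      if (!admin.tableExists(tableName)) admin.createTable(desc)

      // insert six rows with row keys 0..5 and one content1:info1 value each
      val table = connection.getTable(tableName)
      for (i <- 0 to 5) {
        val put = new Put(Bytes.toBytes(i.toString))
        put.addColumn(Bytes.toBytes("content1"), Bytes.toBytes("info1"),
          Bytes.toBytes(s"aaa bbb ccc aaa ddd$i"))
        table.put(put)
      }
      table.close()
    } finally {
      connection.close()
    }
  }
}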

Execution result:
t1 RDD Count:6

Row key:0 content1:info1=aaa bbb ccc aaa ddd0
Row key:1 content1:info1=aaa bbb ccc aaa ddd1
Row key:2 content1:info1=aaa bbb ccc aaa ddd2
Row key:3 content1:info1=aaa bbb ccc aaa ddd3
Row key:4 content1:info1=aaa bbb ccc aaa ddd4
Row key:5 content1:info1=aaa bbb ccc aaa ddd5
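
Since the object is called SparkWordCount, a natural extension is to actually count words in the content1:info1 values. The snippet below is a hedged sketch, not part of the original program; it continues from the cached stuRDD in the main method above:

    // split each info1 value into words, then count occurrences of each word
    val wordCounts = stuRDD
      .map { case (_, result) =>
        Bytes.toString(result.getValue("content1".getBytes, "info1".getBytes))
      }
      .flatMap(line => line.split("\\s+"))
      .map(word => (word, 1))
      .reduceByKey(_ + _)

    // with the six rows shown above this would print, e.g., (aaa,12), (bbb,6), ...
    wordCounts.collect().foreach(println)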
