Scala Spark HBase Operation Example


1. Software versions: Hadoop 2.6.4, HBase 1.0.3, Spark 1.6.0, Scala 2.10.5

Program jar versions:

 hbase-server 1.0.3, spark-core_2.10 1.1.0


2. Maven configuration file (pom.xml)


<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <groupId>spark-hbase2</groupId>
  <artifactId>spark-hbase2</artifactId>
  <version>0.0.1-SNAPSHOT</version>
  <name>${project.artifactId}</name>
  <description>My wonderful scala app</description>
  <inceptionYear>2015</inceptionYear>
  <licenses>
    <license>
      <name>My License</name>
      <url>http://....</url>
      <distribution>repo</distribution>
    </license>
  </licenses>

  <repositories>
    <repository>
      <id>maven-apache-repo2</id>
      <name>Maven Plugin Repository 2</name>
      <url>http://repo2.maven.org/maven2</url>
      <releases>
        <enabled>true</enabled>
      </releases>
      <snapshots>
        <enabled>false</enabled>
      </snapshots>
    </repository>
    <repository>
      <id>maven-central</id>
      <name>Maven Central Repository</name>
      <url>http://central.maven.org/maven2</url>
      <releases>
        <enabled>true</enabled>
      </releases>
      <snapshots>
        <enabled>false</enabled>
      </snapshots>
    </repository>
    <repository>
      <id>maven-com-sun</id>
      <name>Maven2 repository of SUN</name>
      <url>http://download.java.net/maven/2</url>
      <releases>
        <enabled>true</enabled>
      </releases>
      <snapshots>
        <enabled>false</enabled>
      </snapshots>
    </repository>
    <repository>
      <id>logicaldoc.sourceforge.net</id>
      <name>logicaldoc.sourceforge.net</name>
      <url>http://logicaldoc.sourceforge.net/maven/</url>
      <releases>
        <enabled>true</enabled>
      </releases>
      <snapshots>
        <enabled>false</enabled>
      </snapshots>
    </repository>
    <repository>
      <id>server.oosnmp.net</id>
      <name>server.oosnmp.net</name>
      <url>https://server.oosnmp.net/dist/release</url>
      <releases>
        <enabled>true</enabled>
      </releases>
      <snapshots>
        <enabled>false</enabled>
      </snapshots>
    </repository>
    <repository>
      <id>codehaus.org</id>
      <name>codehaus.org</name>
      <url>http://repository.codehaus.org</url>
      <releases>
        <enabled>true</enabled>
      </releases>
      <snapshots>
        <enabled>false</enabled>
      </snapshots>
    </repository>
    <repository>
      <id>sonatype.org</id>
      <name>sonatype.org</name>
      <url>https://oss.sonatype.org/content/groups/jetty</url>
      <releases>
        <enabled>true</enabled>
      </releases>
      <snapshots>
        <enabled>false</enabled>
      </snapshots>
    </repository>
  </repositories>

  <properties>
    <maven.compiler.source>1.6</maven.compiler.source>
    <maven.compiler.target>1.6</maven.compiler.target>
    <encoding>UTF-8</encoding>
    <!-- Must match the binary suffix of the spark-core_2.10 artifact below -->
    <scala.version>2.10.5</scala.version>
    <scala.compat.version>2.10</scala.compat.version>
  </properties>

  <dependencies>
    <dependency>
      <groupId>org.scala-lang</groupId>
      <artifactId>scala-library</artifactId>
      <version>${scala.version}</version>
    </dependency>

    <!-- Test -->
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>4.11</version>
      <scope>test</scope>
    </dependency>
    <dependency>
      <groupId>org.specs2</groupId>
      <artifactId>specs2-core_${scala.compat.version}</artifactId>
      <version>2.4.16</version>
      <scope>test</scope>
    </dependency>
    <dependency>
      <groupId>org.scalatest</groupId>
      <artifactId>scalatest_${scala.compat.version}</artifactId>
      <version>2.2.4</version>
      <scope>test</scope>
    </dependency>

    <dependency>
      <groupId>org.apache.hbase</groupId>
      <artifactId>hbase-server</artifactId>
      <version>1.0.3</version>
    </dependency>

    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-core_2.10</artifactId>
      <version>1.1.0</version>
    </dependency>

    <dependency>
      <groupId>jdk.tools</groupId>
      <artifactId>jdk.tools</artifactId>
      <version>1.6</version>
      <scope>system</scope>
      <systemPath>${JAVA_HOME}/lib/tools.jar</systemPath>
    </dependency>
  </dependencies>

  <build>
    <sourceDirectory>src/main/scala</sourceDirectory>
    <testSourceDirectory>src/test/scala</testSourceDirectory>
    <plugins>
      <plugin>
        <groupId>org.scala-tools</groupId>
        <artifactId>maven-scala-plugin</artifactId>
        <version>2.15.2</version>
        <executions>
          <execution>
            <goals>
              <goal>compile</goal>
              <goal>testCompile</goal>
            </goals>
            <configuration>
              <args>
                <arg>-dependencyfile</arg>
                <arg>${project.build.directory}/.scala_dependencies</arg>
              </args>
            </configuration>
          </execution>
        </executions>
      </plugin>

      <plugin>
        <artifactId>maven-assembly-plugin</artifactId>
        <version>2.5.5</version>
        <configuration>
          <appendAssemblyId>false</appendAssemblyId>
          <descriptorRefs>
            <descriptorRef>jar-with-dependencies</descriptorRef>
          </descriptorRefs>
        </configuration>
        <executions>
          <execution>
            <id>make-assembly</id>
            <phase>package</phase>
            <goals>
              <goal>assembly</goal>
            </goals>
          </execution>
        </executions>
      </plugin>

      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-surefire-plugin</artifactId>
        <version>2.10</version>
        <configuration>
          <useFile>false</useFile>
          <disableXmlReport>true</disableXmlReport>
          <includes>
            <include>**/*Test.*</include>
            <include>**/*Suite.*</include>
          </includes>
        </configuration>
      </plugin>
    </plugins>
  </build>
</project>
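Note: the Scala binary version in <properties> (2.10 here) must match the suffix of the spark-core artifact (spark-core_2.10); mixing 2.10 and 2.11 class files fails at runtime. Running mvn dependency:tree is a quick way to verify which Scala libraries are actually resolved.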

3. Scala code

package spark_hbase.spark_hbase

import org.apache.hadoop.hbase.{CellUtil, HBaseConfiguration, HColumnDescriptor, HTableDescriptor}
import org.apache.hadoop.hbase.client.{Get, HBaseAdmin, HTable, Put, Result}
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark._

object SparkHBase1 extends Serializable {
 def main(args: Array[String]): Unit = {

    val sparkConf = new SparkConf().setMaster("local").setAppName("HBaseTest")
      .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
    val sc = new SparkContext(sparkConf)
    val table_name = "test"
    val conf = HBaseConfiguration.create()

    conf.set("hbase.rootdir", "hdfs://wwwwww-1/hbase")
    conf.set("hbase.zookeeper.quorum", "11.11.131.19,11.11.131.20,11.11.131.21")
    conf.set("hbase.zookeeper.property.clientPort", "2181")
    conf.set("hbase.master", "60001")
    conf.set(TableInputFormat.INPUT_TABLE, table_name)
    
    val hadmin = new HBaseAdmin(conf)

    // Create the table with column family "basic" if it is not there yet
    if (!hadmin.isTableAvailable(table_name)) {
      println("Table does not exist; creating it")
      val tableDesc = new HTableDescriptor(table_name)
      tableDesc.addFamily(new HColumnDescriptor("basic".getBytes()))
      hadmin.createTable(tableDesc)
    } else {
      println("Table already exists; skipping creation")
    }

    val table = new HTable(conf, table_name)
    // Put: insert five sample rows into column family "basic"
    for (i <- 1 to 5) {
      val put = new Put(Bytes.toBytes("row" + i))
      put.add(Bytes.toBytes("basic"), Bytes.toBytes("name"), Bytes.toBytes("value " + i))
      table.put(put)
    }
    table.flushCommits()
    
    // Scan: read the whole table as an RDD via TableInputFormat
    val hbaseRDD = sc.newAPIHadoopRDD(conf, classOf[TableInputFormat],
      classOf[org.apache.hadoop.hbase.io.ImmutableBytesWritable],
      classOf[org.apache.hadoop.hbase.client.Result])

    val count = hbaseRDD.count()
    println("HBase RDD Count:" + count)
    hbaseRDD.cache()
    
    // Get: fetch a single row by key
    val g = new Get("row1".getBytes)
    val result = table.get(g)
    val value = Bytes.toString(result.getValue("basic".getBytes, "name".getBytes))
    println("GET row1: " + value)
    
    
    println("------------------------scan----------")
    val res = hbaseRDD.take(count.toInt)
    for (j <- 1 to count.toInt) {
      println("j: " + j)
      val rs = res(j - 1)._2
      val kvs = rs.rawCells()
      for (kv <- kvs)
        println("rowkey:" + Bytes.toString(CellUtil.cloneRow(kv)) +
          " cf:" + Bytes.toString(CellUtil.cloneFamily(kv)) +
          " column:" + Bytes.toString(CellUtil.cloneQualifier(kv)) +
          " value:" + Bytes.toString(CellUtil.cloneValue(kv)))
    }
    
/*  println("-------------------------")
    println("--take1 " + hbaseRDD.take(1))
    println("--count " + hbaseRDD.count()) */
     
    
    //insert_hbase(100002,3)
  }
  // Write a row to HBase (kept as a commented-out example)
 /* def insert_hbase(news_id:Int,type_id:Int): Unit ={
    var table_name = "news"
    val conf = HBaseConfiguration.create()
    conf.set("hbase.zookeeper.quorum","192.168.110.233, 192.168.110.234, 192.168.110.235");
    conf.set("hbase.zookeeper.property.clientPort", "2181");
    val table = new HTable(conf, table_name)
    val hadmin = new HBaseAdmin(conf)
    val row = Bytes.toBytes(news_id.toString())
    val p = new Put(row)
    p.add(Bytes.toBytes("content"),Bytes.toBytes("typeid"),Bytes.toBytes(type_id.toString()))
    table.put(p)
    table.close()
  } */
}
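
The example above writes rows one at a time through HTable.put(). For writing a whole RDD back to HBase, Spark's saveAsNewAPIHadoopDataset can be paired with HBase's TableOutputFormat. Below is a minimal sketch under the same versions as above; the ZooKeeper quorum is a placeholder, and the "test" table with column family "basic" is assumed to exist (e.g. created by SparkHBase1):

package spark_hbase.spark_hbase

import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.client.Put
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.mapreduce.TableOutputFormat
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.mapreduce.Job
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.SparkContext._ // pair-RDD implicits (needed on Spark < 1.3)

object SparkHBaseWrite {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setMaster("local").setAppName("HBaseWrite"))

    val conf = HBaseConfiguration.create()
    conf.set("hbase.zookeeper.quorum", "zk1,zk2,zk3") // placeholder quorum
    conf.set(TableOutputFormat.OUTPUT_TABLE, "test")  // target table

    // Configure a Hadoop Job so TableOutputFormat knows where to write
    val job = Job.getInstance(conf)
    job.setOutputFormatClass(classOf[TableOutputFormat[ImmutableBytesWritable]])

    // Turn each record into a (rowkey, Put) pair for column family "basic"
    val puts = sc.parallelize(6 to 10).map { i =>
      val put = new Put(Bytes.toBytes("row" + i))
      put.add(Bytes.toBytes("basic"), Bytes.toBytes("name"), Bytes.toBytes("value " + i))
      (new ImmutableBytesWritable, put)
    }
    puts.saveAsNewAPIHadoopDataset(job.getConfiguration)

    sc.stop()
  }
}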


4. Build

 mvn clean scala:compile compile package -X

For Eclipse environment setup, see: http://blog.csdn.net/ldds_520/article/details/51830721


5. Package and run:

spark/bin/spark-submit --master spark://11.11.131.119:7077 --name spark-hbase --class "spark_hbase.spark_hbase.SparkHBase1"    /spark-hbase2-0.0.1-SNAPSHOT.jar localhost 9999
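
The assembly built above already bundles the HBase client classes (jar-with-dependencies), so no extra jars are needed on the command line; the trailing "localhost 9999" arguments are accepted by main() but not used by SparkHBase1. If the application were instead built as a plain jar, the HBase jars would have to be shipped explicitly, for example (paths are placeholders):

spark/bin/spark-submit --master spark://11.11.131.119:7077 --name spark-hbase --class "spark_hbase.spark_hbase.SparkHBase1" --jars /path/to/hbase-client-1.0.3.jar,/path/to/hbase-common-1.0.3.jar,/path/to/hbase-server-1.0.3.jar /spark-hbase2-0.0.1-SNAPSHOT.jar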


6. Hadoop and HBase version compatibility:


HBase    Hadoop
0.92.0   1.0.0
0.92.1   1.0.0
0.92.2   1.0.3
0.94.0   1.0.2
0.94.1   1.0.3
0.94.2   1.0.3
0.94.3   1.0.4
0.94.4   1.0.4
0.94.5   1.0.4
0.94.9   1.2.0
0.95.0   1.2.0

Hadoop 1.2 + HBase 0.95.0 + Hive 0.11.0 is an incompatible combination: creating a Hive-to-HBase mapped table fails with a Pair-related exception.
Hadoop 1.2 + HBase 0.94.9 + Hive 0.10.0 works fine and resolves the incompatibility of the combination above.




The official compatibility matrix is reproduced below. Symbol meanings:
S = supported and tested,
X = not supported,
NT = should work, but not tested.



                          HBase-0.92.x  HBase-0.94.x  HBase-0.96.x  HBase-0.98.x [a]  HBase-1.0.x
Hadoop-0.20.205           S             X             X             X                 X
Hadoop-0.22.x             S             X             X             X                 X
Hadoop-1.0.0-1.0.2 [c]    X             X             X             X                 X
Hadoop-1.0.3+             S             S             S             X                 X
Hadoop-1.1.x              NT            S             S             X                 X
Hadoop-0.23.x             X             S             NT            X                 X
Hadoop-2.0.x-alpha        X             NT            X             X                 X
Hadoop-2.1.0-beta         X             NT            S             X                 X
Hadoop-2.2.0              X             NT [d]        S             S                 NT
Hadoop-2.3.x              X             NT            S             S                 NT
Hadoop-2.4.x              X             NT            S             S                 S
Hadoop-2.5.x              X             NT            S             S                 S

