1. Software versions: Hadoop 2.6.4, HBase 1.0.3, Spark 1.6.0, Scala 2.10.5
Jar versions used by the program:
hbase-server 1.0.3, spark-core_2.10 1.1.0
2. Maven configuration file (pom.xml):
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <groupId>spark-hbase2</groupId>
  <artifactId>spark-hbase2</artifactId>
  <version>0.0.1-SNAPSHOT</version>
  <name>${project.artifactId}</name>
  <description>My wonderfull scala app</description>
  <inceptionYear>2015</inceptionYear>
  <licenses>
    <license>
      <name>My License</name>
      <url>http://....</url>
      <distribution>repo</distribution>
    </license>
  </licenses>

  <repositories>
    <repository>
      <id>maven-apache-repo2</id>
      <name>Maven Plugin Repository 2</name>
      <url>http://repo2.maven.org/maven2</url>
      <releases><enabled>true</enabled></releases>
      <snapshots><enabled>false</enabled></snapshots>
    </repository>
    <repository>
      <id>maven-centeral</id>
      <name>Maven Plugin Repository 2</name>
      <url>http://central.maven.org/maven2</url>
      <releases><enabled>true</enabled></releases>
      <snapshots><enabled>false</enabled></snapshots>
    </repository>
    <repository>
      <id>maven-com-sun</id>
      <name>Maven2 repository of SUN</name>
      <url>http://download.java.net/maven/2</url>
      <releases><enabled>true</enabled></releases>
      <snapshots><enabled>false</enabled></snapshots>
    </repository>
    <repository>
      <id>logicaldoc.sourceforge.net</id>
      <name>logicaldoc.sourceforge.net</name>
      <url>http://logicaldoc.sourceforge.net/maven/</url>
      <releases><enabled>true</enabled></releases>
      <snapshots><enabled>false</enabled></snapshots>
    </repository>
    <repository>
      <id>server.oosnmp.net</id>
      <name>server.oosnmp.net</name>
      <url>https://server.oosnmp.net/dist/release</url>
      <releases><enabled>true</enabled></releases>
      <snapshots><enabled>false</enabled></snapshots>
    </repository>
    <repository>
      <id>codehaus.org</id>
      <name>codehaus.org</name>
      <url>http://repository.codehaus.org</url>
      <releases><enabled>true</enabled></releases>
      <snapshots><enabled>false</enabled></snapshots>
    </repository>
    <repository>
      <id>sonatype.org</id>
      <name>sonatype.org</name>
      <url>https://oss.sonatype.org/content/groups/jetty</url>
      <releases><enabled>true</enabled></releases>
      <snapshots><enabled>false</enabled></snapshots>
    </repository>
  </repositories>

  <properties>
    <maven.compiler.source>1.6</maven.compiler.source>
    <maven.compiler.target>1.6</maven.compiler.target>
    <encoding>UTF-8</encoding>
    <scala.version>2.11.5</scala.version>
    <scala.compat.version>2.11</scala.compat.version>
  </properties>

  <dependencies>
    <dependency>
      <groupId>org.scala-lang</groupId>
      <artifactId>scala-library</artifactId>
      <version>${scala.version}</version>
    </dependency>
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>4.11</version>
      <scope>test</scope>
    </dependency>
    <dependency>
      <groupId>org.specs2</groupId>
      <artifactId>specs2-core_${scala.compat.version}</artifactId>
      <version>2.4.16</version>
      <scope>test</scope>
    </dependency>
    <dependency>
      <groupId>org.scalatest</groupId>
      <artifactId>scalatest_${scala.compat.version}</artifactId>
      <version>2.2.4</version>
      <scope>test</scope>
    </dependency>
    <dependency>
      <groupId>org.apache.hbase</groupId>
      <artifactId>hbase-server</artifactId>
      <version>1.0.3</version>
    </dependency>
    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-core_2.10</artifactId>
      <version>1.1.0</version>
    </dependency>
    <dependency>
      <groupId>jdk.tools</groupId>
      <artifactId>jdk.tools</artifactId>
      <version>1.6</version>
      <scope>system</scope>
      <systemPath>${JAVA_HOME}/lib/tools.jar</systemPath>
    </dependency>
  </dependencies>

  <build>
    <sourceDirectory>src/main/scala</sourceDirectory>
    <testSourceDirectory>src/test/scala</testSourceDirectory>
    <plugins>
      <plugin>
        <groupId>org.scala-tools</groupId>
        <artifactId>maven-scala-plugin</artifactId>
        <version>2.15.2</version>
        <executions>
          <execution>
            <goals>
              <goal>compile</goal>
              <goal>testCompile</goal>
            </goals>
            <configuration>
              <args>
                <arg>-dependencyfile</arg>
                <arg>${project.build.directory}/.scala_dependencies</arg>
              </args>
            </configuration>
          </execution>
        </executions>
      </plugin>
      <plugin>
        <artifactId>maven-assembly-plugin</artifactId>
        <version>2.5.5</version>
        <configuration>
          <appendAssemblyId>false</appendAssemblyId>
          <descriptorRefs>
            <descriptorRef>jar-with-dependencies</descriptorRef>
          </descriptorRefs>
        </configuration>
        <executions>
          <execution>
            <id>make-assembly</id>
            <phase>package</phase>
            <goals>
              <goal>assembly</goal>
            </goals>
          </execution>
        </executions>
      </plugin>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-surefire-plugin</artifactId>
        <version>2.10</version>
        <configuration>
          <useFile>false</useFile>
          <disableXmlReport>true</disableXmlReport>
          <includes>
            <include>**/*Test.*</include>
            <include>**/*Suite.*</include>
          </includes>
        </configuration>
      </plugin>
    </plugins>
  </build>
</project>
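Note that this pom pins scala-library to 2.11.5 while depending on spark-core_2.10 (a Scala 2.10 build), and the header above lists Scala 2.10.5; mixing Scala binary versions on one classpath commonly ends in NoSuchMethodError at runtime, so it is worth confirming which versions the job actually picks up. A minimal sketch for doing that (the helper object is illustrative, not part of the original project):

package spark_hbase.spark_hbase

import org.apache.spark.{SparkConf, SparkContext}

// Hypothetical helper: print the Scala and Spark versions found on the
// classpath before running the real job.
object VersionCheck {
  def main(args: Array[String]): Unit = {
    println("Scala library: " + scala.util.Properties.versionString)
    val sc = new SparkContext(new SparkConf().setMaster("local").setAppName("VersionCheck"))
    println("Spark: " + sc.version)
    sc.stop()
  }
}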
3. Scala source code:
package spark_hbase.spark_hbase

import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.HColumnDescriptor
import org.apache.hadoop.hbase.HTableDescriptor
import org.apache.hadoop.hbase.client.Get
import org.apache.hadoop.hbase.client.HBaseAdmin
import org.apache.hadoop.hbase.client.HTable
import org.apache.hadoop.hbase.client.Put
import org.apache.hadoop.hbase.client.Result
import org.apache.hadoop.hbase.client.Scan
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.serializer.KryoSerializer
object SparkHBase1 extends Serializable {
  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf().setMaster("local").setAppName("HBaseTest")
      .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
    val sc = new SparkContext(sparkConf)

    val table_name = "test"
    val conf = HBaseConfiguration.create()
    conf.set("hbase.rootdir", "hdfs://wwwwww-1/hbase")
    conf.set("hbase.zookeeper.quorum", "11.11.131.19,11.11.131.20,11.11.131.21")
    conf.set("hbase.zookeeper.property.clientPort", "2181")
    conf.set("hbase.master", "60001")
    conf.set(TableInputFormat.INPUT_TABLE, table_name)

    // Create the table if it does not exist yet
    val hadmin = new HBaseAdmin(conf)
    if (!hadmin.isTableAvailable("test")) {
      print("Table Not Exists! Create Table")
      val tableDesc = new HTableDescriptor("test")
      tableDesc.addFamily(new HColumnDescriptor("basic".getBytes()))
      hadmin.createTable(tableDesc)
    } else {
      print("Table Exists! not Create Table")
    }

    // Write a few sample rows into the "basic" column family
    val table = new HTable(conf, "test")
    for (i <- 1 to 5) {
      val put = new Put(Bytes.toBytes("row" + i))
      put.add(Bytes.toBytes("basic"), Bytes.toBytes("name"), Bytes.toBytes("value " + i))
      table.put(put)
    }
    table.flushCommits()

    // Scan: read the whole table as an RDD of (ImmutableBytesWritable, Result)
    val hbaseRDD = sc.newAPIHadoopRDD(conf, classOf[TableInputFormat],
      classOf[org.apache.hadoop.hbase.io.ImmutableBytesWritable],
      classOf[org.apache.hadoop.hbase.client.Result])
    val count = hbaseRDD.count()
    println("HBase RDD Count:" + count)
    hbaseRDD.cache()

    // A second RDD over the same table; only used by the commented-out example below
    val hBaseRDD = sc.newAPIHadoopRDD(conf, classOf[TableInputFormat],
      classOf[org.apache.hadoop.hbase.io.ImmutableBytesWritable],
      classOf[org.apache.hadoop.hbase.client.Result])

    // Get a single row directly through the HBase client
    val g = new Get("row1".getBytes)
    val result = table.get(g)
    val value = Bytes.toString(result.getValue("basic".getBytes, "name".getBytes))
    println("GET id001 :" + value)

    hbaseRDD.cache()
    print("------------------------scan----------")
    val res = hbaseRDD.take(count.toInt)
    for (j <- 1 to count.toInt) {
      println("j: " + j)
      val rs = res(j - 1)._2
      val kvs = rs.raw
      for (kv <- kvs)
        println("rowkey:" + new String(kv.getRow()) +
          " cf:" + new String(kv.getFamily()) +
          " column:" + new String(kv.getQualifier()) +
          " value:" + new String(kv.getValue()))
    }
    /* println("-------------------------")
       println("--take1" + hBaseRDD.take(1))
       println("--count" + hBaseRDD.count()) */
    //insert_hbase(100002,3)
  }

  // Write a single row to HBase
  /* def insert_hbase(news_id: Int, type_id: Int): Unit = {
       val table_name = "news"
       val conf = HBaseConfiguration.create()
       conf.set("hbase.zookeeper.quorum", "192.168.110.233,192.168.110.234,192.168.110.235")
       conf.set("hbase.zookeeper.property.clientPort", "2181")
       val table = new HTable(conf, table_name)
       val hadmin = new HBaseAdmin(conf)
       val row = Bytes.toBytes(news_id.toString())
       val p = new Put(row)
       p.add(Bytes.toBytes("content"), Bytes.toBytes("typeid"), Bytes.toBytes(type_id.toString()))
       table.put(p)
       table.close()
     } */
}
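The HBaseAdmin/HTable constructors and the Result.raw()/KeyValue getters used above still compile against HBase 1.0.3 but are deprecated there; the 1.0 client adds the Connection/Table/Admin interfaces and the Cell API. Below is a sketch of the same create/put/get steps with that newer API, kept to the table and column names used above (treat it as an untested outline, not the original author's code):

package spark_hbase.spark_hbase

import org.apache.hadoop.hbase.{CellUtil, HBaseConfiguration, HColumnDescriptor, HTableDescriptor, TableName}
import org.apache.hadoop.hbase.client.{ConnectionFactory, Get, Put}
import org.apache.hadoop.hbase.util.Bytes

// Hypothetical companion example using the HBase 1.0 Connection/Cell API
object HBaseClient10Sketch {
  def main(args: Array[String]): Unit = {
    val conf = HBaseConfiguration.create()
    conf.set("hbase.zookeeper.quorum", "11.11.131.19,11.11.131.20,11.11.131.21")
    conf.set("hbase.zookeeper.property.clientPort", "2181")

    val connection = ConnectionFactory.createConnection(conf)
    val admin = connection.getAdmin
    val name = TableName.valueOf("test")

    // Create the table with the "basic" column family if it is missing
    if (!admin.tableExists(name)) {
      val desc = new HTableDescriptor(name)
      desc.addFamily(new HColumnDescriptor("basic"))
      admin.createTable(desc)
    }

    // Put and Get through the non-deprecated Table interface
    val table = connection.getTable(name)
    val put = new Put(Bytes.toBytes("row1"))
    put.addColumn(Bytes.toBytes("basic"), Bytes.toBytes("name"), Bytes.toBytes("value 1"))
    table.put(put)

    val result = table.get(new Get(Bytes.toBytes("row1")))
    // Iterate cells instead of the deprecated raw()/getRow()/getFamily()
    for (cell <- result.rawCells()) {
      println("rowkey:" + Bytes.toString(CellUtil.cloneRow(cell)) +
        " cf:" + Bytes.toString(CellUtil.cloneFamily(cell)) +
        " column:" + Bytes.toString(CellUtil.cloneQualifier(cell)) +
        " value:" + Bytes.toString(CellUtil.cloneValue(cell)))
    }

    table.close()
    admin.close()
    connection.close()
  }
}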
4. Build:
mvn clean scala:compile compile package -X
For setting up the Eclipse environment, see: http://blog.csdn.net/ldds_520/article/details/51830721
5. Package and run:
spark/bin/spark-submit --master spark://11.11.131.119:7077 --name spark-hbase --class "spark_hbase.spark_hbase.SparkHBase1" /spark-hbase2-0.0.1-SNAPSHOT.jar localhost 9999
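Two details of this submit command are worth noting: SparkHBase1 hard-codes .setMaster("local"), and a master set directly on the SparkConf takes precedence over the --master URL passed to spark-submit, so the job will not actually run on the spark://11.11.131.119:7077 cluster; also, the trailing "localhost 9999" arguments are never read by the program. A hedged sketch of a driver that leaves the master to spark-submit and picks up optional arguments (the argument meanings are illustrative, not from the original):

import org.apache.spark.{SparkConf, SparkContext}

object SparkHBaseSubmitSketch {
  def main(args: Array[String]): Unit = {
    // Illustrative only: read the ZooKeeper quorum and client port from the
    // spark-submit arguments, falling back to defaults when they are absent.
    val zkQuorum = if (args.length > 0) args(0) else "localhost"
    val zkPort   = if (args.length > 1) args(1) else "2181"

    // No setMaster here: the master URL now comes from spark-submit --master.
    val sc = new SparkContext(new SparkConf().setAppName("spark-hbase"))
    println(s"quorum=$zkQuorum port=$zkPort master=${sc.master}")
    sc.stop()
  }
}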
6. Hadoop and HBase version compatibility:
HBase    Hadoop
0.92.0   1.0.0
0.92.1   1.0.0
0.92.2   1.0.3
0.94.0   1.0.2
0.94.1   1.0.3
0.94.2   1.0.3
0.94.3   1.0.4
0.94.4   1.0.4
0.94.5   1.0.4
0.94.9   1.2.0
0.95.0   1.2.0
Hadoop 1.2 + HBase 0.95.0 + Hive 0.11.0: HBase and Hive are incompatible; creating a Hive table mapped to HBase fails with a pair-related exception.
Hadoop 1.2 + HBase 0.94.9 + Hive 0.10.0: works fine, resolving the incompatibility of the combination above.
Below is the information from the official site; the symbols there mean:
S = supported and tested,
X = not supported,
NT = should work, but not tested.