import java.util.Date

import org.apache.hadoop.fs.Path
import org.apache.hadoop.hbase.client.{ConnectionFactory, Table}
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.mapreduce.{HFileOutputFormat2, LoadIncrementalHFiles}
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.{HBaseConfiguration, KeyValue, TableName}
import org.apache.hadoop.mapreduce.Job
import org.apache.spark.{SparkConf, SparkContext}

object App7 {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf())
    val tableName = "output_table"
    val stagingFolder = "/user/hbase/spark/"
    val columnFamily1 = "f1"

    val conf = HBaseConfiguration.create()
    val conn = ConnectionFactory.createConnection(conf)
    val admin = conn.getAdmin
    val table: Table = conn.getTable(TableName.valueOf(tableName))
    val regionLocator = conn.getRegionLocator(TableName.valueOf(tableName))

    // configureIncrementalLoad wires up HFileOutputFormat2 (output key/value classes and
    // per-family compression, bloom filter and block size settings) on the job's configuration.
    val job = Job.getInstance(conf)
    job.setJobName("DumpFile")
    job.setMapOutputKeyClass(classOf[ImmutableBytesWritable])
    job.setMapOutputValueClass(classOf[KeyValue])
    HFileOutputFormat2.configureIncrementalLoad(job, table, regionLocator)

    val date = new Date().getTime

    // Sample data: (rowKey, (family, qualifier, value)). HFileOutputFormat2 requires cells
    // to be written in row-key order; this sample is already sorted by row key.
    val rdd = sc.parallelize(Array(
      (Bytes.toBytes("41"), (Bytes.toBytes(columnFamily1), Bytes.toBytes("a"), Bytes.toBytes("foo1"))),
      (Bytes.toBytes("41"), (Bytes.toBytes(columnFamily1), Bytes.toBytes("b"), Bytes.toBytes("foo2.b"))),
      (Bytes.toBytes("42"), (Bytes.toBytes(columnFamily1), Bytes.toBytes("a"), Bytes.toBytes("foo2.a"))),
      (Bytes.toBytes("42"), (Bytes.toBytes(columnFamily1), Bytes.toBytes("c"), Bytes.toBytes("foo2.c"))),
      (Bytes.toBytes("43"), (Bytes.toBytes(columnFamily1), Bytes.toBytes("a"), Bytes.toBytes("foo3"))),
      (Bytes.toBytes("44"), (Bytes.toBytes(columnFamily1), Bytes.toBytes("a"), Bytes.toBytes("foo.1"))),
      (Bytes.toBytes("44"), (Bytes.toBytes(columnFamily1), Bytes.toBytes("b"), Bytes.toBytes("foo.2"))),
      (Bytes.toBytes("45"), (Bytes.toBytes(columnFamily1), Bytes.toBytes("a"), Bytes.toBytes("bar.1"))),
      (Bytes.toBytes("45"), (Bytes.toBytes(columnFamily1), Bytes.toBytes("d"), Bytes.toBytes("bar.2")))
    )).map { x =>
      (new ImmutableBytesWritable(x._1), new KeyValue(x._1, x._2._1, x._2._2, date, x._2._3))
    }

    // Write the data as HFiles into the staging directory. Pass the job's configuration,
    // not the original conf, so the settings applied by configureIncrementalLoad take effect.
    rdd.saveAsNewAPIHadoopFile(
      stagingFolder,
      classOf[ImmutableBytesWritable],
      classOf[KeyValue],
      classOf[HFileOutputFormat2],
      job.getConfiguration)

    // Bulk-load the generated HFiles into the table. The (Path, Admin, Table, RegionLocator)
    // overload avoids the fragile cast of the Table handle to HTable.
    val load = new LoadIncrementalHFiles(conf)
    try {
      load.doBulkLoad(new Path(stagingFolder), admin, table, regionLocator)
    } finally {
      table.close()
      admin.close()
      conn.close()
      sc.stop()
    }
  }
}
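
After the bulk load finishes, a quick client-side read can confirm that the rows are visible through the regular HBase API. The sketch below is a minimal, optional check (the object name VerifyLoad is our own); it assumes the same output_table and f1 column family used above.

import org.apache.hadoop.hbase.client.{ConnectionFactory, Get}
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.{HBaseConfiguration, TableName}

// Minimal sketch: read back one of the bulk-loaded cells to verify the load.
object VerifyLoad {
  def main(args: Array[String]): Unit = {
    val conf = HBaseConfiguration.create()
    val conn = ConnectionFactory.createConnection(conf)
    val table = conn.getTable(TableName.valueOf("output_table"))
    try {
      val result = table.get(new Get(Bytes.toBytes("41")))
      val value = result.getValue(Bytes.toBytes("f1"), Bytes.toBytes("a"))
      println(s"row 41, f1:a = ${Bytes.toString(value)}") // expected: foo1
    } finally {
      table.close()
      conn.close()
    }
  }
}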