Description: not going to explain the background here, just Baidu it!
Code:
package HbaseBulkLoadTest;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2;
import org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;

public class BULKLOAD {

    static Logger logger = LoggerFactory.getLogger(BULKLOAD.class);

    public static class MyMap extends Mapper<LongWritable, Text, ImmutableBytesWritable, Put> {

        @Override
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // one cell per input line, tab separated: key1 fm1:col1 value1
            String[] valueStrSplit = value.toString().split("\t");
            String hkey = valueStrSplit[0];
            String family = valueStrSplit[1].split(":")[0];
            String column = valueStrSplit[1].split(":")[1];
            String hvalue = valueStrSplit[2];

            final byte[] rowKey = Bytes.toBytes(hkey);
            final ImmutableBytesWritable HKey = new ImmutableBytesWritable(rowKey);
            Put HPut = new Put(rowKey);
            byte[] cell = Bytes.toBytes(hvalue);
            HPut.add(Bytes.toBytes(family), Bytes.toBytes(column), cell);
            // emit rowkey -> Put; HFileOutputFormat2 sorts these and writes them out as HFiles
            context.write(HKey, HPut);
        }
    }

    public static void main(String args[]) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "172.16.11.221,172.16.11.222,172.16.11.223");
        conf.set("hbase.zookeeper.property.clientPort", "2800");
        HTable hTable = new HTable(conf, "bulkloadtest");

        Job job = Job.getInstance(conf, "bulkloadtest");
        job.setJarByClass(BULKLOAD.class);
        job.setMapperClass(MyMap.class);
        job.setMapOutputKeyClass(ImmutableBytesWritable.class);
        job.setMapOutputValueClass(Put.class);
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(HFileOutputFormat2.class);

        // my cluster runs Hadoop in HA mode, so this is the nameservice rather than host:9000
        String inPath = "hdfs://Machenmaster/hbase/BKLDTest/data.txt";
        logger.info("input path: " + inPath);
        String hfilePath = "hdfs://Machenmaster/hbase/BKLDTest/bkldOutPut";
        FileInputFormat.addInputPath(job, new Path(inPath));
        FileOutputFormat.setOutputPath(job, new Path(hfilePath));

        // sets up the partitioner and reducer so the generated HFiles line up with the table's regions
        HFileOutputFormat2.configureIncrementalLoad(job, hTable);
        job.waitForCompletion(true);

        // move the generated HFiles into the table
        LoadIncrementalHFiles load = new LoadIncrementalHFiles(conf);
        load.doBulkLoad(new Path(hfilePath), hTable);
        hTable.close();
    }
}
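HTable(conf, name), configureIncrementalLoad(job, hTable) and doBulkLoad(path, hTable) are the old-style signatures that HBase 1.x marks as deprecated. If you prefer the Connection-based client API, the setup and load part can be rewritten roughly as below; this is only a sketch (the Mapper and the job I/O settings stay exactly as in main() above), it assumes HBase 1.1+ where the Table/RegionLocator overloads exist, and it needs one extra import, org.apache.hadoop.hbase.TableName.

// Sketch: same bulk load wired through Connection / Table / RegionLocator instead of HTable.
try (Connection conn = ConnectionFactory.createConnection(conf);
     Admin admin = conn.getAdmin();
     Table table = conn.getTable(TableName.valueOf("bulkloadtest"));
     RegionLocator locator = conn.getRegionLocator(TableName.valueOf("bulkloadtest"))) {

    // ... create the Job and set the mapper, key/value classes and input/output paths exactly as in main() above ...

    // region-aware variant of configureIncrementalLoad
    HFileOutputFormat2.configureIncrementalLoad(job, table, locator);

    if (job.waitForCompletion(true)) {
        new LoadIncrementalHFiles(conf).doBulkLoad(new Path(hfilePath), admin, table, locator);
    }
}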
The data in HDFS:
Contents of data.txt (just reusing the sample a guy posted online; a small Java sketch for putting the file into HDFS follows the four lines):
key1 fm1:col1 value1
key1 fm1:col2 value2
key1 fm2:col1 value3
key4 fm1:col1 value4
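One thing to watch: the mapper splits each line on \t, so the three columns in data.txt must be tab-separated. I normally push the file up with hdfs dfs -put; if you want to do it from Java instead, here is a minimal sketch (it assumes the same HA nameservice Machenmaster as the job code, and /root/data.txt is just a made-up local path):

// Sketch: copy a local data.txt into the job's HDFS input directory.
import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class PutDataFile {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration(); // picks up core-site.xml / hdfs-site.xml from the classpath
        FileSystem fs = FileSystem.get(URI.create("hdfs://Machenmaster"), conf);
        // local source path is hypothetical; the HDFS target matches inPath in the job
        fs.copyFromLocalFile(new Path("/root/data.txt"), new Path("/hbase/BKLDTest/data.txt"));
        fs.close();
    }
}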
3 Create the table:
Create it in the hbase shell:
hbase(main):004:0> create 'bulkloadtest','fm1','fm2'
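If you would rather create the table from Java instead of the shell, the HBase 1.x admin API does the same thing; a minimal sketch with the same ZooKeeper settings as the job code (imports: org.apache.hadoop.hbase.HBaseConfiguration, HColumnDescriptor, HTableDescriptor, TableName plus org.apache.hadoop.hbase.client.*):

// Sketch: create 'bulkloadtest' with column families fm1 and fm2 via the admin API.
Configuration conf = HBaseConfiguration.create();
conf.set("hbase.zookeeper.quorum", "172.16.11.221,172.16.11.222,172.16.11.223");
conf.set("hbase.zookeeper.property.clientPort", "2800");

try (Connection conn = ConnectionFactory.createConnection(conf);
     Admin admin = conn.getAdmin()) {
    TableName name = TableName.valueOf("bulkloadtest");
    if (!admin.tableExists(name)) {
        HTableDescriptor desc = new HTableDescriptor(name);
        desc.addFamily(new HColumnDescriptor("fm1"));
        desc.addFamily(new HColumnDescriptor("fm2"));
        admin.createTable(desc);
    }
}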
4 Packaging: Win10 + IDEA + Maven
The jar produced by the build (upload it to the Linux cluster after packaging):
[root@slaver3 share]# ls
doc hadoop HbaseForMe-1.0-SNAPSHOT.jar
5 Run the job (the main class is picked up from the jar's manifest; if your build does not set one, append the fully qualified class name HbaseBulkLoadTest.BULKLOAD to the command):
[root@slaver3 share]# hadoop jar HbaseForMe-1.0-SNAPSHOT.jar
6 A few screenshots of the run (images not reproduced here):
7. Check the output in HDFS:
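The job leaves one sub-directory per column family (fm1, fm2) under bkldOutPut, each holding the generated HFiles; after doBulkLoad finishes, those files are moved into HBase's own data directory, so the family sub-directories end up empty. Besides hdfs dfs -ls -R, you can list the output from Java; a small sketch:

// Sketch: recursively list everything under the HFile output directory.
import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

public class ListBulkloadOutput {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(URI.create("hdfs://Machenmaster"), conf);
        RemoteIterator<LocatedFileStatus> it = fs.listFiles(new Path("/hbase/BKLDTest/bkldOutPut"), true);
        while (it.hasNext()) {
            System.out.println(it.next().getPath());
        }
        fs.close();
    }
}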
8 Scan the table in HBase:
hbase(main):004:0> scan 'bulkloadtest'
ROW COLUMN+CELL
key1 column=fm1:col1, timestamp=1522060321677, value=value1
key1 column=fm1:col2, timestamp=1522060321677, value=value2
key1 column=fm2:col1, timestamp=1522060321677, value=value3
key4 column=fm1:col1, timestamp=1522060321677, value=value4
2 row(s) in 0.1000 seconds
hbase(main):005:0>
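The same check can be done from the Java client; a short sketch that scans bulkloadtest and prints every cell (same ZooKeeper settings as the job; imports come from org.apache.hadoop.hbase, org.apache.hadoop.hbase.client and org.apache.hadoop.hbase.util):

// Sketch: scan 'bulkloadtest' and print row, family:qualifier and value for each cell.
Configuration conf = HBaseConfiguration.create();
conf.set("hbase.zookeeper.quorum", "172.16.11.221,172.16.11.222,172.16.11.223");
conf.set("hbase.zookeeper.property.clientPort", "2800");

try (Connection conn = ConnectionFactory.createConnection(conf);
     Table table = conn.getTable(TableName.valueOf("bulkloadtest"));
     ResultScanner scanner = table.getScanner(new Scan())) {
    for (Result row : scanner) {
        for (Cell cell : row.rawCells()) {
            System.out.println(Bytes.toString(CellUtil.cloneRow(cell)) + "  "
                    + Bytes.toString(CellUtil.cloneFamily(cell)) + ":"
                    + Bytes.toString(CellUtil.cloneQualifier(cell)) + " = "
                    + Bytes.toString(CellUtil.cloneValue(cell)));
        }
    }
}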