Importing bitmap-type data into HBase

First, create the target table with two column families:

create 'hfiletableBitmap','fm1','fm2'

Prepare the data file (fields must be tab-separated, since the mapper below splits on \t):
vim data_bitmap.txt

key1    fm1:col1        100
key1    fm1:col2        200
key1    fm2:col1        300
key4    fm1:col1        400

Upload it to HDFS:

hadoop fs -put data_bitmap.txt  /user/zhenxin3

The MapReduce driver below parses each line, wraps the value in a single-element RoaringBitmap, serializes it, and writes HFiles for bulk loading:

package com.yzx;

import com.yzx.Tools.Util;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FsShell;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2;
import org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.roaringbitmap.buffer.MutableRoaringBitmap;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;

public class BulkLoadJobBitmap {
    static Logger logger = LoggerFactory.getLogger(BulkLoadJobBitmap.class);

    public static class BulkLoadMap extends
            Mapper<LongWritable, Text, ImmutableBytesWritable, Put> {

        @Override
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {

            // Input line format: rowkey \t family:column \t value
            String[] valueStrSplit = value.toString().split("\t");
            String hkey = valueStrSplit[0];
            String family = valueStrSplit[1].split(":")[0];
            String column = valueStrSplit[1].split(":")[1];
            String hvalue = valueStrSplit[2];
            final byte[] rowKey = Bytes.toBytes(hkey);
            final ImmutableBytesWritable HKey = new ImmutableBytesWritable(rowKey);
            Put HPut = new Put(rowKey);
            // Store the value as a serialized single-element RoaringBitmap,
            // not as a plain byte-encoded string
            MutableRoaringBitmap bitmap = new MutableRoaringBitmap();
            bitmap.add(Integer.parseInt(hvalue));
            HPut.add(Bytes.toBytes(family), Bytes.toBytes(column), Util.serializeBitmap(bitmap));
            context.write(HKey, HPut);

        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        String inputPath = args[0];
        String outputPath = args[1];
        HTable hTable = null;
        try {
            Job job = Job.getInstance(conf, "BulkLoadBitmap");
            job.setJarByClass(BulkLoadJobBitmap.class);
            job.setMapperClass(BulkLoadJobBitmap.BulkLoadMap.class);
            job.setMapOutputKeyClass(ImmutableBytesWritable.class);
            job.setMapOutputValueClass(Put.class);
            // speculation
            job.setSpeculativeExecution(false);
            job.setReduceSpeculativeExecution(false);
            // in/out format
            job.setInputFormatClass(TextInputFormat.class);
            job.setOutputFormatClass(HFileOutputFormat2.class);

            FileInputFormat.setInputPaths(job, inputPath);
            FileOutputFormat.setOutputPath(job, new Path(outputPath));

            hTable = new HTable(conf, args[2]);
            HFileOutputFormat2.configureIncrementalLoad(job, hTable);

            if (job.waitForCompletion(true)) {
                FsShell shell = new FsShell(conf);
                try {
                    shell.run(new String[]{"-chmod", "-R", "777", args[1]});
                } catch (Exception e) {
                    logger.error("Couldnt change the file permissions ", e);
                    throw new IOException(e);
                }
                // bulk load the generated HFiles into the HBase table
                LoadIncrementalHFiles loader = new LoadIncrementalHFiles(conf);
                loader.doBulkLoad(new Path(outputPath), hTable);
            } else {
                logger.error("loading failed.");
                System.exit(1);
            }

        } catch (IllegalArgumentException e) {
            e.printStackTrace();
        } finally {
            if (hTable != null) {
                hTable.close();
            }
        }
    }
}

The serialization helpers referenced above live in a small Util class:

package com.yzx.Tools;

import org.apache.hadoop.hbase.util.Bytes;
import org.roaringbitmap.buffer.ImmutableRoaringBitmap;
import org.roaringbitmap.buffer.MutableRoaringBitmap;

import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;

public class Util {
    public static byte[] serializeBitmap(MutableRoaringBitmap bitmap)
            throws IOException {
        ByteArrayOutputStream bout = new ByteArrayOutputStream(
                bitmap.serializedSizeInBytes());
        DataOutputStream dos = new DataOutputStream(bout);
        bitmap.serialize(dos);
        return bout.toByteArray();
    }

    public static ImmutableRoaringBitmap deSerializeBitmap(byte[] value) {
        ByteBuffer buffer = ByteBuffer.wrap(value);
        return new ImmutableRoaringBitmap(buffer);
    }

    // Pad a partition string with trailing zero bytes to a fixed 12-byte rowkey.
    // Unused in this example; throws BufferOverflowException if the encoded
    // string exceeds 12 bytes.
    public static byte[] TO_ROWKEY(String partition) {
        ByteBuffer b = ByteBuffer.allocate(12);
        b.put(Bytes.toBytes(partition));
        return b.array();
    }
}
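
As a quick sanity check of these helpers, here is a minimal round-trip sketch (the UtilTest class name is hypothetical; it needs only the same RoaringBitmap dependency as above):

package com.yzx.Tools;

import org.roaringbitmap.buffer.ImmutableRoaringBitmap;
import org.roaringbitmap.buffer.MutableRoaringBitmap;

// Hypothetical smoke test: serialize a one-element bitmap and read it back.
public class UtilTest {
    public static void main(String[] args) throws Exception {
        MutableRoaringBitmap bm = new MutableRoaringBitmap();
        bm.add(100);
        byte[] bytes = Util.serializeBitmap(bm);
        ImmutableRoaringBitmap back = Util.deSerializeBitmap(bytes);
        System.out.println(back.contains(100));    // true
        System.out.println(back.getCardinality()); // 1
    }
}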

Run the job with the fat jar, passing the input path, output path, and table name as arguments:

hadoop jar yzx_Hbase-1.0-SNAPSHOT-jar-with-dependencies.jar  com.yzx.BulkLoadJobBitmap  /user/zhenxin3/data_bitmap.txt /user/zhenxin3/bitmap_out hfiletableBitmap

Verify in the HBase shell that the serialized bitmaps were loaded:

hbase(main):007:0> scan 'hfiletableBitmap'
ROW                                              COLUMN+CELL                                                                                                                                  
 key1                                            column=fm1:col1, timestamp=1561994385923, value=:0\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x10\x00\x00\x00d\x00                              
 key1                                            column=fm1:col2, timestamp=1561994385923, value=:0\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x10\x00\x00\x00\xC8\x00                           
 key1                                            column=fm2:col1, timestamp=1561994385923, value=:0\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x10\x00\x00\x00,\x01                              
 key4                                            column=fm1:col1, timestamp=1561994385923, value=:0\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x10\x00\x00\x00\x90\x01                           
2 row(s) in 0.0270 seconds
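
The cell values above are the raw serialized bitmap bytes (the leading :0 is the little-endian RoaringBitmap serialization cookie), so they look opaque in the shell. To recover the integers, read a cell back and deserialize it; below is a minimal sketch, assuming the same HBase 1.x client API used in the job above (the ReadBitmap class name is hypothetical):

package com.yzx;

import com.yzx.Tools.Util;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.util.Bytes;
import org.roaringbitmap.buffer.ImmutableRoaringBitmap;

public class ReadBitmap {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        HTable table = new HTable(conf, "hfiletableBitmap");
        try {
            // Fetch one cell and deserialize it back into a bitmap
            Get get = new Get(Bytes.toBytes("key1"));
            Result result = table.get(get);
            byte[] raw = result.getValue(Bytes.toBytes("fm1"), Bytes.toBytes("col1"));
            ImmutableRoaringBitmap bitmap = Util.deSerializeBitmap(raw);
            System.out.println(bitmap); // expected: {100}
        } finally {
            table.close();
        }
    }
}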
