https://github.com/sunyaf/bitmapudf
本项目主要将 Hive UDF、RoaringBitmap 和 HBase 集成到一起。
打包方法
mvn clean package
然后将 target 目录下的 jar 包上传到 HDFS 的指定目录,以便创建 UDF 函数;您也可以直接将 jar 包上传到服务器,通过 add jar 命令创建 UDF 函数。
本人将 jar 包上传到了 HDFS 的 /utils/hiveUdf/ 目录。
-- Register bit_map_byte, implemented by class RoaringBitMapByteUDAF in the jar stored on HDFS.
CREATE FUNCTION bit_map_byte
    AS 'cc.youshu.roaringbitmap.RoaringBitMapByteUDAF'
    USING JAR 'hdfs://nameservice1/utils/hiveUdf/hiveudf.jar';
-- Example: apply bit_map_byte to id for each plan_code in dw.rw_plan.
SELECT
    plan_code,
    bit_map_byte(id)
FROM dw.rw_plan
GROUP BY plan_code;
-- Register hbase_put, implemented by class HbasePut_UDF in the jar stored on HDFS.
CREATE FUNCTION hbase_put
    AS 'cc.youshu.roaringbitmap.HbasePut_UDF'
    USING JAR 'hdfs://nameservice1/utils/hiveUdf/hiveudf.jar';
-- Example: pass each plan's aggregated value to hbase_put.
--   arg 1: map of settings (ZooKeeper quorum, table_name, family, qualifier)
--   arg 2: 'group_plan_' || plan_code  (presumably the HBase row key; confirm against HbasePut_UDF)
--   arg 3: the value produced by bit_map_byte(id)
-- The aggregate in the subquery must be aliased AS val: without an alias Hive
-- names the column _c1 and the outer reference to val cannot be resolved.
SELECT
    hbase_put(
        map(
            'hbase.zookeeper.quorum', 'XXX,XXX,XXX',
            'table_name', 'test_roaring_syf',
            'family', 'group',
            'qualifier', 'q'
        ),
        concat('group_plan_', plan_code),
        val
    )
FROM (
    SELECT
        plan_code,
        bit_map_byte(id) AS val
    FROM dw.rw_plan
    GROUP BY plan_code
) a;
-- Register hbase_put_add, implemented by class HbasePutAdd_UDF in the jar stored on HDFS.
CREATE FUNCTION hbase_put_add
    AS 'cc.youshu.roaringbitmap.HbasePutAdd_UDF'
    USING JAR 'hdfs://nameservice1/utils/hiveUdf/hiveudf.jar';
-- Example: pass each plan's aggregated value to hbase_put_add.
--   arg 1: map of settings (ZooKeeper quorum, table_name, family, qualifier)
--   arg 2: 'group_plan_' || plan_code  (presumably the HBase row key; confirm against HbasePutAdd_UDF)
--   arg 3: the value produced by bit_map_byte(id)
-- The aggregate in the subquery must be aliased AS val: without an alias Hive
-- names the column _c1 and the outer reference to val cannot be resolved.
SELECT
    hbase_put_add(
        map(
            'hbase.zookeeper.quorum', 'XXX,XXX,XXX',
            'table_name', 'test_roaring_syf',
            'family', 'group',
            'qualifier', 'q'
        ),
        concat('group_plan_', plan_code),
        val
    )
FROM (
    SELECT
        plan_code,
        bit_map_byte(id) AS val
    FROM dw.rw_plan
    GROUP BY plan_code
) a;
-- Register bitmap_to_id, implemented by class BitMapUDTF in the jar stored on HDFS.
CREATE FUNCTION bitmap_to_id
    AS 'cc.youshu.udtf.BitMapUDTF'
    USING JAR 'hdfs://nameservice1/utils/hiveUdf/hiveudf.jar';
-- Example: apply BitMapCount to the user_id column for the first 10 rows of hbase_table_1.
-- NOTE(review): no CREATE FUNCTION statement visible in this file registers BitMapCount
-- (the statement just before this one registers bitmap_to_id) — confirm the intended
-- function name and add its registration if it is missing.
SELECT key, BitMapCount(user_id) from hbase_table_1 LIMIT 10;
说明:hbase_table_1 是我创建的一张 Hive on HBase 表,user_id 列存储的是 RoaringBitmap 的二进制数据。
hbase_table_1 表的创建语句:
-- Hive external table backed by the HBase table "user_roaring_bit_map_test".
-- Column mapping: :key -> key, group:user_id -> user_id (stored as BINARY).
CREATE EXTERNAL TABLE hbase_table_1 (
    key     STRING,
    user_id BINARY
)
STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
WITH SERDEPROPERTIES (
    "hbase.columns.mapping" = ":key,group:user_id"
)
TBLPROPERTIES (
    "hbase.table.name" = "user_roaring_bit_map_test"
);