创建表:
create 'test1', 'lf', 'sf'
lf: column family of LONG values (binary value)
sf: column family of STRING values
导入数据:
put 'test1', 'user1|ts1', 'sf:c1', 'sku1'
put 'test1', 'user1|ts2', 'sf:c1', 'sku188'
put 'test1', 'user1|ts3', 'sf:s1', 'sku123'
put 'test1', 'user2|ts4', 'sf:c1', 'sku2'
put 'test1', 'user2|ts5', 'sf:c2', 'sku288'
put 'test1', 'user2|ts6', 'sf:s1', 'sku222'
数据说明:一个用户(userX)在什么时间(tsX)组合起来作为rowkey;对什么产品(skuXXX)作为value;做了什么操作作为列名,比如:
c1: click from homepage;
c2: click from ad;
s1: search from homepage;
b1: buy
查询案例
谁的值=sku188:
scan 'test1', FILTER=>"ValueFilter(=,'binary:sku188')"
ROW COLUMN+CELL user1|ts2 column=
sf:
c1, timestamp=
1409122354918
, value=sku188 谁的值包含
88
scan
'test1'
, FILTER=>
"ValueFilter(=,'substring:88')"
ROW COLUMN+CELL user1|ts2 column=
sf:
c1, timestamp=
1409122354918
, value=sku188 user2|ts5 column=
sf:
c2, timestamp=
1409122355030
, value=sku288 通过广告点击进来的(column为c2)值包含
88
的用户 scan
'test1'
, FILTER=>
"ColumnPrefixFilter('c2') AND ValueFilter(=,'substring:88')"
ROW COLUMN+CELL user2|ts5 column=
sf:
c2, timestamp=
1409122355030
, value=sku288通过搜索进来的(column为s)值包含
123
或者
222
的用户 scan
'test1'
, FILTER=>
"ColumnPrefixFilter('s') AND ( ValueFilter(=,'substring:123') OR ValueFilter(=,'substring:222') )"
ROW COLUMN+CELL user1|ts3 column=
sf:
s1, timestamp=
1409122354954
, value=sku123 user2|ts6 column=
sf:
s1, timestamp=
1409122355970
, value=sku222 rowkey为user1开头的 scan
'test1'
, FILTER =>
"PrefixFilter ('user1')"
ROW COLUMN+CELL user1|ts1 column=
sf:
c1, timestamp=
1409122354868
, value=sku1 user1|ts2 column=
sf:
c1, timestamp=
1409122354918
, value=sku188 user1|ts3 column=
sf:
s1, timestamp=
1409122354954
, value=sku123
FirstKeyOnlyFilter:
一个rowkey可以有多个column,同一个rowkey的同一个column也可以有多个version的值;该过滤器只取出每个rowkey中第一个column的第一个version
KeyOnlyFilter:
只要key,不要value
scan 'test1', FILTER=>"FirstKeyOnlyFilter() AND ValueFilter(=,'binary:sku188') AND KeyOnlyFilter()"
ROW COLUMN+CELL user1|ts2 column=
sf:
c1, timestamp=
1409122354918
, value= 从user1|ts2开始,找到所有的rowkey以user1开头的 scan
'test1'
, {STARTROW=>
'user1|ts2'
, FILTER =>
"PrefixFilter ('user1')"
}ROW COLUMN+CELL user1|ts2 column=
sf:
c1, timestamp=
1409122354918
, value=sku188 user1|ts3 column=
sf:
s1, timestamp=
1409122354954
, value=sku123 从user1|ts2开始,找到所有的到rowkey以user2开头 scan
'test1'
, {STARTROW=>
'user1|ts2'
, STOPROW=>
'user2'
}ROW COLUMN+CELL user1|ts2 column=
sf:
c1, timestamp=
1409122354918
, value=sku188 user1|ts3 column=
sf:
s1, timestamp=
1409122354954
, value=sku123
查询rowkey里面包含ts3的:
import org.apache.hadoop.hbase.filter.CompareFilter
import org.apache.hadoop.hbase.filter.SubstringComparator
import org.apache.hadoop.hbase.filter.RowFilter
scan 'test1', {FILTER => RowFilter.new(CompareFilter::CompareOp.valueOf('EQUAL'), SubstringComparator.new('ts3'))}
ROW COLUMN+CELL user1|ts3 column=
sf:
s1, timestamp=
1409122354954
, value=sku123
查询rowkey里面包含ts的:
import org.apache.hadoop.hbase.filter.CompareFilter
import org.apache.hadoop.hbase.filter.SubstringComparator
import org.apache.hadoop.hbase.filter.RowFilter
scan 'test1', {FILTER => RowFilter.new(CompareFilter::CompareOp.valueOf('EQUAL'), SubstringComparator.new('ts'))}
ROW COLUMN+CELL user1|ts1 column=
sf:
c1, timestamp=
1409122354868
, value=sku1 user1|ts2 column=
sf:
c1, timestamp=
1409122354918
, value=sku188 user1|ts3 column=
sf:
s1, timestamp=
1409122354954
, value=sku123 user2|ts4 column=
sf:
c1, timestamp=
1409122354998
, value=sku2 user2|ts5 column=
sf:
c2, timestamp=
1409122355030
, value=sku288 user2|ts6 column=
sf:
s1, timestamp=
1409122355970
, value=sku222 加入一条测试数据put
'test1'
,
'user2|err'
,
'sf:s1'
,
'sku999'
用正则表达式查询rowkey:只匹配形如 user<数字>|ts<数字> 的rowkey;新加入的测试数据(user2|err)并不符合正则表达式的规则,故查询不出来:
import org.apache.hadoop.hbase.filter.RegexStringComparator
import org.apache.hadoop.hbase.filter.CompareFilter
import org.apache.hadoop.hbase.filter.SubstringComparator
import org.apache.hadoop.hbase.filter.RowFilter
scan 'test1', {FILTER => RowFilter.new(CompareFilter::CompareOp.valueOf('EQUAL'), RegexStringComparator.new('^user\d+\|ts\d+$'))}
ROW COLUMN+CELL user1|ts1 column=
sf:
c1, timestamp=
1409122354868
, value=sku1 user1|ts2 column=
sf:
c1, timestamp=
1409122354918
, value=sku188 user1|ts3 column=
sf:
s1, timestamp=
1409122354954
, value=sku123 user2|ts4 column=
sf:
c1, timestamp=
1409122354998
, value=sku2 user2|ts5 column=
sf:
c2, timestamp=
1409122355030
, value=sku288 user2|ts6 column=
sf:
s1, timestamp=
1409122355970
, value=sku222加入测试数据put
'test1'
,
'user1|ts9'
,
'sf:b1'
,
'sku1'
b1开头的列中并且值为sku1的scan
'test1'
, FILTER=>
"ColumnPrefixFilter('b1') AND ValueFilter(=,'binary:sku1')"
ROW COLUMN+CELL user1|ts9 column=
sf:
b1, timestamp=
1409124908668
, value=sku1
SingleColumnValueFilter的使用:查询列sf:b1的值等于sku1的行(按列族sf、列名b1精确匹配,而不是按列名前缀匹配):
import org.apache.hadoop.hbase.filter.CompareFilter
import org.apache.hadoop.hbase.filter.SingleColumnValueFilter
import org.apache.hadoop.hbase.filter.SubstringComparator
scan 'test1', {COLUMNS => 'sf:b1', FILTER => SingleColumnValueFilter.new(Bytes.toBytes('sf'), Bytes.toBytes('b1'), CompareFilter::CompareOp.valueOf('EQUAL'), Bytes.toBytes('sku1'))}
ROW COLUMN+CELL user1|ts9 column=
sf:
b1, timestamp=
1409124908668
, value=sku1
hbase zkcli 的使用:
hbase zkcli
ls /
[hbase, zookeeper]
[
zk:
hadoop000:
2181
(CONNECTED)
1
] ls /hbase
[meta-region-server, backup-masters, table, draining, region-in-transition, running, table-lock, master, namespace, hbaseid, online-snapshot, replication, splitWAL, recovering-regions, rs]
[
zk:
hadoop000:
2181
(CONNECTED)
2
] ls
/hbase/
table[member, test1,
hbase:
meta,
hbase:
namespace] [
zk:
hadoop000:
2181
(CONNECTED)
3
] ls
/hbase/
table/test1[] [
zk:
hadoop000:
2181
(CONNECTED)
4
] get /hbase/table/test1
?master:60000}l$??lPBUF
cZxid = 0x107
ctime = Wed Aug 27 14:52:21 HKT 2014
mZxid = 0x10b
mtime = Wed Aug 27 14:52:22 HKT 2014
pZxid = 0x107
cversion = 0
dataVersion = 2
aclVersion = 0
ephemeralOwner = 0x0
dataLength = 31
numChildren = 0