# lf: column family for LONG values (stored as binary)
# sf: column family for STRING values
# Create the demo table with two column families: 'lf' and 'sf'.
# The table lives in the 'test' namespace because every put/scan in this
# file addresses it as 'test:test1'; creating plain 'test1' would put it in
# the default namespace and those commands would not find it.
# Prerequisite: the namespace must exist (run once: create_namespace 'test').
create 'test:test1', 'lf', 'sf'
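# Row key: which user (userX) at what time (tsX), written as 'userX|tsX'.
# Column name: the action the user performed; cell value: the product it was performed on (skuXXX).
# Example actions: c1 = click from homepage; c2 = click from ad; s1 = search from homepage; b1 = buy.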
# Load the sample events into 'test:test1'.
# Each entry is [row key, column (family:qualifier), value]; the rows are
# written in the order listed.
[
  ['user1|ts1', 'sf:c1', 'sku1'],
  ['user1|ts2', 'sf:c1', 'sku2'],
  ['user1|ts3', 'sf:s1', 'sku3'],
  ['user2|ts4', 'sf:c1', 'sku11'],
  ['user2|ts5', 'sf:c2', 'sku22'],
  ['user2|ts6', 'sf:s1', 'sku33']
].each do |row_key, column, value|
  put 'test:test1', row_key, column, value
end
# Scans using the shell's filter-string syntax.

# Cells whose value is exactly 'sku1'.
scan 'test:test1', {FILTER => "ValueFilter(=,'binary:sku1')"}

# Cells whose value contains the substring '2'.
scan 'test:test1', {FILTER => "ValueFilter(=,'substring:2')"}

# Cells whose column qualifier starts with 's2' and whose value contains '2'.
# NOTE(review): none of the sample puts in this file use a qualifier starting
# with 's2' (only c1, c2, s1), so this matches nothing in that data - confirm
# whether 's1' was intended.
scan 'test:test1', {FILTER => "ColumnPrefixFilter('s2') AND ValueFilter(=,'substring:2')"}

# Cells whose qualifier starts with 'c1' and whose value contains '1' or '2'.
scan 'test:test1', {FILTER => "ColumnPrefixFilter('c1') AND ( ValueFilter(=,'substring:1') OR ValueFilter(=,'substring:2') )"}

# Rows whose key starts with 'user1'.
scan 'test:test1', {FILTER => "PrefixFilter ('user1')"}
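# FirstKeyOnlyFilter: returns only the first cell of each row. A row key can hold multiple versions, and the same column of the same row key can also hold multiple values.
# KeyOnlyFilter: returns only the key part of each cell, not the value.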
# Chain three filters with AND: FirstKeyOnlyFilter, an exact-match ValueFilter
# on 'sku1', and KeyOnlyFilter.
scan 'test:test1', {
  FILTER => "FirstKeyOnlyFilter() AND ValueFilter(=,'binary:sku1') AND KeyOnlyFilter()"
}
# Scan rows whose key starts with 'user1', beginning at row 'user1|ts2'.
# Fixes: the options hash was missing its closing brace (a syntax error), and
# the table is addressed as 'test:test1' to match the puts and other scans.
scan 'test:test1', {STARTROW => 'user1|ts2', FILTER => "PrefixFilter ('user1')"}
import org.apache.hadoop.hbase.filter.CompareFilter
import org.apache.hadoop.hbase.filter.SubstringComparator
# The import and the scan were fused onto one line ("RowFilterscan"), which
# names a class that does not exist; they are two separate statements.
import org.apache.hadoop.hbase.filter.RowFilter
# Rows whose key contains the substring 'ts3', using a Java RowFilter object
# instead of a filter string.
scan 'test:test1', {FILTER => RowFilter.new(CompareFilter::CompareOp.valueOf('EQUAL'), SubstringComparator.new('ts3'))}
import org.apache.hadoop.hbase.filter.CompareFilter
import org.apache.hadoop.hbase.filter.SubstringComparator
import org.apache.hadoop.hbase.filter.RowFilter
# Rows whose key contains the substring 'ts', matched with a RowFilter built
# from an EQUAL compare op and a SubstringComparator.
scan 'test:test1', {
  FILTER => RowFilter.new(
    CompareFilter::CompareOp.valueOf('EQUAL'),
    SubstringComparator.new('ts')
  )
}