目录
一、介绍
1.hbase运算符
2.Hbase 过滤器的比较器
二、代码
1.hbase建表
2.创建数据
3.导入依赖
4.列值过滤器
5.单列值过滤器
6.单列值排除过滤器
7.rowkey过滤器
8.rowkey前缀过滤器:PrefixFilter
9. 列簇过滤器
10.列过滤器
11.综合过滤器
过滤器可以根据列族、列、版本等更多的条件来对数据进行过滤。基于 HBase 本身提供的三维有序(行键、列、版本有序),这些过滤器可以高效地完成查询过滤的任务。带有过滤器条件的 RPC 查询请求会把过滤器分发到各个 RegionServer(即过滤是在服务端完成的),这样也可以降低网络传输的压力。使用过滤器至少需要两类参数:一类是抽象的比较运算符,另一类是比较器。
// CompareFilter.CompareOp.LESS_OR_EQUAL
LESS <
LESS_OR_EQUAL <=
EQUAL =
NOT_EQUAL <>
GREATER_OR_EQUAL >=
GREATER >
NO_OP 排除所有
BinaryComparator 按字节索引顺序比较指定字节数组,采用Bytes.compareTo(byte[])
BinaryPrefixComparator 跟前面相同,只是比较左端的数据是否相同
NullComparator 判断给定的是否为空
BitComparator 按位比较,通过 BitComparator.BitwiseOp 指定做与(AND)、或(OR)、异或(XOR)操作
RegexStringComparator 提供一个正则的比较器,仅支持 EQUAL 和 NOT_EQUAL
SubstringComparator 判断提供的子串是否出现在table的value中。
比较过滤器:可应用于rowkey、列簇、列、列值过滤器
列值过滤器:ValueFilter
列过滤器:QualifierFilter
列簇过滤器:FamilyFilter
rowKey过滤器:RowFilter
专用过滤器:只能用于特定的过滤场景
单列值过滤器:SingleColumnValueFilter
列值排除过滤器:SingleColumnValueExcludeFilter
rowkey前缀过滤器:PrefixFilter
分页过滤器PageFilter
hbase(main):002:0> create 'emp2','info'
0 row(s) in 1.7460 seconds
=> Hbase::Table - emp2
# Sample data for the 'emp2' table (column family 'info').
# Every row must be written to 'emp2': that is the only table created above and
# the one scanned by the Java examples.
put 'emp2','1001','info:name','zhangsan'
put 'emp2','1001','info:job','preader'
put 'emp2','1001','info:salary','35000'
put 'emp2','1001','info:deptName','TP'
put 'emp2','1002','info:name','lisi'
put 'emp2','1002','info:job','preader'
put 'emp2','1002','info:salary','35000'
put 'emp2','1002','info:deptName','AC'
# Row 1201 previously targeted the non-existent table 'emps'.
put 'emp2','1201','info:name','gopal'
put 'emp2','1201','info:job','manager'
put 'emp2','1201','info:salary','50000'
put 'emp2','1201','info:deptName','TP'
put 'emp2','1202','info:name','manisha'
put 'emp2','1202','info:job','preader'
put 'emp2','1202','info:salary','50000'
put 'emp2','1202','info:deptName','TP'
put 'emp2','1203','info:name','kalil'
put 'emp2','1203','info:job','phpdev'
put 'emp2','1203','info:salary','30000'
put 'emp2','1203','info:deptName','AC'
put 'emp2','1204','info:name','prasanth'
put 'emp2','1204','info:job','phpdev'
put 'emp2','1204','info:salary','30000'
put 'emp2','1204','info:deptName','AC'
put 'emp2','1205','info:name','kranthi'
put 'emp2','1205','info:job','admin'
put 'emp2','1205','info:salary','20000'
put 'emp2','1205','info:deptName','TP'
put 'emp2','1206','info:name','satishp'
put 'emp2','1206','info:job','grpdes'
put 'emp2','1206','info:salary','20000'
put 'emp2','1206','info:deptName','GR'
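<dependencies>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-client</artifactId>
        <version>2.7.3</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hbase</groupId>
        <artifactId>hbase-client</artifactId>
        <version>1.4.13</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hbase</groupId>
        <artifactId>hbase-server</artifactId>
        <version>1.4.13</version>
    </dependency>
</dependencies>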
列值过滤器仅仅针对单元格中的值进行过滤:满足“比较运算符 + 比较器”构成的过滤条件的单元格会被保留,否则被过滤掉(打印时显示为 null)。虽然这里给出的过滤条件是 salary>30000,但是代码里面根本没有指定 salary 这一列,因此实际上列值过滤器是与所有列的所有单元格进行比较:满足条件则保留该单元格,不满足则过滤掉,查询结果中被过滤掉的单元格都为 null。这里的 id(即 rowkey)不参与比较,只要该行还有单元格被保留就会显示;而 name 等列的值也能显示出来,是因为这里的比较器是按照字节数组(字典序)进行比较的,这些列的值按字节比较都大于 "30000",满足过滤条件,所以没有变成 null。
package com.lenovo.Filter;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.*;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import java.io.IOException;
public class ValueFilterDemo {
    /** Column family that holds every column printed by {@link #print(Filter)}. */
    private static final byte[] FAMILY = Bytes.toBytes("info");

    // Populated by createConnection() before each test and released by close().
    Connection connection;
    Admin admin;
    TableName tableName;
    Table table;

    /**
     * Opens the HBase connection and resolves the {@code emp2} table before each test.
     *
     * @throws IllegalStateException if the connection, admin or table cannot be obtained;
     *         failing here avoids a later, less informative NullPointerException in the test
     */
    @Before
    public void createConnection(){
        Configuration configuration = new Configuration();
        // Placeholder value from the article: replace with the real ZooKeeper quorum host(s).
        configuration.set("hbase.zookeeper.quorum", "IP地址");
        try {
            connection = ConnectionFactory.createConnection(configuration);
            admin = connection.getAdmin();
            // valueOf is static: call it on the class, not through the still-null field.
            tableName = TableName.valueOf("emp2");
            table = connection.getTable(tableName);
        } catch (IOException e) {
            throw new IllegalStateException("Failed to connect to HBase table emp2", e);
        }
    }

    /**
     * Value filter demo (intended condition: salary &gt; 30000).
     * The ValueFilter is built without naming a column, so the byte-wise GREATER comparison
     * against "30000" is not restricted to the salary column.
     */
    @Test
    public void test(){
        // Comparison operator (GREATER) plus comparator (binary comparison against "30000").
        BinaryComparator threshold = new BinaryComparator(Bytes.toBytes("30000"));
        ValueFilter valueFilter = new ValueFilter(CompareFilter.CompareOp.GREATER, threshold);
        print(valueFilter);
    }

    /**
     * Scans the table with the given filter and prints one line per returned row.
     * Columns absent from a returned row are printed as {@code null}.
     *
     * @param filter filter attached to the scan
     * @throws IllegalStateException if the scan cannot be opened
     */
    private void print(Filter filter){
        Scan scan = new Scan();
        scan.setFilter(filter);
        // try-with-resources guarantees the scanner is closed even if iteration fails.
        try (ResultScanner scanner = table.getScanner(scan)) {
            for (Result result : scanner) {
                // The row key plays the role of the "id" in the printed line.
                String rowKey = Bytes.toString(result.getRow());
                String name = cell(result, "name");
                String job = cell(result, "job");
                String salary = cell(result, "salary");
                String deptName = cell(result, "deptName");
                System.out.println("id:"+rowKey+",name:"+name+",job:"+job+",salary:"+salary+",deptName:"+deptName);
            }
        } catch (IOException e) {
            // Surface the failure so the test does not pass silently with no output.
            throw new IllegalStateException("Scan of table emp2 failed", e);
        }
    }

    /** Reads {@code info:<qualifier>} from the row as a string, or {@code null} if absent. */
    private static String cell(Result result, String qualifier) {
        return Bytes.toString(result.getValue(FAMILY, Bytes.toBytes(qualifier)));
    }

    /**
     * Releases the table, admin and connection after each test.
     * Each resource is closed independently so one failure does not leak the others.
     */
    @After
    public void close(){
        closeIfOpen(table);
        closeIfOpen(admin);
        closeIfOpen(connection);
    }

    /** Closes the resource if it was ever opened; close failures are reported, not rethrown. */
    private static void closeIfOpen(java.io.Closeable resource) {
        if (resource == null) {
            // Setup failed before this resource was created; nothing to release.
            return;
        }
        try {
            resource.close();
        } catch (IOException e) {
            // Best-effort cleanup: keep going so the remaining resources are still closed.
            e.printStackTrace();
        }
    }
}
运行结果:
package com.lenovo.Filter;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.*;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import java.io.IOException;
public class ValueFilterDemo {
    /** Column family that holds every column printed by {@link #print(Filter)}. */
    private static final byte[] FAMILY = Bytes.toBytes("info");

    // Populated by createConnection() before each test and released by close().
    Connection connection;
    Admin admin;
    TableName tableName;
    Table table;

    /**
     * Opens the HBase connection and resolves the {@code emp2} table before each test.
     *
     * @throws IllegalStateException if the connection, admin or table cannot be obtained;
     *         failing here avoids a later, less informative NullPointerException in the test
     */
    @Before
    public void createConnection(){
        Configuration configuration = new Configuration();
        // Placeholder value from the article: replace with the real ZooKeeper quorum host(s).
        configuration.set("hbase.zookeeper.quorum", "IP地址");
        try {
            connection = ConnectionFactory.createConnection(configuration);
            admin = connection.getAdmin();
            // valueOf is static: call it on the class, not through the still-null field.
            tableName = TableName.valueOf("emp2");
            table = connection.getTable(tableName);
        } catch (IOException e) {
            throw new IllegalStateException("Failed to connect to HBase table emp2", e);
        }
    }

    /**
     * Single-column value filter demo (condition: info:salary &gt; 30000).
     * Unlike ValueFilter, the column to test is named explicitly.
     * Per the original article: a row whose tested cell matches is returned in full,
     * a row whose tested cell does not match is dropped entirely, and a row that
     * lacks the column is returned by default.
     */
    @Test
    public void SingleColumnValueFilter() throws IOException {
        SingleColumnValueFilter salaryAbove = new SingleColumnValueFilter(
                FAMILY,
                Bytes.toBytes("salary"),
                CompareFilter.CompareOp.GREATER,
                Bytes.toBytes("30000")
        );
        print(salaryAbove);
    }

    /**
     * Scans the table with the given filter and prints one line per returned row.
     * Columns absent from a returned row are printed as {@code null}.
     *
     * @param filter filter attached to the scan
     * @throws IllegalStateException if the scan cannot be opened
     */
    private void print(Filter filter){
        Scan scan = new Scan();
        scan.setFilter(filter);
        // try-with-resources guarantees the scanner is closed even if iteration fails.
        try (ResultScanner scanner = table.getScanner(scan)) {
            for (Result result : scanner) {
                // The row key plays the role of the "id" in the printed line.
                String rowKey = Bytes.toString(result.getRow());
                String name = cell(result, "name");
                String job = cell(result, "job");
                String salary = cell(result, "salary");
                String deptName = cell(result, "deptName");
                System.out.println("id:"+rowKey+",name:"+name+",job:"+job+",salary:"+salary+",deptName:"+deptName);
            }
        } catch (IOException e) {
            // Surface the failure so the test does not pass silently with no output.
            throw new IllegalStateException("Scan of table emp2 failed", e);
        }
    }

    /** Reads {@code info:<qualifier>} from the row as a string, or {@code null} if absent. */
    private static String cell(Result result, String qualifier) {
        return Bytes.toString(result.getValue(FAMILY, Bytes.toBytes(qualifier)));
    }

    /**
     * Releases the table, admin and connection after each test.
     * Each resource is closed independently so one failure does not leak the others.
     */
    @After
    public void close(){
        closeIfOpen(table);
        closeIfOpen(admin);
        closeIfOpen(connection);
    }

    /** Closes the resource if it was ever opened; close failures are reported, not rethrown. */
    private static void closeIfOpen(java.io.Closeable resource) {
        if (resource == null) {
            // Setup failed before this resource was created; nothing to release.
            return;
        }
        try {
            resource.close();
        } catch (IOException e) {
            // Best-effort cleanup: keep going so the remaining resources are still closed.
            e.printStackTrace();
        }
    }
}
运行结果:
package com.lenovo.Filter;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.*;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import java.io.IOException;
public class ValueFilterDemo {
    /** Column family that holds every column printed by {@link #print(Filter)}. */
    private static final byte[] FAMILY = Bytes.toBytes("info");

    // Populated by createConnection() before each test and released by close().
    Connection connection;
    Admin admin;
    TableName tableName;
    Table table;

    /**
     * Opens the HBase connection and resolves the {@code emp2} table before each test.
     *
     * @throws IllegalStateException if the connection, admin or table cannot be obtained;
     *         failing here avoids a later, less informative NullPointerException in the test
     */
    @Before
    public void createConnection(){
        Configuration configuration = new Configuration();
        // Placeholder value from the article: replace with the real ZooKeeper quorum host(s).
        configuration.set("hbase.zookeeper.quorum", "IP地址");
        try {
            connection = ConnectionFactory.createConnection(configuration);
            admin = connection.getAdmin();
            // valueOf is static: call it on the class, not through the still-null field.
            tableName = TableName.valueOf("emp2");
            table = connection.getTable(tableName);
        } catch (IOException e) {
            throw new IllegalStateException("Failed to connect to HBase table emp2", e);
        }
    }

    /**
     * Single-column value exclude filter demo (condition: info:salary starts with "30000").
     * Per the original article: a value filter returns every row, whereas a single-column
     * value filter returns only the rows that satisfy the condition.
     * This test now really uses SingleColumnValueExcludeFilter, which per the HBase API
     * docs does not emit the tested column, so salary is expected to print as null.
     */
    @Test
    public void SingleColumnValueExcludeFilter(){
        BinaryPrefixComparator salaryPrefix = new BinaryPrefixComparator(Bytes.toBytes("30000"));
        // Previously a plain SingleColumnValueFilter was built here, so the exclude
        // behaviour this section is about was never exercised.
        SingleColumnValueExcludeFilter excludeFilter = new SingleColumnValueExcludeFilter(
                FAMILY,
                Bytes.toBytes("salary"),
                CompareFilter.CompareOp.EQUAL,
                salaryPrefix
        );
        print(excludeFilter);
    }

    /**
     * Scans the table with the given filter and prints one line per returned row.
     * Columns absent from a returned row are printed as {@code null}.
     *
     * @param filter filter attached to the scan
     * @throws IllegalStateException if the scan cannot be opened
     */
    private void print(Filter filter){
        Scan scan = new Scan();
        scan.setFilter(filter);
        // try-with-resources guarantees the scanner is closed even if iteration fails.
        try (ResultScanner scanner = table.getScanner(scan)) {
            for (Result result : scanner) {
                // The row key plays the role of the "id" in the printed line.
                String rowKey = Bytes.toString(result.getRow());
                String name = cell(result, "name");
                String job = cell(result, "job");
                String salary = cell(result, "salary");
                String deptName = cell(result, "deptName");
                System.out.println("id:"+rowKey+",name:"+name+",job:"+job+",salary:"+salary+",deptName:"+deptName);
            }
        } catch (IOException e) {
            // Surface the failure so the test does not pass silently with no output.
            throw new IllegalStateException("Scan of table emp2 failed", e);
        }
    }

    /** Reads {@code info:<qualifier>} from the row as a string, or {@code null} if absent. */
    private static String cell(Result result, String qualifier) {
        return Bytes.toString(result.getValue(FAMILY, Bytes.toBytes(qualifier)));
    }

    /**
     * Releases the table, admin and connection after each test.
     * Each resource is closed independently so one failure does not leak the others.
     */
    @After
    public void close(){
        closeIfOpen(table);
        closeIfOpen(admin);
        closeIfOpen(connection);
    }

    /** Closes the resource if it was ever opened; close failures are reported, not rethrown. */
    private static void closeIfOpen(java.io.Closeable resource) {
        if (resource == null) {
            // Setup failed before this resource was created; nothing to release.
            return;
        }
        try {
            resource.close();
        } catch (IOException e) {
            // Best-effort cleanup: keep going so the remaining resources are still closed.
            e.printStackTrace();
        }
    }
}
运行结果:
package com.lenovo.Filter;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.*;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import java.io.IOException;
public class ValueFilterDemo {
    /** Column family that holds every column printed by {@link #print(Filter)}. */
    private static final byte[] FAMILY = Bytes.toBytes("info");

    // Populated by createConnection() before each test and released by close().
    Connection connection;
    Admin admin;
    TableName tableName;
    Table table;

    /**
     * Opens the HBase connection and resolves the {@code emp2} table before each test.
     *
     * @throws IllegalStateException if the connection, admin or table cannot be obtained;
     *         failing here avoids a later, less informative NullPointerException in the test
     */
    @Before
    public void createConnection(){
        Configuration configuration = new Configuration();
        // Placeholder value from the article: replace with the real ZooKeeper quorum host(s).
        configuration.set("hbase.zookeeper.quorum", "IP地址");
        try {
            connection = ConnectionFactory.createConnection(configuration);
            admin = connection.getAdmin();
            // valueOf is static: call it on the class, not through the still-null field.
            tableName = TableName.valueOf("emp2");
            table = connection.getTable(tableName);
        } catch (IOException e) {
            throw new IllegalStateException("Failed to connect to HBase table emp2", e);
        }
    }

    /**
     * Row key filter demo.
     * Combines a RowFilter with a binary prefix comparator to select the rows whose
     * row key (id) starts with "100".
     */
    @Test
    public void rowKeyFilter(){
        BinaryPrefixComparator keyPrefix = new BinaryPrefixComparator(Bytes.toBytes("100"));
        RowFilter rowFilter = new RowFilter(CompareFilter.CompareOp.EQUAL, keyPrefix);
        print(rowFilter);
    }

    /**
     * Scans the table with the given filter and prints one line per returned row.
     * Columns absent from a returned row are printed as {@code null}.
     *
     * @param filter filter attached to the scan
     * @throws IllegalStateException if the scan cannot be opened
     */
    private void print(Filter filter){
        Scan scan = new Scan();
        scan.setFilter(filter);
        // try-with-resources guarantees the scanner is closed even if iteration fails.
        try (ResultScanner scanner = table.getScanner(scan)) {
            for (Result result : scanner) {
                // The row key plays the role of the "id" in the printed line.
                String rowKey = Bytes.toString(result.getRow());
                String name = cell(result, "name");
                String job = cell(result, "job");
                String salary = cell(result, "salary");
                String deptName = cell(result, "deptName");
                System.out.println("id:"+rowKey+",name:"+name+",job:"+job+",salary:"+salary+",deptName:"+deptName);
            }
        } catch (IOException e) {
            // Surface the failure so the test does not pass silently with no output.
            throw new IllegalStateException("Scan of table emp2 failed", e);
        }
    }

    /** Reads {@code info:<qualifier>} from the row as a string, or {@code null} if absent. */
    private static String cell(Result result, String qualifier) {
        return Bytes.toString(result.getValue(FAMILY, Bytes.toBytes(qualifier)));
    }

    /**
     * Releases the table, admin and connection after each test.
     * Each resource is closed independently so one failure does not leak the others.
     */
    @After
    public void close(){
        closeIfOpen(table);
        closeIfOpen(admin);
        closeIfOpen(connection);
    }

    /** Closes the resource if it was ever opened; close failures are reported, not rethrown. */
    private static void closeIfOpen(java.io.Closeable resource) {
        if (resource == null) {
            // Setup failed before this resource was created; nothing to release.
            return;
        }
        try {
            resource.close();
        } catch (IOException e) {
            // Best-effort cleanup: keep going so the remaining resources are still closed.
            e.printStackTrace();
        }
    }
}
运行结果:
package com.lenovo.Filter;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.*;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import java.io.IOException;
public class ValueFilterDemo {
    /** Column family that holds every column printed by {@link #print(Filter)}. */
    private static final byte[] FAMILY = Bytes.toBytes("info");

    // Populated by createConnection() before each test and released by close().
    Connection connection;
    Admin admin;
    TableName tableName;
    Table table;

    /**
     * Opens the HBase connection and resolves the {@code emp2} table before each test.
     *
     * @throws IllegalStateException if the connection, admin or table cannot be obtained;
     *         failing here avoids a later, less informative NullPointerException in the test
     */
    @Before
    public void createConnection(){
        Configuration configuration = new Configuration();
        // Placeholder value from the article: replace with the real ZooKeeper quorum host(s).
        configuration.set("hbase.zookeeper.quorum", "IP地址");
        try {
            connection = ConnectionFactory.createConnection(configuration);
            admin = connection.getAdmin();
            // valueOf is static: call it on the class, not through the still-null field.
            tableName = TableName.valueOf("emp2");
            table = connection.getTable(tableName);
        } catch (IOException e) {
            throw new IllegalStateException("Failed to connect to HBase table emp2", e);
        }
    }

    /**
     * Row key prefix filter demo.
     * Selects the rows whose row key (id) starts with "100". Per the original article,
     * this has the same effect as a RowFilter combined with a binary prefix comparator.
     */
    @Test
    public void PrefixFilter(){
        PrefixFilter prefixFilter = new PrefixFilter(Bytes.toBytes("100"));
        print(prefixFilter);
    }

    /**
     * Scans the table with the given filter and prints one line per returned row.
     * Columns absent from a returned row are printed as {@code null}.
     *
     * @param filter filter attached to the scan
     * @throws IllegalStateException if the scan cannot be opened
     */
    private void print(Filter filter){
        Scan scan = new Scan();
        scan.setFilter(filter);
        // try-with-resources guarantees the scanner is closed even if iteration fails.
        try (ResultScanner scanner = table.getScanner(scan)) {
            for (Result result : scanner) {
                // The row key plays the role of the "id" in the printed line.
                String rowKey = Bytes.toString(result.getRow());
                String name = cell(result, "name");
                String job = cell(result, "job");
                String salary = cell(result, "salary");
                String deptName = cell(result, "deptName");
                System.out.println("id:"+rowKey+",name:"+name+",job:"+job+",salary:"+salary+",deptName:"+deptName);
            }
        } catch (IOException e) {
            // Surface the failure so the test does not pass silently with no output.
            throw new IllegalStateException("Scan of table emp2 failed", e);
        }
    }

    /** Reads {@code info:<qualifier>} from the row as a string, or {@code null} if absent. */
    private static String cell(Result result, String qualifier) {
        return Bytes.toString(result.getValue(FAMILY, Bytes.toBytes(qualifier)));
    }

    /**
     * Releases the table, admin and connection after each test.
     * Each resource is closed independently so one failure does not leak the others.
     */
    @After
    public void close(){
        closeIfOpen(table);
        closeIfOpen(admin);
        closeIfOpen(connection);
    }

    /** Closes the resource if it was ever opened; close failures are reported, not rethrown. */
    private static void closeIfOpen(java.io.Closeable resource) {
        if (resource == null) {
            // Setup failed before this resource was created; nothing to release.
            return;
        }
        try {
            resource.close();
        } catch (IOException e) {
            // Best-effort cleanup: keep going so the remaining resources are still closed.
            e.printStackTrace();
        }
    }
}
运行结果:
/**
 * @Date 2022.04.26
 * @Description Column family filter demo:
 * builds a FamilyFilter (EQUAL) around the regular expression i[a-zA-Z]
 * and prints the rows returned by the scan.
 */
@Test
public void familyFilterTest(){
    // The comparator is created inline; the filter compares it against the column family.
    FamilyFilter byFamily = new FamilyFilter(
            CompareFilter.CompareOp.EQUAL,
            new RegexStringComparator("i[a-zA-Z]"));
    print(byFamily);
}
运行结果:
/**
 * @Date 2022.04.26
 * @Description Column (qualifier) filter demo:
 * matches column qualifiers against the substring "me";
 * per the original article, the other columns are printed as null.
 */
@Test
public void substringFilterTest(){
    // Operator and comparator are handed to the filter directly, then the scan is printed.
    print(new QualifierFilter(
            CompareFilter.CompareOp.EQUAL,
            new SubstringComparator("me")));
}
运行结果:
/**
 * Combined filter demo: puts a qualifier filter and a single-column value filter
 * into one FilterList so that both are applied to the same scan.
 */
@Test
public void manyColumnFilter(){
    // Condition 1: column qualifiers matched against the substring "a".
    QualifierFilter byQualifier = new QualifierFilter(
            CompareFilter.CompareOp.EQUAL,
            new SubstringComparator("a"));

    // Condition 2: info:salary > 30000, compared as raw bytes.
    SingleColumnValueFilter bySalary = new SingleColumnValueFilter(
            "info".getBytes(),
            "salary".getBytes(),
            CompareFilter.CompareOp.GREATER,
            new BinaryComparator("30000".getBytes()));

    // Bundle both filters, in the same order as before, and run the scan.
    FilterList combined = new FilterList();
    combined.addFilter(byQualifier);
    combined.addFilter(bySalary);
    print(combined);
}
运行结果: