HBase coprocessors come in two flavors: Observers and Endpoints. A coprocessor in HBase is roughly analogous to a trigger or a stored procedure in MySQL: an Observer plays the role of a trigger (its code is deployed on the server side, where it effectively acts as a proxy around API calls), while an Endpoint is closer to a stored procedure. This section uses a RegionObserver as an example to briefly walk through writing one with the Java API.
- The `MyRegionObserver` class:
package HBaseIA.TwitBase.hbase;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.CoprocessorEnvironment;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Durability;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.coprocessor.BaseRegionObserver;
import org.apache.hadoop.hbase.coprocessor.ObserverContext;
import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
import org.apache.hadoop.hbase.regionserver.wal.WALEdit;
import org.apache.hadoop.hbase.util.Bytes;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
public class MyRegionObserver extends BaseRegionObserver {
private static final Log LOG = LogFactory.getLog(MyRegionObserver.class);
private RegionCoprocessorEnvironment env = null;
// Only columns in family F are governed: only column A may be written, and column B is read-only.
private static final String FAMILY_NAME = "F";
private static final String ONLY_PUT_COL = "A";
private static final String ONLY_READ_COL = "B";
// A coprocessor runs inside a region; every region loads its own instance.
// This method runs when the region server opens the region (before it is fully open).
@Override
public void start(CoprocessorEnvironment e) throws IOException {
env = (RegionCoprocessorEnvironment) e;
}
// This method runs when the region server closes the region (before it is fully closed).
@Override
public void stop(CoprocessorEnvironment e) throws IOException {
//nothing to do here
}
/**
 * Requirements:
 * 1. Column B must not be written directly.
 * 2. Only column A may be written.
 * 3. The value written must be an integer.
 * 4. Writing column A automatically writes column B as well.
 */
@Override
public void prePut(final ObserverContext<RegionCoprocessorEnvironment> e,
final Put put, final WALEdit edit, final Durability durability)
throws IOException {
// First check whether this Put writes the read-only column B.
// The Put object represents the client's mutation against the row.
List<Cell> cells = put.get(Bytes.toBytes(FAMILY_NAME),
Bytes.toBytes(ONLY_READ_COL));
if (cells != null && cells.size() != 0) {
LOG.warn("User is not allowed to write read_only col.");
throw new IOException("User is not allowed to write read_only col.");
}
// Then check column A.
// Put.get() returns the cells in this Put that match the given column family and qualifier.
cells = put.get(Bytes.toBytes(FAMILY_NAME),
Bytes.toBytes(ONLY_PUT_COL));
if (cells == null || cells.size() == 0) {
// No write to column A: nothing to validate, let the put through.
LOG.info("No write to column A, letting the put through.");
return;
}
// Column A is being written: verify that the value is an integer.
byte[] aValue = null;
for (Cell cell : cells) {
try {
aValue = CellUtil.cloneValue(cell);
LOG.warn("aValue = " + Bytes.toString(aValue));
Integer.valueOf(Bytes.toString(aValue));
} catch (Exception e1) {
LOG.warn("Can not put un number value to A col.");
throw new IOException("Can not put un number value to A col.");
}
}
// All checks passed. Per the requirements, writing column A also writes column B,
// so append the same value to column B in this Put.
LOG.info("Also writing the value to column B.");
put.addColumn(Bytes.toBytes(FAMILY_NAME),
Bytes.toBytes(ONLY_READ_COL), aValue);
}
/**
 * Requirements:
 * 1. Column B must not be deleted directly.
 * 2. Only column A may be deleted.
 * 3. Deleting column A also deletes column B.
 * 4. preDelete intercepts delete operations, so the Delete is passed in as a parameter.
 */
@Override
public void preDelete(
final ObserverContext<RegionCoprocessorEnvironment> e,
final Delete delete, final WALEdit edit, final Durability durability)
throws IOException {
// First check whether column B is explicitly targeted for deletion.
List<Cell> cells = delete.getFamilyCellMap().get(Bytes.toBytes(FAMILY_NAME));
if (cells == null || cells.size() == 0) {
// The client is not touching family F, so there is nothing to check.
LOG.info("No operation on family F, letting the delete through.");
return;
}
// Inspect the cells targeted within family F.
byte[] qualifierName = null;
boolean aDeleteFlg = false;
for (Cell cell : cells) {
qualifierName = CellUtil.cloneQualifier(cell);
// Deleting column B directly is not allowed.
if (Arrays.equals(qualifierName, Bytes.toBytes(ONLY_READ_COL))) {
LOG.warn("Cannot delete read-only column B.");
throw new IOException("Cannot delete read-only column B.");
}
// Check whether column A is targeted for deletion.
if (Arrays.equals(qualifierName, Bytes.toBytes(ONLY_PUT_COL))) {
LOG.info("Column A is part of this delete.");
aDeleteFlg = true;
}
}
// If column A is being deleted, column B must be deleted as well.
if (aDeleteFlg) {
LOG.info("Also deleting column B.");
delete.addColumn(Bytes.toBytes(FAMILY_NAME), Bytes.toBytes(ONLY_READ_COL));
}
}
}
The contents of `pom.xml` are not covered in detail here; just make sure the dependencies match the big-data components in your own cluster. Package the program into a jar and upload it to HDFS.
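For reference, a minimal sketch of the one dependency the coprocessor needs to compile (the version here is an assumption; `BaseRegionObserver` is part of the HBase 1.x API and was removed in 2.0, so match the version to your cluster):

<dependency>
    <groupId>org.apache.hbase</groupId>
    <artifactId>hbase-server</artifactId>
    <!-- assumed version: use the HBase version running on your cluster -->
    <version>1.2.6</version>
    <scope>provided</scope>
</dependency>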
In the HBase shell, create the table `coprocessor_table` with a single column family `F`.
hbase(main):022:0> create 'coprocessor_table','F'
0 row(s) in 1.2450 seconds
=> Hbase::Table - coprocessor_table
Attach the coprocessor to the table. The attribute value is pipe-separated: the jar path on HDFS, the fully qualified class name, and the load priority.
hbase(main):023:0> alter 'coprocessor_table',METHOD =>'table_att','coprocessor'=>'hdfs://192.168.211.3:9000/MyHBaseInAction-1.0-SNAPSHOT.jar|HBaseIA.TwitBase.hbase.MyRegionObserver|1001'
Updating all regions with the new schema...
1/1 regions updated.
Done.
0 row(s) in 1.9270 seconds
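The same attachment can also be done through the Java admin API instead of the shell. A minimal sketch under the HBase 1.x API (the connection setup and class name `AttachCoprocessor` are assumptions; the jar path, observer class, and priority mirror the alter command above):

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;

public class AttachCoprocessor {
    public static void main(String[] args) throws Exception {
        try (Connection conn = ConnectionFactory.createConnection(HBaseConfiguration.create());
             Admin admin = conn.getAdmin()) {
            TableName tn = TableName.valueOf("coprocessor_table");
            HTableDescriptor desc = admin.getTableDescriptor(tn);
            // Same jar path, class name, and priority as the shell command above.
            desc.addCoprocessor("HBaseIA.TwitBase.hbase.MyRegionObserver",
                    new Path("hdfs://192.168.211.3:9000/MyHBaseInAction-1.0-SNAPSHOT.jar"),
                    1001, null);
            admin.modifyTable(tn, desc);
        }
    }
}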
Put a value into `coprocessor_table`:
hbase(main):024:0> put 'coprocessor_table','row1','F:A',123
0 row(s) in 0.0090 seconds
Scan the table:
hbase(main):025:0> scan 'coprocessor_table'
ROW COLUMN+CELL
row1 column=F:A, timestamp=1529434234158, value=123
row1 column=F:B, timestamp=1529434234158, value=123
1 row(s) in 0.0130 seconds
Notice that column `B` of family `F` was populated as well; this is the coprocessor at work.
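The same behavior can be reproduced from a Java client; a minimal sketch (the row key `row2`, class name `PutThroughObserver`, and default connection configuration are assumptions):

import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;

public class PutThroughObserver {
    public static void main(String[] args) throws Exception {
        try (Connection conn = ConnectionFactory.createConnection(HBaseConfiguration.create());
             Table table = conn.getTable(TableName.valueOf("coprocessor_table"))) {
            Put put = new Put(Bytes.toBytes("row2"));
            // Only F:A is written here; the RegionObserver mirrors the value into F:B server-side.
            put.addColumn(Bytes.toBytes("F"), Bytes.toBytes("A"), Bytes.toBytes("456"));
            table.put(put);
        }
    }
}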
Check whether a non-numeric value can be inserted into column `A` of family `F` in `coprocessor_table`:
hbase(main):028:0> put 'coprocessor_table','row1','F:A','dfs'
ERROR: Failed 1 action: IOException: 1 time, servers with issues: littlelawson,16020,1529427401818
(The shell then prints the usage help for the put command, omitted here.)
Check whether data can be written directly to column `B` of the family:
hbase(main):029:0> put 'coprocessor_table','row1','F:B',456
ERROR: Failed 1 action: IOException: 1 time, servers with issues: littlelawson,16020,1529427401818
A few things to note:
- In the command alter 'coprocessor_table',METHOD =>'table_att','coprocessor'=>'hdfs://192.168.211.3:9000/MyHBaseInAction-1.0-SNAPSHOT.jar|HBaseIA.TwitBase.hbase.MyRegionObserver|1001', the jar containing the Observer was uploaded to HDFS, so the coprocessor path must be an HDFS path; otherwise the command fails with:
ERROR: org.apache.hadoop.hbase.DoNotRetryIOException: java.net.UnknownHostException:
MyHBaseInAction-1.0-SNAPSHOT.jar Set hbase.table.sanity.checks to false at conf or
table descriptor if you want to bypass sanity checks
....
- `METHOD` must not be written in lowercase, or the command fails. (HBase is case-sensitive.)
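- To confirm the coprocessor is attached, describe the table and look for the coprocessor entry among the table attributes; to detach it later, unset the attribute. A sketch (the attribute name coprocessor$1 assumes this is the first coprocessor on the table):

hbase> describe 'coprocessor_table'
hbase> alter 'coprocessor_table', METHOD => 'table_att_unset', NAME => 'coprocessor$1'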