1
2
|
# Unpack the JDK 7u67 tarball and move the extracted directory under $HOME/tools.
tar -xzvf jdk-7u67-linux-x64.tar.gz
# Create the target first: if $HOME/tools did not exist, mv would rename the JDK
# directory to "tools" instead of moving it inside.
mkdir -p "$HOME/tools"
# mv moves directories as-is and has no -r option.
mv jdk1.7.0_67 "$HOME/tools"
|
1
2
|
# Point JAVA_HOME at the unpacked JDK and put its bin directory first on PATH.
JAVA_HOME=$HOME/tools/jdk1.7.0_67/
PATH=$JAVA_HOME/bin:$PATH
export JAVA_HOME PATH
|
1
2
|
# Download the protobuf 2.5.0 source tarball and unpack it.
# NOTE(review): Google Code hosting has been retired; confirm this URL still resolves
# or substitute a current mirror of protobuf-2.5.0.tar.bz2.
wget https://protobuf.googlecode.com/files/protobuf-2.5.0.tar.bz2
tar xjvf protobuf-2.5.0.tar.bz2
|
1
2
3
|
# Build protobuf and install it under $HOME/tools/protobuf-2.5.0.
# Run this from the directory that contains the configure script.
mkdir -p "$HOME/tools/protobuf-2.5.0"
./configure --prefix="$HOME/tools/protobuf-2.5.0"
# Only install when the build succeeded.
make && make install
|
1
2
|
# PROTO_HOME is the --prefix used when building protobuf.
export PROTO_HOME=$HOME/tools/protobuf-2.5.0
# "make install" places the protoc executable in <prefix>/bin, so that is the
# directory that must be on PATH (not the prefix itself).
export PATH=$PROTO_HOME/bin:$PATH
|
1
2
3
4
5
|
# Unpack Maven 3.3.1 and move it under $HOME/tools.
tar -xzvf apache-maven-3.3.1-bin.tar.gz
mv apache-maven-3.3.1 "$HOME/tools"
# Edit the login profile in the home directory (not a .bashrc in the current
# directory) and add the two export lines below to it.
vi ~/.bashrc
export MAVEN_HOME=$HOME/tools/apache-maven-3.3.1
export PATH=$MAVEN_HOME/bin:$PATH
|
1
2
3
4
5
|
# Unpack HBase 0.98.11 (hadoop2 build) and move it under $HOME/tools.
tar xzvf hbase-0.98.11-hadoop2-bin.tar.gz
# mv needs a destination; HBASE_HOME below expects the directory inside $HOME/tools.
mv hbase-0.98.11-hadoop2 "$HOME/tools"
# Add the two export lines below to ~/.bashrc.
vi ~/.bashrc
export HBASE_HOME=$HOME/tools/hbase-0.98.11-hadoop2
export PATH=$HBASE_HOME/bin:$PATH
|
1
|
# Launch HBase via the start-hbase.sh script, resolved through PATH.
start-hbase.sh
|
1
|
# Create the project root; -p tolerates an existing directory and missing parents.
$ mkdir -p "$PROJECT_HOME"
|
1
2
3
|
# Generate the Maven quickstart skeleton non-interactively.
# The artifactId becomes the project directory name; it is "rowCount" because every
# later step works inside $PROJECT_HOME/rowCount.
$ cd "$PROJECT_HOME"
$ mvn archetype:generate -DgroupId=org.ibm.developerworks -DartifactId=rowCount \
      -DarchetypeArtifactId=maven-archetype-quickstart -DinteractiveMode=false
|
01
02
03
04
05
06
07
08
09
10
11
12
13
14
15
16
17
18
19
|
// Code-generation options: Java output goes to package org.ibm.developerworks inside
// the outer class getRowCount, with generic service stubs enabled.
option optimize_for = SPEED;
option java_generic_services = true;
option java_outer_classname = "getRowCount";
option java_package = "org.ibm.developerworks";

// Request carrying the single mandatory reCount flag.
message getRowCountRequest {
  required bool reCount = 1;
}

// Response carrying the (optional) row count.
message getRowCountResponse {
  optional int64 rowCount = 1;
}

// RPC service exposing one method, getRowCount.
service ibmDeveloperWorksService {
  rpc getRowCount (getRowCountRequest) returns (getRowCountResponse);
}
|
1
2
|
# Create the protobuf source directory (-p: no error if it already exists or if a
# parent is missing) and move the .proto file into it.
$ mkdir -p "$PROJECT_HOME/rowCount/src/main/protobuf"
$ mv ibmDeveloperworksDemo.proto "$PROJECT_HOME/rowCount/src/main/protobuf"
|
1
2
|
# Compile the .proto file into Java sources under src/main/java.
# Paths are quoted so a PROJECT_HOME containing spaces does not split into several words.
$ cd "$PROJECT_HOME/rowCount/src/main/protobuf"
$ protoc --java_out="$PROJECT_HOME/rowCount/src/main/java" ibmDeveloperworksDemo.proto
|
1
2
3
4
5
|
<!-- Protobuf Java runtime, version 2.5.0. -->
<dependency>
    <groupId>com.google.protobuf</groupId>
    <artifactId>protobuf-java</artifactId>
    <version>2.5.0</version>
</dependency>
|
1
|
# Run Maven's clean and compile phases for the project.
mvn clean compile
|
1
|
# Create the directory for the org.ibm.developerworks.coprocessor package;
# -p also creates any missing intermediate directories.
$ mkdir -p "$PROJECT_HOME/rowCount/src/main/java/org/ibm/developerworks/coprocessor"
|
01
02
03
04
05
06
07
08
09
10
11
12
13
14
15
16
17
|
// These two members are used by later code to operate on ZooKeeper; both are
// (re)assigned in start().
private String zNodePath = "/hbase/ibmdeveloperworks/demo";
private ZooKeeperWatcher zkw = null;
@Override
public void start(CoprocessorEnvironment
env
) throws IOException {
if
(
env
instanceof RegionCoprocessorEnvironment) {
this.re = (RegionCoprocessorEnvironment)
env
;
RegionServerServices rss = re.getRegionServerServices();
//
获取 ZooKeeper 对象,这个 ZooKeeper 就是本 HBase 实例所连接的 ZooKeeper
zkw = rss.getZooKeeper();
//
用 region name 作为 znode 的节点名后缀
zNodePath=zNodePath+re.getRegion().getRegionNameAsString();
}
else
{
throw new CoprocessorException(
"Must be loaded on a table region!"
);
}
}
|
1
2
3
4
|
/**
 * Coprocessor stop hook; intentionally empty.
 *
 * @param env environment supplied by the coprocessor host (unused)
 * @throws IOException declared by the signature; this body never throws it
 */
@Override
public void stop(CoprocessorEnvironment env) throws IOException {
    // Nothing to do.
}
|
1
2
3
4
5
6
7
|
/**
 * Returns a reference to this object as the coprocessor's {@code Service}.
 * NOTE(review): the previous comment said this class implements "RowCounterService";
 * the service declared in the .proto is ibmDeveloperWorksService - confirm against
 * the class declaration.
 *
 * @return this instance
 */
@Override
public Service getService() {
    return this;
}
|
1
2
|
public void getRowCount(RpcController controller, getRowCount.getRowCountRequest request,
RpcCallback<getRowCount.getRowCountResponse>
done
)
|
1
|
boolean reCount=request.getReCount();
|
01
02
03
04
05
06
07
08
09
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
|
long getTableRowCountBatch(String tableName) {
try{
//
连接 Hbase
Configuration config = new Configuration();
HConnection connection = HConnectionManager.createConnection(config);
HTableInterface table = connection.getTable(tableName);
//
设置 request 参数
org.ibm.developerworks.getRowCount.getRowCountRequest.Builder builder =
getRowCountRequest.newBuilder();
builder.setReCount(
false
);
//
开始和结束 rowkey
byte[] s= Bytes.toBytes(
"r1"
);
byte[] e= Bytes.toBytes(
"t1"
);
//
调用 batchCoprocessorService
results = table.batchCoprocessorService(
ibmDeveloperWorksService.getDescriptor().findMethodByName(
"getRowCount"
),
builder.build(),s, e,
getRowCountResponse.getDefaultInstance());
}
Collection<getRowCountResponse> resultsc = results.values();
for
( getRowCountResponse r : resultsc)
{
totalRowCount += r.getRowCount();
}
return
totalRowCount;
}
|
01
02
03
04
05
06
07
08
09
10
11
12
13
14
15
16
17
18
19
20
21
22
|
boolean createTable(string tableName) {
//HBase
1.0 创建 Table
Configuration config = new Configuration();
Table table = null;
TableName TABLE = TableName.valueOf(tableName);
Admin admin= new Admin(config);
HTableDescriptor tableDesc = new HTableDescriptor(TABLE);
//HBase
0.98 创建 Table
Configuration config = new Configuration();
HBaseAdmin admin = new HBaseAdmin(config);
HTableDescriptor tableDesc = new HTableDescriptor(tableName);
//
添加 coprocessor
tableDesc.addCoprocessor(“org.ibm.developerworks.coprocessor.getRowCountEndpoint”);
tableDesc.addCoprocessor(“org.ibm.developerworks.coprocessor.rowCountObserver”);
//
省去其他的 HTableDescriptor 操作代码
...
//
创建表
admin.createTable(tableDesc);
}
|
1
|
java hbaseCoprocessorDemo
test
1 666 rowkey
|
1
|
// Shared logger obtained from LogFactory, keyed by RowCountObserver.class.
private static final Log LOG = LogFactory.getLog(RowCountObserver.class);
|
HBase 的协处理器用途广泛,但是 HBase 的文档中对协处理器编程的细节却缺乏实用性的编程方法描述,希望本文能够为广大 HBase 用户提供一个较为详细的入门介绍。由于作者水平有限,文中可能有错误和不妥的地方,还希望读者不吝赐教。
《HBase 协处理器编程详解》系列第一部分介绍了 HBase 协处理器 Server 端代码的开发细节。分别实现了 Endpoint 和 Observer 协处理器,它们两个互相配合,为客户端提供 RPC 服务,获取指定 Region 上的总的 rowcount,即记录的个数。现在,本篇作为该系列第二部分,着重演示客户端应用程序中如何调用 Endpoint 协处理器,以便获得该服务。
实现 Client 端代码
01
02
03
04
05
06
07
08
09
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
|
long singleRegionCount(String tableName, String rowkey,boolean reCount)
{
long rowcount = 0;
try{
Configuration config = new Configuration();
HConnection conn = HConnectionManager.createConnection(config);
HTableInterface tbl = conn.getTable(tableName);
//
获取 Channel
CoprocessorRpcChannel channel = tbl.coprocessorService(rowkey.getBytes());
org.ibm.developerworks.getRowCount.ibmDeveloperWorksService.BlockingInterface service =
org.ibm.developerworks.getRowCount.ibmDeveloperWorksService.newBlockingStub(channel);
//
设置 RPC 入口参数
org.ibm.developerworks.getRowCount.getRowCountRequest.Builder request =
org.ibm.developerworks.getRowCount.getRowCountRequest.newBuilder();
request.setReCount(reCount);
//
调用 RPC
org.ibm.developerworks.getRowCount.getRowCountResponse ret =
service.getRowCount(null, request.build());
//
解析结果
rowcount = ret.getRowCount();
}
catch(Exception e) {e.printStackTrace();}
return
rowcount;
}
|
01
02
03
04
05
06
07
08
09
10
11
12
13
14
15
16
17
18
19
|
Batch.Call<ibmDeveloperWorksService, getRowCountResponse> callable =
new Batch.Call<ibmDeveloperWorksService, getRowCountResponse>() {
ServerRpcController controller = new ServerRpcController();
BlockingRpcCallback<getRowCountResponse> rpcCallback =
new BlockingRpcCallback<getRowCountResponse>();
//
下面重载 call 方法
@Override
public getRowCountResponse call(ibmDeveloperWorksService instance) throws IOException {
//
初始化 RPC 的入口参数,设置 reCount 为
true
//Server
端会进行慢速的遍历 region 的方法进行统计
org.ibm.developerworks.getRowCount.getRowCountRequest.Builder builder =
getRowCountRequest.newBuilder();
builder.setreCount(
true
);
//RPC
调用
instance.getRowCount(controller, builder.build(), rpcCallback);
//
直接返回结果,即该 Region 的 rowCount
return
rpcCallback.get();
}
};
|
01
02
03
04
05
06
07
08
09
10
11
12
13
14
15
16
17
18
19
|
long getTableRowCountSlow(string tableName) {
//
创建 Table 实例, HBase 1.0
Connection connection = ConnectionFactory.createConnection(conf);
Table table = connection.getTable(tableName);
//
创建 HTable 实例,HBase 0.98
HConnection connection = HConnectionManager.createConnection(config);
HTable table = connection.getTable(tableName);
Batch.Call<ibmDeveloperWorksService, getRowCountResponse> callable =
... 省略代码,参考代码清单 2
results = table.coprocessorService(ibmDeveloperWorksService.class, null, null,
callable);
long totalRowCount = 0;
for
( r : results)
{
totalRowCount += r.value();
}
return
totalRowCount;
}
|
01
02
03
04
05
06
07
08
09
10
11
|
//
定义总的 rowCount 变量
final AtomicLong totalRowCount = new AtomicLong();
//
定义 callback
Batch.Callback< Long > callback =
new Batch.Callback<Long>() {
@Override
public void update(byte[] region, byte[] row, getRowCountResponse result) {
//
直接将 Batch.Call 的结果,即单个 region 的 rowCount 累加到 totalRowCount
totalRowCount.getAndAdd(result.getRowCount());
}
};
|
01
02
03
04
05
06
07
08
09
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
|
long getTableRowCountFast(string tableName) {
//
创建 Table 实例, HBase 1.0
Connection connection = ConnectionFactory.createConnection(conf);
TableName TABLE = TableName.valueOf(tableName);
Table table = connection.getTable(TABLE);
//
创建 HTable 实例,HBase 0.98
HConnection connection = HConnectionManager.createConnection(config);
HTable table = connection.getTable(tableName);
Batch.Call<ibmDeveloperWorksService, getRowCountResponse> callable =
new Batch.Call<ibmDeveloperWorksService, getRowCountResponse>() {
ServerRpcController controller = new ServerRpcController();
BlockingRpcCallback<getRowCountResponse> rpcCallback =
new BlockingRpcCallback<getRowCountResponse>();
//
下面重载 call 方法
@Override
public getRowCountResponse call(ibmDeveloperWorksService instance)
throws IOException {
//
初始化 RPC 的入口参数,设置 reCount 为
false
//Server
端会进行慢速的遍历 region 的方法进行统计
org.ibm.developerworks.getRowCount.getRowCountRequest.Builder builder =
getRowCountRequest.newBuilder();
builder.setreCount(
false
);
//RPC
调用
instance.getRowCount(controller, builder.build(), rpcCallback);
//
直接返回结果,即该 Region 的 rowCount
return
rpcCallback.get();
}
};
//
定义总的 rowCount 变量
AtomicLong totalRowCount = new AtomicLong();
//
定义 callback
Batch.Callback< Long > callback =
new Batch.Callback<Long>() {
@Override
public void update(byte[] region, byte[] row, Long result) {
//
直接将 Batch.Call 的结果,即单个 region 的 rowCount 累加到 totalRowCount
totalRowCount.getAndAdd(result);
}
};
table.coprocessorService( ibmDeveloperWorksService.class, null, null,
callable, callback);
return
totalRowCount;
}
|
01
02
03
04
05
06
07
08
09
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
|
long getTableRowCountBatch(String tableName) {
try{
//
连接 Hbase
Configuration config = new Configuration();
HConnection connection = HConnectionManager.createConnection(config);
HTableInterface table = connection.getTable(tableName);
//
设置 request 参数
org.ibm.developerworks.getRowCount.getRowCountRequest.Builder builder =
getRowCountRequest.newBuilder();
builder.setReCount(
false
);
//
开始和结束 rowkey
byte[] s= Bytes.toBytes(
"r1"
);
byte[] e= Bytes.toBytes(
"t1"
);
//
调用 batchCoprocessorService
results = table.batchCoprocessorService(
ibmDeveloperWorksService.getDescriptor().findMethodByName(
"getRowCount"
),
builder.build(),s, e,
getRowCountResponse.getDefaultInstance());
}
Collection<getRowCountResponse> resultsc = results.values();
for
( getRowCountResponse r : resultsc)
{
totalRowCount += r.getRowCount();
}
return
totalRowCount;
}
|
01
|