HBase的table是按region切分的,client操作一个row的时候,如何知道这个row对应的region是在哪台Region server上呢?这里有个region location过程,主要涉及到2张系统表:-ROOT-和.META.。其结构见图
在zookeeper的/hbase/root-region-server节点中存着-ROOT-表所在的Region Server地址。
-ROOT-表的一个row代表着.META.表的一个region信息,其key的结构是META表名,META表Region的startkey,RegionId。其value主要保存regioninfo和server信息。ROOT表不能split
.META.表的一个row代表着用户表的一个region信息,其key其实就是用户表的regionName,结构是用户表名,startKey,RegionId。其value同样保存着regioninfo和server信息。META表可以split,但是一个region默认有128M,可以存上亿个用户表的region信息,所以一般不会split。
其查找过程如下:
1.通过zk getData拿-ROOT-表的location
2.RPC -ROOT-表的rs,getClosestRowBefore,拿row对应的meta表的region location
3.RPC .META.表的某一个region,拿该row在真实table所在的region location
4.RPC对应region
region location需要3次网络IO,为了提升性能,client会cache数据。
LocationCache是一个2级Map,第一级的key是tableName的hash值,第二级的key是startRow,用SoftValueSortedMap包装了TreeMap实现,用软引用实现cache,内存不够时才会回收。Cache里存着META表和用户表的region location信息。
其代码实现如下,0.94版本:
HConnectionManager locateRegion入口
- private HRegionLocation locateRegion(final byte [] tableName,
- final byte [] row, boolean useCache)
- throws IOException {
- .......
- // Entry point of region location. Dispatches on tableName: -ROOT- is resolved through rootRegionTracker, .META. through -ROOT-, any other table through .META..
- ensureZookeeperTrackers();
- // Case 1: the -ROOT- table itself.
- if (Bytes.equals(tableName, HConstants.ROOT_TABLE_NAME)) {
- try {
- // Ask the root region tracker for the server hosting -ROOT-, passing rpcTimeout to its wait call.
- ServerName servername = this.rootRegionTracker.waitRootRegionLocation(this.rpcTimeout);
- LOG.debug("Looked up root region location, connection=" + this +
- "; serverName=" + ((servername == null)? "": servername.toString()));
- if (servername == null) return null;
- // Pair the constant ROOT_REGIONINFO with the host and port reported by the tracker.
- return new HRegionLocation(HRegionInfo.ROOT_REGIONINFO,
- servername.getHostname(), servername.getPort());
- } catch (InterruptedException e) {
- Thread.currentThread().interrupt();
- return null;
- }
- }
- // Case 2: a .META. region. Its location is stored in the parent table -ROOT-;
- // the lookup synchronizes on metaRegionLock inside locateRegionInMeta.
- else if (Bytes.equals(tableName, HConstants.META_TABLE_NAME)) {
- return locateRegionInMeta(HConstants.ROOT_TABLE_NAME, tableName, row,
- useCache, metaRegionLock);
- }
- // Case 3: a user-table region. Its location is stored in the parent table .META.;
- // the lookup synchronizes on userRegionLock inside locateRegionInMeta.
- else {
-
- return locateRegionInMeta(HConstants.META_TABLE_NAME, tableName, row,
- useCache, userRegionLock);
- }
- }
locateRegionInMeta方法
- private HRegionLocation locateRegionInMeta(final byte [] parentTable,
- final byte [] tableName, final byte [] row, boolean useCache,
- Object regionLockObject)
- throws IOException {
- HRegionLocation location;
- // Finds the region of tableName that contains row by querying the catalog table parentTable
- // (callers in this file pass -ROOT- when locating a .META. region and .META. when locating a user region).
- // Retries up to numRetries times and throws NoServerForRegionException when the budget is used up.
- // Fast path: when the caller allows it, answer from the client-side location cache without any RPC.
- if (useCache) {
- location = getCachedLocation(tableName, row);
- if (location != null) {
- return location;
- }
- }
- // Build the catalog row key to search for: a region name composed from tableName, row and HConstants.NINES.
- // NOTE(review): presumably row sits in the start-key position and NINES in the region-id position so that
- // the key sorts after the catalog row of the region containing row, letting getClosestRowBefore land on
- // that region's row - confirm against HRegionInfo.createRegionName.
-
- byte [] metaKey = HRegionInfo.createRegionName(tableName, row,
- HConstants.NINES, false);
-
- for (int tries = 0; true; tries++) {
- // Give up once the retry budget is exhausted.
- if (tries >= numRetries) {
- throw new NoServerForRegionException("Unable to find region for "
- + Bytes.toStringBinary(row) + " after " + numRetries + " tries.");
- }
-
- HRegionLocation metaLocation = null;
- try {
- // Step up one level: find the region of the parent catalog table that holds metaKey.
- // This goes back through locateRegion, so a user-table lookup may first resolve .META. and then -ROOT-.
- metaLocation = locateRegion(parentTable, metaKey);
- // No parent location available: try again. Note that `continue` skips the back-off sleep at the bottom of the loop.
- if (metaLocation == null) continue;
- // Obtain the RPC interface of the region server hosting that catalog region.
-
- HRegionInterface server =
- getHRegionConnection(metaLocation.getHostname(), metaLocation.getPort());
-
- Result regionInfoRow = null;
- // Only one thread at a time per lock object performs the catalog read below.
- // Threads that waited here get a chance to be served by the cache re-check inside the lock.
-
- synchronized (regionLockObject) {
- // When the parent is .META. (i.e. a user-table lookup) and prefetch is enabled for this table,
- // call prefetchRegionCache first.
- // NOTE(review): presumably this loads the locations of several nearby regions into the cache - confirm.
- if (Bytes.equals(parentTable, HConstants.META_TABLE_NAME) &&
- (getRegionCachePrefetch(tableName)) ) {
- prefetchRegionCache(tableName, row);
- }
- // Re-check the cache while holding the lock: the prefetch above, or another thread that
- // held this lock earlier, may already have stored the location.
-
-
-
- if (useCache) {
- location = getCachedLocation(tableName, row);
- if (location != null) {
- return location;
- }
- }
- // Caller asked to bypass the cache: remove the cached entry for this row
- // before reading the location from the catalog again.
- else {
- deleteCachedLocation(tableName, row);
- }
- // Ask the catalog region for the row closest to (at or before) metaKey in the catalog family.
-
-
- regionInfoRow = server.getClosestRowBefore(
- metaLocation.getRegionInfo().getRegionName(), metaKey,
- HConstants.CATALOG_FAMILY);
- }
- if (regionInfoRow == null) {
- throw new TableNotFoundException(Bytes.toString(tableName));
- }
- // The REGIONINFO cell of the catalog row must be present and non-empty.
- byte [] value = regionInfoRow.getValue(HConstants.CATALOG_FAMILY,
- HConstants.REGIONINFO_QUALIFIER);
- if (value == null || value.length == 0) {
- throw new IOException("HRegionInfo was null or empty in " +
- Bytes.toString(parentTable) + ", row=" + regionInfoRow);
- }
- // Deserialize the cell bytes into an HRegionInfo.
-
- HRegionInfo regionInfo = (HRegionInfo) Writables.getWritable(
- value, new HRegionInfo());
- // The closest preceding catalog row may belong to a different table; in that case
- // tableName has no entry in the catalog and is reported as not found.
- if (!Bytes.equals(regionInfo.getTableName(), tableName)) {
- throw new TableNotFoundException(
- "Table '" + Bytes.toString(tableName) + "' was not found, got: " +
- Bytes.toString(regionInfo.getTableName()) + ".");
- }
- if (regionInfo.isSplit()) {
- throw new RegionOfflineException("the only available region for" +
- " the required row is a split parent," +
- " the daughters should be online soon: " +
- regionInfo.getRegionNameAsString());
- }
- if (regionInfo.isOffline()) {
- throw new RegionOfflineException("the region is offline, could" +
- " be caused by a disable table call: " +
- regionInfo.getRegionNameAsString());
- }
- // The SERVER cell is read as a host-and-port string; hostAndPort stays empty when the cell is absent.
- value = regionInfoRow.getValue(HConstants.CATALOG_FAMILY,
- HConstants.SERVER_QUALIFIER);
- String hostAndPort = "";
- if (value != null) {
- hostAndPort = Bytes.toString(value);
- }
- ......
- // Split the string into hostname and port, build the location,
- // then store it in the cache before returning it.
- String hostname = Addressing.parseHostname(hostAndPort);
- int port = Addressing.parsePort(hostAndPort);
- location = new HRegionLocation(regionInfo, hostname, port);
-
- cacheLocation(tableName, location);
- return location;
- } catch (TableNotFoundException e) {
- // A missing table is rethrown immediately instead of being retried.
- // This catch must precede the IOException handler below, which would otherwise
- // route the exception into the retry path.
- throw e;
- } catch (IOException e) {
- if (e instanceof RemoteException) {
- e = RemoteExceptionHandler.decodeRemoteException((RemoteException) e);
- }
- if (tries < numRetries - 1) {
- .......
- } else {
- throw e;
- }
- // Unless the failure was a RegionOfflineException or NoServerForRegionException, call relocateRegion for the parent catalog region.
- // NOTE(review): relocateRegion presumably looks the parent region up again without using the cache - confirm.
- if(!(e instanceof RegionOfflineException ||
- e instanceof NoServerForRegionException)) {
- relocateRegion(parentTable, metaKey);
- }
- }
- // Back off before the next attempt; the pause is computed by ConnectionUtils.getPauseTime from this.pause and tries.
- try{
- Thread.sleep(ConnectionUtils.getPauseTime(this.pause, tries));
- } catch (InterruptedException e) {
- Thread.currentThread().interrupt();
- throw new IOException("Giving up trying to location region in " +
- "meta: thread is interrupted.");
- }
- }
- }