HBase: HTablePool重构及优化

阅读更多

HBase Version: hbase-0.90.3-cdh3u1

org.apache.hadoop.hbase.client.HTablePool

用起来不是很方便, 所以结合自己的业务逻辑重写了一下HTablePool. 欢迎讨论.

主要是对源代码下面4点进行改进和设置:

1.  希望可以为不同的table建立不同poolSize的pool, 而目前HTablePool为所有的table使用同一个maxSize.

// Single size limit held by the stock HTablePool; the same value applies to every table's pool.
private final int maxSize;

2.  从HTablePool中getTable时只初始化了一个HTable, 而且在这个时候才初始化该table对应的HTable Queue.

    这个点不是很好, 也是我想修改源码的出发点.

  /**
   * Get a reference to the specified table from the pool.
   * <p>
   * Create a new one if one is not available. Note that the per-table queue is
   * only created lazily here, on the first request for that table, and that a
   * single HTable is instantiated per call.
   *
   * @param tableName table name
   * @return a reference to the specified table
   * @throws RuntimeException if there is a problem instantiating the HTable
   */
  public HTableInterface getTable(String tableName) {
    LinkedList<HTableInterface> queue = tables.get(tableName);
    if (queue == null) {
      // First request for this table: register an empty queue and hand out a
      // freshly created table instead of polling.
      queue = new LinkedList<HTableInterface>();
      tables.putIfAbsent(tableName, queue);
      return createHTable(tableName);
    }
    HTableInterface table;
    synchronized (queue) {
      table = queue.poll();
    }
    if (table == null) {
      // Queue exists but is currently empty.
      return createHTable(tableName);
    }
    return table;
  }

 3.   应该有一个createTablePool的方法, 便于用户自己创建HTablePool.此方法可以与closeTablePool相互呼应.创建后面再关闭.

  /**
   * Releases every pooled table instance of the given table through the table
   * factory and then deletes the connection associated with this pool's
   * configuration.
   * <p>
   * The queue for {@code tableName} must already exist; the lookup result is
   * used as the lock object without a null check.
   *
   * @param tableName name of the table whose pooled instances are released
   */
  public void closeTablePool(final String tableName)  {
    Queue<HTableInterface> queue = tables.get(tableName);
    synchronized (queue) {
      // Drain the queue, releasing each table as it is removed.
      HTableInterface table = queue.poll();
      while (table != null) {
        this.tableFactory.releaseHTableInterface(table);
        table = queue.poll();
      }
    }
    HConnectionManager.deleteConnection(this.config, true);
  }

 4.  由于HTable的Put可以使用缓冲优化, 让多个Put一起通过flushCommits()提交. 可以循环pool中的HTable, 逐个调用flushCommits().

 

贴上修改后的源码:

package org.apache.hadoop.hbase.client;

import java.util.LinkedList;
import java.util.Queue;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.util.Bytes;
/**
 * A rewrite pool of HTable instances.

* * Each HTablePool acts as a pool for all tables. To use, instantiate an * HTablePool and use {@link #getTable(String)} to get an HTable from the pool. * Once you are done with it, return it to the pool with {@link #putTable(HTableInterface)}. * *

A pool can be created with a maxSize which defines the most HTable * references that will ever be retained for each table. Otherwise the default * is {@link Integer#MAX_VALUE}. * *

Pool will manage its own cluster to the cluster. See {@link HConnectionManager}. * * @author greatwqs * @update 2012-08-25 */ public class MyHTablePool { public final static int DEFAULT_POOL_SIZE = 4; /** * ConcurrentMap> * String tableName * LinkedList the HTable pool contains HTableInterface * LinkedList you can create HTable pool different size you want. */ private final ConcurrentMap> tables = new ConcurrentHashMap>(); /*** * Configuration for hbase-site.xml */ private final Configuration config; /** * HTableInterfaceFactory that createHTableInterface and releaseHTableInterface */ private final HTableInterfaceFactory tableFactory; /** * Default Constructor. */ public MyHTablePool() { this(HBaseConfiguration.create()); } /** * Constructor to set maximum versions and use the specified configuration. * @param config configuration */ public MyHTablePool(final Configuration config) { this(config, null); } public MyHTablePool(final Configuration config, final HTableInterfaceFactory tableFactory) { // Make a new configuration instance so I can safely cleanup when // done with the pool. this.config = config == null ? new Configuration() : new Configuration( config); this.tableFactory = tableFactory == null ? new HTableFactory() : tableFactory; } /** * Create all the HTable instances , belonging to the given table. *

* Note: this is a 'create' of the given table pool. * @param tableName * @param maxSize * @param isAutoFlush */ public void createHTablePool(final String tableName, final int maxSize, boolean isAutoFlush) { LinkedList queue = tables.get(tableName); if (queue == null) { queue = new LinkedList(); tables.putIfAbsent(tableName, queue); } synchronized (queue) { int addHTableSize = maxSize - queue.size(); if(addHTableSize <= 0){ return; } for(int i=0; i * Note: this is a 'create' of the given table pool. * @param tableNameArray * @param maxSize * @param isAutoFlush default false * usage example: * false: when {@link Put} use. use buffere put. call flushCommits after a time. * you can design a thread(such as 3MS run a time)to loop all pool table, and call flushCommits. * the performance well. * true: when {@link Scan} and {@link Delete} use. */ public void createHTablePool(final String[] tableNameArray, final int maxSize, boolean isAutoFlush) { for(String tableName : tableNameArray){ createHTablePool(tableName,maxSize,isAutoFlush); } } /** * Create all the HTable instances , belonging to the given tables. *

* Note: this is a 'create' of the given table pool. * @param tableName * @param maxSize */ public void createHTablePool(final String[] tableNameArray, final int maxSize) { createHTablePool(tableNameArray,maxSize,false); } /** * Get a reference to the specified table from the pool.

* * @param tableName table name * @return a reference to the specified table * @throws RuntimeException if there is a problem instantiating the HTable */ public HTableInterface getHTable(String tableName) { LinkedList queue = tables.get(tableName); if (queue == null) { throw new RuntimeException("There is no pool for the HTable"); } HTableInterface table; synchronized (queue) { table = queue.poll(); } return table; } /** * Get a reference to the specified table from the pool.

* * Create a new one if one is not available. * @param tableName table name * @return a reference to the specified table * @throws RuntimeException if there is a problem instantiating the HTable */ public HTableInterface getHTable(byte[] tableName) { return getHTable(Bytes.toString(tableName)); } /** * Puts the specified HTable back into the pool. *

* * If the HTable not belong to HTablePool before, do not use this method. * * @param table table */ public void putHTableBack(HTableInterface table) { LinkedList queue = tables.get(Bytes.toString(table.getTableName())); synchronized (queue) { queue.add(table); } } protected HTableInterface createHTable(String tableName) { return this.tableFactory.createHTableInterface(config, Bytes .toBytes(tableName)); } /** * Closes all the HTable instances , belonging to the given table, in the table pool. *

* Note: this is a 'shutdown' of the given table pool and different from * {@link #putTable(HTableInterface)}, that is used to return the table * instance to the pool for future re-use. * * @param tableName */ public void closeHTablePool(final String tableName) { Queue queue = tables.get(tableName); synchronized (queue) { HTableInterface table = queue.poll(); while (table != null) { this.tableFactory.releaseHTableInterface(table); table = queue.poll(); } } HConnectionManager.deleteConnection(this.config, true); } /** * See {@link #closeTablePool(String)}. * * @param tableName */ public void closeHTablePool(final byte[] tableName) { closeHTablePool(Bytes.toString(tableName)); } /** * See {@link #closeTablePool(String)}. * * @param tableName */ public void closeHTablePool() { for(String tabName:tables.keySet()){ closeHTablePool(tabName); } } /** * getCurrentPoolSize * @param tableName * @return */ public int getCurrentPoolSize(String tableName) { Queue queue = tables.get(tableName); synchronized (queue) { return queue.size(); } } } 

 

org.apache.hadoop.hbase.client.MyHTablePoolTest 测试实例

package org.apache.hadoop.hbase.client;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.KeyValue;

/**
 * Manual smoke test for {@link MyHTablePool}: creates a pool for one table,
 * borrows a table, scans and prints every row, returns the table and closes
 * the pool.
 *
 * @author greatwqs
 * @update 2012-08-25
 */
public class MyHTablePoolTest {
	
	/**
	 * test method
	 * @param args unused
	 * @throws Exception if the scanner cannot be opened or the pool fails
	 */
	public static void main(String[] args) throws Exception{
		// 1. my config file 
		String configFile = "conf/hbase-site.xml";
		Configuration config = new Configuration();
		config.addResource(new Path(configFile));
		// 2. init HTablePool
		MyHTablePool myPool = new MyHTablePool(config);
		try {
			// 3. create HTablePool for a table
			myPool.createHTablePool("DCP_DataCenter_Base", MyHTablePool.DEFAULT_POOL_SIZE, false);
			// 4. get already exist HTable from HTablePool
			HTable table = (HTable) myPool.getHTable("DCP_DataCenter_Base");
			if(table == null){
				// getHTable returns null when the pool has no idle instance;
				// scanning would otherwise fail with a NullPointerException.
				System.out.println("get HTable from HTablePool Failed: no idle HTable!");
				return;
			}
			System.out.println("get HTable from HTablePool Success!");
			try {
				// 5. get all data from HTable, and print to console.
				printAllRows(table);
			} finally {
				// 6. after use HTable end, then put the HTable back to HTablePool,
				//    even when the scan failed.
				myPool.putHTableBack(table);
			}
		} finally {
			// 7. close HTablePool
			myPool.closeHTablePool();
		}
	}

	/**
	 * Scans the whole table and prints every cell to the console.
	 * Errors raised while iterating are printed and end the scan; the scanner
	 * is always closed.
	 */
	private static void printAllRows(HTable table) throws Exception {
		Scan scan = new Scan();
		ResultScanner rs = table.getScanner(scan);
		try {
			for (Result result : rs) {
				KeyValue[] kv = result.raw();
				if (kv == null || kv.length == 0) {
					// No cells in this result, so there is no row key to print.
					continue;
				}
				byte[] key = kv[0].getRow();
				System.out.println("RowKey: " + new String(key));
				for (int i = 0; i < kv.length; i++) {
					System.out.println("ColumnFamily: "	+ new String(kv[i].getFamily()));
					System.out.println("Qualifier: "+ new String(kv[i].getQualifier()));
					System.out.println("Timestamp: "+ String.valueOf(kv[i].getTimestamp()));
					System.out.println("Value: " + new String(kv[i].getValue()));
				}
				System.out.println();
			}
		} catch (Exception e) {
			e.printStackTrace();
		} finally {
			rs.close();
		}
	}
}

 

你可能感兴趣的:(HTablePool,rewrite,flushCommits,Put,Scan)