java hbase api 批量高效写入数据(线程池方式)

笔者尝试各种方法写入HBase,其中使用线程池方式写入数据最为快速。

测试环境:hbase 1.2.4, hadoop 2.7 , 单条数据大小1kb,7台服务器24核48g内存千兆网卡,测试1000w数据写入 

测试结果能达到百万级每秒写入速度。

下面上代码。

pom.xml



<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>hcg</groupId>
    <artifactId>hcg</artifactId>
    <version>1.0-SNAPSHOT</version>

    <repositories>
        <repository>
            <id>Akka repository</id>
            <url>http://repo.akka.io/releases</url>
        </repository>
        <repository>
            <id>jboss</id>
            <url>http://repository.jboss.org/nexus/content/groups/public-jboss</url>
        </repository>
        <repository>
            <id>Sonatype snapshots</id>
            <url>http://oss.sonatype.org/content/repositories/snapshots/</url>
        </repository>
        <repository>
            <id>cloudera</id>
            <url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
        </repository>
        <repository>
            <id>central</id>
            <url>http://repo.maven.apache.org/maven2</url>
        </repository>
    </repositories>

    <build>
        <sourceDirectory>src/</sourceDirectory>
        <testSourceDirectory>src/test/</testSourceDirectory>

        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-shade-plugin</artifactId>
                <version>2.2</version>
                <executions>
                    <execution>
                        <phase>package</phase>
                        <goals>
                            <goal>shade</goal>
                        </goals>
                        <configuration>
                            <filters>
                                <filter>
                                    <artifact>*:*</artifact>
                                    <excludes>
                                        <exclude>META-INF/*.SF</exclude>
                                        <exclude>META-INF/*.DSA</exclude>
                                        <exclude>META-INF/*.RSA</exclude>
                                    </excludes>
                                </filter>
                            </filters>
                            <transformers>
                                <transformer implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
                                    <resource>reference.conf</resource>
                                </transformer>
                                <!-- NOTE(review): the original listed two further
                                     transformer entries whose types were lost when
                                     this post was extracted; restore them from the
                                     Github repository linked at the end. -->
                            </transformers>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <configuration>
                    <source>1.7</source>
                    <target>1.7</target>
                </configuration>
            </plugin>
        </plugins>
    </build>

    <properties>
        <hbase.version>1.2.4</hbase.version>
        <jdk.home>I:/java/jdk1.7</jdk.home>
    </properties>

    <dependencies>
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-client</artifactId>
            <version>${hbase.version}</version>
        </dependency>

        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-common</artifactId>
            <version>${hbase.version}</version>
        </dependency>

        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-server</artifactId>
            <version>${hbase.version}</version>
        </dependency>
    </dependencies>

</project>

HBaseUtil.java

package cn.ngsoc.hbase.util;

import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.FilterList;
import org.apache.hadoop.hbase.filter.PageFilter;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos;
import org.apache.hadoop.hbase.util.Bytes;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;

/**
 * HBase 工具类
 * Created by babylon on 2016/11/29.
 */
public class HBaseUtil {

	private static final Logger logger = LoggerFactory.getLogger(HBaseUtil.class);

	// Shared, lazily-initialized configuration and connection. Both are
	// guarded by the HBaseUtil class lock: init() and getConnection() are
	// synchronized because this utility is called from multiple writer threads.
	private static Configuration conf;
	private static Connection conn;

	/**
	 * Initializes the shared HBase configuration. Idempotent: only the first
	 * call has any effect; later calls are ignored.
	 *
	 * @param zkHost comma-separated ZooKeeper quorum host list
	 * @throws RuntimeException if configuration creation fails
	 */
	public static synchronized void init(String zkHost){
		try {
			if (conf == null) {
				conf = HBaseConfiguration.create();
				conf.set("hbase.zookeeper.quorum", zkHost);
				conf.set("zookeeper.znode.parent", "/hbase");
			}
		} catch (Exception e) {
			logger.error("HBase Configuration Initialization failure !");
			throw new RuntimeException(e) ;
		}
	}

	/**
	 * Returns the shared {@link Connection}, creating (or re-creating) it when
	 * it is absent or has been closed. {@link #init(String)} must have been
	 * called first so that {@code conf} is populated.
	 *
	 * @return a live connection, never {@code null}
	 * @throws RuntimeException if the connection cannot be established; the
	 *         previous behavior of logging and returning a possibly-null
	 *         connection only deferred the failure to an NPE at the call site
	 */
	public static synchronized Connection getConnection() {
		try {
			if (conn == null || conn.isClosed()) {
				conn = ConnectionFactory.createConnection(conf);
			}
		} catch (IOException e) {
			logger.error("HBase 建立链接失败 ", e);
			throw new RuntimeException("HBase connection failed", e);
		}
		return conn;
	}

	/**
	 * Creates a table, optionally pre-split into 16 regions keyed by the hex
	 * characters "1".."F" (row keys are then expected to carry such a prefix).
	 *
	 * @param tableName      table to create
	 * @param columnFamilies column family names
	 * @param preBuildRegion whether to pre-split into 16 regions
	 */
	public static void createTable(String tableName, String[] columnFamilies, boolean preBuildRegion) throws Exception {
		if (!preBuildRegion) {
			createTable(tableName, columnFamilies);
			return;
		}
		// 15 split keys produce 16 regions.
		String[] hexKeys = { "1", "2", "3", "4", "5", "6", "7", "8", "9", "A", "B", "C", "D", "E", "F" };
		byte[][] splitKeys = new byte[hexKeys.length][];
		for (int idx = 0; idx < hexKeys.length; idx++) {
			splitKeys[idx] = Bytes.toBytes(hexKeys[idx]);
		}
		createTable(tableName, columnFamilies, splitKeys);
	}

	/**
	 * Creates a table with an "events" family, pre-split into {@code pNum + 1}
	 * regions using generated split keys.
	 * NOTE(review): assumes RandCodeEnum.HBASE_CHAR.getHbaseKeys returns at
	 * least pNum keys — confirm against that implementation.
	 */
	private static void createTable(String tableName, int pNum, boolean only) throws Exception {
		String[] generatedKeys = RandCodeEnum.HBASE_CHAR.getHbaseKeys(pNum, 2, only);
		byte[][] splitKeys = new byte[pNum][];
		for (int idx = 0; idx < pNum; idx++) {
			splitKeys[idx] = Bytes.toBytes(generatedKeys[idx]);
		}
		createTable(tableName, new String[] { "events" }, splitKeys);
	}
	
	/**
	 * Creates a table with the given column families and explicit region split
	 * keys. Every family uses SNAPPY compression and keeps a single version.
	 * If the table already exists, only a warning is logged.
	 *
	 * @param tableName table to create
	 * @param cfs       column family names
	 * @param splitKeys pre-split boundaries
	 */
	private static void createTable(String tableName, String[] cfs, byte[][] splitKeys) throws Exception {
		Connection conn = getConnection();
		HBaseAdmin admin = (HBaseAdmin) conn.getAdmin();
		try {
			if (admin.tableExists(tableName)) {
				logger.warn("Table: {} is exists!", tableName);
				return;
			}
			HTableDescriptor tableDesc = new HTableDescriptor(TableName.valueOf(tableName));
			for (String cf : cfs) {
				HColumnDescriptor family = new HColumnDescriptor(cf);
				family.setCompressionType(Compression.Algorithm.SNAPPY);
				family.setMaxVersions(1);
				tableDesc.addFamily(family);
			}
			admin.createTable(tableDesc, splitKeys);
			logger.info("Table: {} create success!", tableName);
		} finally {
			admin.close();
			closeConnect(conn);
		}
	}

	/**
	 * Creates a table with the given column families and default region
	 * layout (no pre-split). Every family uses SNAPPY compression and keeps a
	 * single version. If the table already exists, only a warning is logged.
	 *
	 * @param tableName table to create
	 * @param cfs       column family names
	 */
	private static void createTable(String tableName, String[] cfs) throws Exception {
		Connection conn = getConnection();
		HBaseAdmin admin = (HBaseAdmin) conn.getAdmin();
		try {
			if (admin.tableExists(tableName)) {
				logger.warn("Table: {} is exists!", tableName);
				return;
			}
			HTableDescriptor tableDesc = new HTableDescriptor(TableName.valueOf(tableName));
			for (String cf : cfs) {
				HColumnDescriptor family = new HColumnDescriptor(cf);
				family.setCompressionType(Compression.Algorithm.SNAPPY);
				family.setMaxVersions(1);
				tableDesc.addFamily(family);
			}
			admin.createTable(tableDesc);
			logger.info("Table: {} create success!", tableName);
		} finally {
			admin.close();
			closeConnect(conn);
		}
	}

    /**
     * Disables and deletes a table. If the table does not exist, only a
     * warning is logged.
     *
     * @param tablename table to delete
     * @throws IOException if the disable/delete RPCs fail
     */
	public static void deleteTable(String tablename) throws IOException {
		Connection conn = getConnection();
		HBaseAdmin admin = (HBaseAdmin) conn.getAdmin();
		try {
			if (!admin.tableExists(tablename)) {
				logger.warn("Table: {} is not exists!", tablename);
				return;
			}
			// A table must be disabled before HBase will delete it.
			admin.disableTable(tablename);
			admin.deleteTable(tablename);
			logger.info("Table: {} delete success!", tablename);
		} finally {
			admin.close();
			closeConnect(conn);
		}
	}
	
	/**
	 * Looks up a {@link Table} handle on the shared connection.
	 *
	 * @param tableName table name (optionally "namespace:table")
	 * @return the table, or {@code null} if it could not be obtained
	 */
	public static Table getTable(String tableName){
		Table table = null;
		try {
			table = getConnection().getTable(TableName.valueOf(tableName));
		} catch (Exception e) {
			logger.error("Obtain Table failure !", e);
		}
		return table;
	}

	/**
	 * Creates a snapshot of a table.
	 *
	 * @param snapshotName snapshot name
	 * @param tableName    table to snapshot
	 */
	public static void snapshot(String snapshotName, TableName tableName){
		try {
			Admin admin = getConnection().getAdmin();
			try {
				admin.snapshot(snapshotName, tableName);
			} finally {
				// Admin was previously leaked; it holds RPC resources and
				// must be closed after each use.
				admin.close();
			}
		} catch (Exception e) {
			logger.error("Snapshot " + snapshotName + " create failed !", e);
		}
	}

    /**
     * Lists existing snapshots, optionally filtered by a regex.
     *
     * @param snapshotNameRegex regex filter; blank/null lists all snapshots
     * @return matching snapshot descriptions, or {@code null} on failure
     */
    public static List listSnapshots(String snapshotNameRegex){
        try {
            Admin admin = getConnection().getAdmin();
            try {
                if (StringUtils.isNotBlank(snapshotNameRegex)) {
                    return admin.listSnapshots(snapshotNameRegex);
                }
                return admin.listSnapshots();
            } finally {
                admin.close(); // was leaked before
            }
        } catch (Exception e) {
            logger.error("Snapshot " + snapshotNameRegex + " get failed !", e);
        }
        return null;
    }

    /**
     * Deletes all snapshots matching a regex.
     *
     * @param snapshotNameRegex regex filter; must not be blank
     */
    public static void deleteSnapshots(String snapshotNameRegex){
        try {
            Admin admin = getConnection().getAdmin();
            try {
                if (StringUtils.isNotBlank(snapshotNameRegex)) {
                    admin.deleteSnapshots(snapshotNameRegex);
                } else {
                    logger.error("SnapshotNameRegex can't be null !");
                }
            } finally {
                admin.close(); // was leaked before
            }
        } catch (Exception e) {
            logger.error("Snapshots " + snapshotNameRegex + " del failed !", e);
        }
    }

    /**
     * Deletes a single snapshot by exact name.
     *
     * @param snapshotName snapshot to delete; must not be blank
     */
    public static void deleteSnapshot(String snapshotName){
        try {
            Admin admin = getConnection().getAdmin();
            try {
                if (StringUtils.isNotBlank(snapshotName)) {
                    admin.deleteSnapshot(snapshotName);
                } else {
                    logger.error("SnapshotName can't be null !");
                }
            } finally {
                admin.close(); // was leaked before
            }
        } catch (Exception e) {
            logger.error("Snapshot " + snapshotName + " del failed !", e);
        }
    }

	/**
	 * 分页检索表数据。
* (如果在创建表时为此表指定了非默认的命名空间,则需拼写上命名空间名称,格式为【namespace:tablename】)。 * @param tableName 表名称(*)。 * @param startRowKey 起始行键(可以为空,如果为空,则从表中第一行开始检索)。 * @param endRowKey 结束行键(可以为空)。 * @param filterList 检索条件过滤器集合(不包含分页过滤器;可以为空)。 * @param maxVersions 指定最大版本数【如果为最大整数值,则检索所有版本;如果为最小整数值,则检索最新版本;否则只检索指定的版本数】。 * @param pageModel 分页模型(*)。 * @return 返回HBasePageModel分页对象。 */ public static HBasePageModel scanResultByPageFilter(String tableName, byte[] startRowKey, byte[] endRowKey, FilterList filterList, int maxVersions, HBasePageModel pageModel) { if(pageModel == null) { pageModel = new HBasePageModel(10); } if(maxVersions <= 0 ) { //默认只检索数据的最新版本 maxVersions = Integer.MIN_VALUE; } pageModel.initStartTime(); pageModel.initEndTime(); if(StringUtils.isBlank(tableName)) { return pageModel; } Table table = null; try { table = getTable(tableName); int tempPageSize = pageModel.getPageSize(); boolean isEmptyStartRowKey = false; if(startRowKey == null) { //则读取表的第一行记录 Result firstResult = selectFirstResultRow(tableName, filterList); if(firstResult.isEmpty()) { return pageModel; } startRowKey = firstResult.getRow(); } if(pageModel.getPageStartRowKey() == null) { isEmptyStartRowKey = true; pageModel.setPageStartRowKey(startRowKey); } else { if(pageModel.getPageEndRowKey() != null) { pageModel.setPageStartRowKey(pageModel.getPageEndRowKey()); } //从第二页开始,每次都多取一条记录,因为第一条记录是要删除的。 tempPageSize += 1; } Scan scan = new Scan(); scan.setStartRow(pageModel.getPageStartRowKey()); if(endRowKey != null) { scan.setStopRow(endRowKey); } PageFilter pageFilter = new PageFilter(pageModel.getPageSize() + 1); if(filterList != null) { filterList.addFilter(pageFilter); scan.setFilter(filterList); } else { scan.setFilter(pageFilter); } if(maxVersions == Integer.MAX_VALUE) { scan.setMaxVersions(); } else if(maxVersions == Integer.MIN_VALUE) { } else { scan.setMaxVersions(maxVersions); } ResultScanner scanner = table.getScanner(scan); List resultList = new ArrayList(); int index = 0; for(Result rs : 
scanner.next(tempPageSize)) { if(isEmptyStartRowKey == false && index == 0) { index += 1; continue; } if(!rs.isEmpty()) { resultList.add(rs); } index += 1; } scanner.close(); pageModel.setResultList(resultList); } catch (Exception e) { e.printStackTrace(); } finally { try { table.close(); } catch (IOException e) { e.printStackTrace(); } } int pageIndex = pageModel.getPageIndex() + 1; pageModel.setPageIndex(pageIndex); if(pageModel.getResultList().size() > 0) { //获取本次分页数据首行和末行的行键信息 byte[] pageStartRowKey = pageModel.getResultList().get(0).getRow(); byte[] pageEndRowKey = pageModel.getResultList().get(pageModel.getResultList().size() - 1).getRow(); pageModel.setPageStartRowKey(pageStartRowKey); pageModel.setPageEndRowKey(pageEndRowKey); } int queryTotalCount = pageModel.getQueryTotalCount() + pageModel.getResultList().size(); pageModel.setQueryTotalCount(queryTotalCount); pageModel.initEndTime(); pageModel.printTimeInfo(); return pageModel; } /** * 检索指定表的第一行记录。
* (如果在创建表时为此表指定了非默认的命名空间,则需拼写上命名空间名称,格式为【namespace:tablename】)。 * @param tableName 表名称(*)。 * @param filterList 过滤器集合,可以为null。 * @return */ public static Result selectFirstResultRow(String tableName,FilterList filterList) { if(StringUtils.isBlank(tableName)) return null; Table table = null; try { table = getTable(tableName); Scan scan = new Scan(); if(filterList != null) { scan.setFilter(filterList); } ResultScanner scanner = table.getScanner(scan); Iterator iterator = scanner.iterator(); int index = 0; while(iterator.hasNext()) { Result rs = iterator.next(); if(index == 0) { scanner.close(); return rs; } } } catch (IOException e) { e.printStackTrace(); } finally { try { table.close(); } catch (IOException e) { e.printStackTrace(); } } return null; } /** * 异步往指定表添加数据 * @param tablename 表名 * @param puts 需要添加的数据 * @return long 返回执行时间 * @throws IOException */ public static long put(String tablename, List puts) throws Exception { long currentTime = System.currentTimeMillis(); Connection conn = getConnection(); final BufferedMutator.ExceptionListener listener = new BufferedMutator.ExceptionListener() { @Override public void onException(RetriesExhaustedWithDetailsException e, BufferedMutator mutator) { for (int i = 0; i < e.getNumExceptions(); i++) { System.out.println("Failed to sent put " + e.getRow(i) + "."); logger.error("Failed to sent put " + e.getRow(i) + "."); } } }; BufferedMutatorParams params = new BufferedMutatorParams(TableName.valueOf(tablename)) .listener(listener); params.writeBufferSize(5 * 1024 * 1024); final BufferedMutator mutator = conn.getBufferedMutator(params); try { mutator.mutate(puts); mutator.flush(); } finally { mutator.close(); closeConnect(conn); } return System.currentTimeMillis() - currentTime; } /** * 异步往指定表添加数据 * @param tablename 表名 * @param put 需要添加的数据 * @return long 返回执行时间 * @throws IOException */ public static long put(String tablename, SocPut put) throws Exception { return put(tablename, Arrays.asList(put)); } /** * 往指定表添加数据 * @param 
tablename 表名 * @param puts 需要添加的数据 * @return long 返回执行时间 * @throws IOException */ public static long putByHTable(String tablename, List puts) throws Exception { long currentTime = System.currentTimeMillis(); Connection conn = getConnection(); HTable htable = (HTable) conn.getTable(TableName.valueOf(tablename)); htable.setAutoFlushTo(false); htable.setWriteBufferSize(5 * 1024 * 1024); try { htable.put((List)puts); htable.flushCommits(); } finally { htable.close(); closeConnect(conn); } return System.currentTimeMillis() - currentTime; } /** * 删除单条数据 * @param tablename * @param row * @throws IOException */ public static void delete(String tablename, String row) throws IOException { Table table = getTable(tablename); if(table!=null){ try { Delete d = new Delete(row.getBytes()); table.delete(d); } finally { table.close(); } } } /** * 删除多行数据 * @param tablename * @param rows * @throws IOException */ public static void delete(String tablename, String[] rows) throws IOException { Table table = getTable(tablename); if (table != null) { try { List list = new ArrayList(); for (String row : rows) { Delete d = new Delete(row.getBytes()); list.add(d); } if (list.size() > 0) { table.delete(list); } } finally { table.close(); } } } /** * 关闭连接 * @throws IOException */ public static void closeConnect(Connection conn){ if(null != conn){ try { // conn.close(); } catch (Exception e) { logger.error("closeConnect failure !", e); } } } /** * 获取单条数据 * @param tablename * @param row * @return * @throws IOException */ public static Result getRow(String tablename, byte[] row) { Table table = getTable(tablename); Result rs = null; if(table!=null){ try{ Get g = new Get(row); rs = table.get(g); } catch (IOException e) { logger.error("getRow failure !", e); } finally{ try { table.close(); } catch (IOException e) { logger.error("getRow failure !", e); } } } return rs; } /** * 获取多行数据 * @param tablename * @param rows * @return * @throws Exception */ public static Result[] getRows(String tablename, 
List rows) { Table table = getTable(tablename); List gets = null; Result[] results = null; try { if (table != null) { gets = new ArrayList(); for (T row : rows) { if(row!=null){ gets.add(new Get(Bytes.toBytes(String.valueOf(row)))); }else{ throw new RuntimeException("hbase have no data"); } } } if (gets.size() > 0) { results = table.get(gets); } } catch (IOException e) { logger.error("getRows failure !", e); } finally { try { table.close(); } catch (IOException e) { logger.error("table.close() failure !", e); } } return results; } /** * 扫描整张表,注意使用完要释放。 * @param tablename * @return * @throws IOException */ public static ResultScanner get(String tablename) { Table table = getTable(tablename); ResultScanner results = null; if (table != null) { try { Scan scan = new Scan(); scan.setCaching(1000); results = table.getScanner(scan); } catch (IOException e) { logger.error("getResultScanner failure !", e); } finally { try { table.close(); } catch (IOException e) { logger.error("table.close() failure !", e); } } } return results; } /** * 格式化输出结果 */ public static void formatRow(KeyValue[] rs){ for(KeyValue kv : rs){ System.out.println(" column family : " + Bytes.toString(kv.getFamily())); System.out.println(" column : " + Bytes.toString(kv.getQualifier())); System.out.println(" value : " + Bytes.toString(kv.getValue())); System.out.println(" timestamp : " + String.valueOf(kv.getTimestamp())); System.out.println("--------------------"); } } /** * byte[] 类型的长整形数字转换成 long 类型 * @param byteNum * @return */ public static long bytes2Long(byte[] byteNum) { long num = 0; for (int ix = 0; ix < 8; ++ix) { num <<= 8; num |= (byteNum[ix] & 0xff); } return num; } }

HBaseService 实现类 HBaseServiceImpl.java

package cn.ngsoc.hbase;

import cn.ngsoc.hbase.util.HBaseUtil;
import cn.ngsoc.hbase.util.ThreadPoolUtil;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.Arrays;
import java.util.List;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;

/**
 * HBaseService implementation: routes HBase writes through a shared thread
 * pool so callers can choose between fire-and-forget and wait-for-completion
 * submission.
 * Created by babylon on 2016/12/5.
 */
public class HBaseServiceImpl extends AbstractHBaseService{

    private static final Logger logger = LoggerFactory.getLogger(HBaseServiceImpl.class);

    // Shared worker pool for all put jobs issued by this service.
    private ThreadPoolUtil threadPool = ThreadPoolUtil.init();

    @Override
    public void put(String tableName, Put put, boolean waiting) {
        batchPut(tableName, Arrays.asList(put), waiting);
    }

    /**
     * Submits a batch of puts on the thread pool.
     *
     * @param tableName target table
     * @param puts      rows to write
     * @param waiting   true to block until the pool drains so results are
     *                  visible on return; false to return immediately
     */
    @Override
    public void batchPut(final String tableName, final List puts, boolean waiting) {
        threadPool.execute(new Runnable() {
            @Override
            public void run() {
                try {
                    HBaseUtil.put(tableName, puts);
                } catch (Exception e) {
                    logger.error("batchPut failed . ", e);
                }
            }
        });

        if (waiting) {
            try {
                threadPool.awaitTermination();
            } catch (InterruptedException e) {
                // Restore the interrupt flag so callers can observe it
                // (was swallowed before).
                Thread.currentThread().interrupt();
                logger.error("HBase put job thread pool await termination time out.", e);
            }
        }
    }

    @Override
    public Result[] getRows(String tablename, List rows) {
        return HBaseUtil.getRows(tablename, rows);
    }

    @Override
    public Result getRow(String tablename, byte[] row) {
        return HBaseUtil.getRow(tablename, row);
    }

    /**
     * Submits puts asynchronously via the HTable client-side write buffer.
     *
     * @param tableName target table
     * @param puts      rows to write
     * @param waiting   true to block on the returned Future until the job
     *                  finishes; false to return immediately
     */
    public void batchAsyncPut(final String tableName, final List puts, boolean waiting) {
        Future f = threadPool.submit(new Runnable() {
            @Override
            public void run() {
                try {
                    HBaseUtil.putByHTable(tableName, puts);
                } catch (Exception e) {
                    logger.error("batchPut failed . ", e);
                }
            }
        });

        if (waiting) {
            try {
                f.get();
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt(); // preserve interrupt status (was swallowed)
                logger.error("多线程异步提交返回数据执行失败.", e);
            } catch (ExecutionException e) {
                logger.error("多线程异步提交返回数据执行失败.", e);
            }
        }
    }

    /**
     * Creates a table.
     *
     * @param tableName       table name
     * @param columnFamilies  column family names
     * @param preBuildRegion  true to pre-split into 16 regions (row keys must
     *                        then carry the matching prefix)
     */
    public void createTable(String tableName, String[] columnFamilies, boolean preBuildRegion) throws Exception {
        HBaseUtil.createTable(tableName, columnFamilies, preBuildRegion);
    }

}

代码使用案例  HBasePutTest.java

import cn.ngsoc.hbase.HBase;
import cn.ngsoc.hbase.util.HBaseUtil;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.Before;
import org.junit.Test;

import java.util.Arrays;
import java.util.Random;

/**
 * HBase smoke tests for table creation, single-row put and get.
 * Created by babylon on 2016/11/29.
 */
public class HBasePutTest {

    @Before
    public void init(){
        // Point the client at the ZooKeeper quorum before every test.
        HBaseUtil.init("zkHost");
    }

    @Test
    public void testPut()  {
        Put put = new Put(Bytes.toBytes("rowKey"));
        String severity = String.valueOf(new Random().nextInt(10));
        put.addColumn(Bytes.toBytes("events"), Bytes.toBytes("severity"), Bytes.toBytes(severity));
        HBase.put("logs", Arrays.asList(put), true);
    }

    @Test
    public void testGet()  {
        Result result = HBaseUtil.getRow("logs", HBase.generateRowkey("rowKey"));
        HBaseUtil.formatRow(result.raw());
    }

    @Test
    public void testCreateTable()  {
        try {
            HBaseUtil.createTable("logs", new String[]{"events"}, true);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

}
完整代码见 Github

https://github.com/JasonBabylon/hbase-operations-with-java/tree/master

你可能感兴趣的:(hbase)