注意: HTablePool是HBase连接池的老用法,该类在0.94,0.95和0.96中已经不建议使用,在0.98.1版本以后已经移除,仅供参考
HBase提供了Java Api的访问接口,掌握这个就跟Java应用使用RDBMS时需要JDBC一样重要,本文介绍常用的Api。
将HBase解压后根目录下的hbase-0.90.2.jar、hbase-0.90.2-tests.jar和lib子目录下所有jar 包添加到本工程的Classpath下。
import java.io.IOException; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.HColumnDescriptor; import org.apache.hadoop.hbase.HTableDescriptor; import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.client.Delete; import org.apache.hadoop.hbase.client.Get; import org.apache.hadoop.hbase.client.HBaseAdmin; import org.apache.hadoop.hbase.client.HTable; import org.apache.hadoop.hbase.client.HTablePool; import org.apache.hadoop.hbase.client.Put; import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.client.ResultScanner; import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.hbase.util.Bytes; public class Hbase { // 声明静态配置 static Configuration conf = null; static { conf = HBaseConfiguration.create(); conf.set("hbase.zookeeper.property.clientPort", "2181"); conf.set("hbase.zookeeper.quorum", "192.168.1.100"); conf.set("hbase.master", "192.168.1.100:600000"); } /* * 创建表 * * @tableName 表名 * * @family 列族列表 */ public static void creatTable(String tableName, String[] family) throws Exception { HBaseAdmin admin = new HBaseAdmin(conf); HTableDescriptor desc = new HTableDescriptor(tableName); for (int i = 0; i < family.length; i++) { desc.addFamily(new HColumnDescriptor(family[i])); } if (admin.tableExists(tableName)) { System.out.println("table Exists!"); System.exit(0); } else { admin.createTable(desc); System.out.println("create table Success!"); } } /* * 为表添加数据(适合知道有多少列族的固定表) * * @rowKey rowKey * * @tableName 表名 * * @column1 第一个列族列表 * * @value1 第一个列的值的列表 * * @column2 第二个列族列表 * * @value2 第二个列的值的列表 */ public static void addData(String rowKey, String tableName, String[] column1, String[] value1, String[] column2, String[] value2) throws IOException { Put put = new Put(Bytes.toBytes(rowKey));// 设置rowkey HTable table = new HTable(conf, Bytes.toBytes(tableName));// HTabel负责跟记录相关的操作如增删改查等// // 获取表 HColumnDescriptor[] columnFamilies = table.getTableDescriptor() // 获取所有的列族 .getColumnFamilies(); for (int i = 0; i < columnFamilies.length; i++) { String familyName = columnFamilies[i].getNameAsString(); // 获取列族名 if (familyName.equals("article")) { // article列族put数据 for (int j = 0; j < column1.length; j++) { put.add(Bytes.toBytes(familyName), Bytes.toBytes(column1[j]), Bytes.toBytes(value1[j])); } } if (familyName.equals("author")) { // author列族put数据 for (int j = 0; j < column2.length; j++) { put.add(Bytes.toBytes(familyName), Bytes.toBytes(column2[j]), Bytes.toBytes(value2[j])); } } } table.put(put); System.out.println("add data Success!"); } //批量添加 private void write(String hbaseTableName, String columnFamily, String qualifier, String keyPrefix, Collection<String> contents) { HTableInterface table = null; try { table = tablePool.getTable(hbaseTableName); List<Put> putList = new ArrayList<Put>(); int idx = 0; for (String line : contents) { String rowKey = keyPrefix + idx; if (contents.size() == 1) rowKey = keyPrefix; idx++; Put put = new Put(rowKey.getBytes()); put.add(columnFamily.getBytes(), qualifier.getBytes(), line.getBytes()); putList.add(put); if (putList.size() >= BATCH_SIZE) { table.put(putList); table.flushCommits(); putList.clear(); } } table.put(putList); table.flushCommits(); } catch (Throwable e) { LOG.error("ERROR: write into table: " + hbaseTableName + ", prefix key:" + keyPrefix, e); } finally { if (table != null) { try { table.close(); } catch (IOException e) { LOG.error("close table error, write into table: " + hbaseTableName + ", prefix key:" + keyPrefix, e); } } } } /* * 根据rwokey查询 * * @rowKey rowKey * * @tableName 表名 */ public static Result getResult(String tableName, String rowKey) throws IOException { Get get = new Get(Bytes.toBytes(rowKey)); HTable table = new HTable(conf, Bytes.toBytes(tableName));// 获取表 Result result = table.get(get); for (KeyValue kv : result.list()) { System.out.println("family:" + Bytes.toString(kv.getFamily())); System.out .println("qualifier:" + Bytes.toString(kv.getQualifier())); System.out.println("value:" + Bytes.toString(kv.getValue())); System.out.println("Timestamp:" + kv.getTimestamp()); System.out.println("-------------------------------------------"); } return result; } /* * 遍历查询hbase表 * * @tableName 表名 */ public static void getResultScann(String tableName) throws IOException { Scan scan = new Scan(); ResultScanner rs = null; HTable table = new HTable(conf, Bytes.toBytes(tableName)); try { rs = table.getScanner(scan); for (Result r : rs) { for (KeyValue kv : r.list()) { System.out.println("row:" + Bytes.toString(kv.getRow())); System.out.println("family:" + Bytes.toString(kv.getFamily())); System.out.println("qualifier:" + Bytes.toString(kv.getQualifier())); System.out .println("value:" + Bytes.toString(kv.getValue())); System.out.println("timestamp:" + kv.getTimestamp()); System.out .println("-------------------------------------------"); } } } finally { rs.close(); } } /* * 遍历查询hbase表 * * @tableName 表名 */ public static void getResultScann(String tableName, String start_rowkey, String stop_rowkey) throws IOException { Scan scan = new Scan(); scan.setStartRow(Bytes.toBytes(start_rowkey)); scan.setStopRow(Bytes.toBytes(stop_rowkey)); ResultScanner rs = null; HTable table = new HTable(conf, Bytes.toBytes(tableName)); try { rs = table.getScanner(scan); for (Result r : rs) { for (KeyValue kv : r.list()) { System.out.println("row:" + Bytes.toString(kv.getRow())); System.out.println("family:" + Bytes.toString(kv.getFamily())); System.out.println("qualifier:" + Bytes.toString(kv.getQualifier())); System.out .println("value:" + Bytes.toString(kv.getValue())); System.out.println("timestamp:" + kv.getTimestamp()); System.out .println("-------------------------------------------"); } } } finally { rs.close(); } } /* * 查询表中的某一列 * * @tableName 表名 * * @rowKey rowKey */ public static void getResultByColumn(String tableName, String rowKey, String familyName, String columnName) throws IOException { HTable table = new HTable(conf, Bytes.toBytes(tableName)); Get get = new Get(Bytes.toBytes(rowKey)); get.addColumn(Bytes.toBytes(familyName), Bytes.toBytes(columnName)); // 获取指定列族和列修饰符对应的列 Result result = table.get(get); for (KeyValue kv : result.list()) { System.out.println("family:" + Bytes.toString(kv.getFamily())); System.out .println("qualifier:" + Bytes.toString(kv.getQualifier())); System.out.println("value:" + Bytes.toString(kv.getValue())); System.out.println("Timestamp:" + kv.getTimestamp()); System.out.println("-------------------------------------------"); } } /* * 更新表中的某一列 * * @tableName 表名 * * @rowKey rowKey * * @familyName 列族名 * * @columnName 列名 * * @value 更新后的值 */ public static void updateTable(String tableName, String rowKey, String familyName, String columnName, String value) throws IOException { HTable table = new HTable(conf, Bytes.toBytes(tableName)); Put put = new Put(Bytes.toBytes(rowKey)); put.add(Bytes.toBytes(familyName), Bytes.toBytes(columnName), Bytes.toBytes(value)); table.put(put); System.out.println("update table Success!"); } /* * 查询某列数据的多个版本 * * @tableName 表名 * * @rowKey rowKey * * @familyName 列族名 * * @columnName 列名 */ public static void getResultByVersion(String tableName, String rowKey, String familyName, String columnName) throws IOException { HTable table = new HTable(conf, Bytes.toBytes(tableName)); Get get = new Get(Bytes.toBytes(rowKey)); get.addColumn(Bytes.toBytes(familyName), Bytes.toBytes(columnName)); get.setMaxVersions(5); Result result = table.get(get); for (KeyValue kv : result.list()) { System.out.println("family:" + Bytes.toString(kv.getFamily())); System.out .println("qualifier:" + Bytes.toString(kv.getQualifier())); System.out.println("value:" + Bytes.toString(kv.getValue())); System.out.println("Timestamp:" + kv.getTimestamp()); System.out.println("-------------------------------------------"); } /* * List<?> results = table.get(get).list(); Iterator<?> it = * results.iterator(); while (it.hasNext()) { * System.out.println(it.next().toString()); } */ } /* * 删除指定的列 * * @tableName 表名 * * @rowKey rowKey * * @familyName 列族名 * * @columnName 列名 */ public static void deleteColumn(String tableName, String rowKey, String falilyName, String columnName) throws IOException { HTable table = new HTable(conf, Bytes.toBytes(tableName)); Delete deleteColumn = new Delete(Bytes.toBytes(rowKey)); deleteColumn.deleteColumns(Bytes.toBytes(falilyName), Bytes.toBytes(columnName)); table.delete(deleteColumn); System.out.println(falilyName + ":" + columnName + "is deleted!"); } /* * 删除指定的列 * * @tableName 表名 * * @rowKey rowKey */ public static void deleteAllColumn(String tableName, String rowKey) throws IOException { HTable table = new HTable(conf, Bytes.toBytes(tableName)); Delete deleteAll = new Delete(Bytes.toBytes(rowKey)); table.delete(deleteAll); System.out.println("all columns are deleted!"); } //批量删除 private void delete(String hbaseTableName, String columnFamily, String qualifier, Collection<String> rowKeys) { HTableInterface table = null; try { table = tablePool.getTable(hbaseTableName); List<Delete> deleteList = new ArrayList<Delete>(); int idx = 0; for (String r : rowKeys) { Delete del = new Delete(r.getBytes()); deleteList.add(del); if (deleteList.size() >= BATCH_SIZE) { table.delete(deleteList); table.flushCommits(); deleteList.clear(); } idx++; } table.delete(deleteList); table.flushCommits(); LOG.info("deleted " + idx + " rows from HBase table. " + hbaseTableName); } catch (Throwable e) { LOG.error("delete from table: " + hbaseTableName, e); } finally { if (table != null) { try { table.close(); } catch (IOException e) { LOG.error("close table error, delete from table: " + hbaseTableName, e); } } } } /* * 删除表 * * @tableName 表名 */ public static void deleteTable(String tableName) throws IOException { HBaseAdmin admin = new HBaseAdmin(conf); admin.disableTable(tableName); admin.deleteTable(tableName); System.out.println(tableName + "is deleted!"); } public static void main(String[] args) throws Exception { // 创建表 String tableName = "blog2"; String[] family = { "article", "author" }; // creatTable(tableName, family); // 为表添加数据 String[] column1 = { "title", "content", "tag" }; String[] value1 = { "Head First HBase", "HBase is the Hadoop database. Use it when you need random, realtime read/write access to your Big Data.", "Hadoop,HBase,NoSQL" }; String[] column2 = { "name", "nickname" }; String[] value2 = { "nicholas", "lee" }; addData("rowkey1", "blog2", column1, value1, column2, value2); addData("rowkey2", "blog2", column1, value1, column2, value2); addData("rowkey3", "blog2", column1, value1, column2, value2); // 遍历查询 getResultScann("blog2", "rowkey4", "rowkey5"); // 根据row key范围遍历查询 getResultScann("blog2", "rowkey4", "rowkey5"); // 查询 getResult("blog2", "rowkey1"); // 查询某一列的值 getResultByColumn("blog2", "rowkey1", "author", "name"); // 更新列 updateTable("blog2", "rowkey1", "author", "name", "bin"); // 查询某一列的值 getResultByColumn("blog2", "rowkey1", "author", "name"); // 查询某列的多版本 getResultByVersion("blog2", "rowkey1", "author", "name"); // 删除一列 deleteColumn("blog2", "rowkey1", "author", "nickname"); // 删除所有列 deleteAllColumn("blog2", "rowkey1"); // 删除表 deleteTable("blog2"); } }
注意:可能大家没看到更新数据的操作,其实更新的操作跟添加完全一致,只不过是添加呢rowkey不存在,更新呢rowkey已经存在,并且timstamp相同的情况下,还有就是目前好像还没办法实现hbase数据的分页查询,不知道有没有人知道怎么做
HBase性能优化建议:
针对前面的代码,有很多不足之处,在此我就不修改上面的代码了,只是提出建议的地方,大家自己加上
1)配置
当你调用create方法时将会加载两个配置文件:hbase-default.xml and hbase-site.xml,利用的是当前的java类路径, 代码中configuration设置的这些配置将会覆盖hbase-default.xml和hbase-site.xml中相同的配置,如果两个配置文件都存在并且都设置好了相应参上面的属性下面的属性即可
2)关于入库
官方建议
table.setAutoFlush(false); //数据入库之前先设置此项为false
table.setflushCommits();//入库完成后,手动刷入数据
注意:
在入库过程中,put.setWriteToWAL(true/flase);
关于这一项如果不希望大量数据在存储过程中丢失,建议设置为true,如果仅是在测试演练阶段,为了节省入库时间建议设置为false
3)关于获取表实例
HTablePool pool = new HTablePool(configuration, Integer.MAX_VALUE);
HTable table = (HTable) pool.getTable(tableName);
建议用表连接池的方式获取表,具体池有什么作用,我想用过数据库连接池的同学都知道,我就不再重复
不建议使用new HTable(configuration,tableName);的方式获取表
4)关于查询
建议每个查询语句都放入try catch语句块,并且finally中要进行关闭ResultScanner实例以及将不使用的表重新放入到HTablePool中的操作,具体做法如下
public static void QueryAll(String tableName) { HTablePool pool = new HTablePool(configuration, Integer.MAX_VALUE); HTable table = null; ResultScanner rs = null; try { Scan scan = new Scan(); table = (HTable) pool.getTable(tableName); rs = table.getScanner(scan); for (Result r : rs) { System.out.println("获得到rowkey:" + new String(r.getRow())); for (KeyValue keyValue : r.raw()) { System.out.println("列:" + new String(keyValue.getFamily()) + "====值:" + new String(keyValue.getValue())); } } } catch (IOException e) { e.printStackTrace(); }finally{ rs.close();// 最后还得关闭 pool.putTable(table); //实际应用过程中,pool获取实例的方式应该抽取为单例模式的,不应在每个方法都重新获取一次(单例明白?就是抽取到专门获取pool的逻辑类中,具体逻辑为如果pool存在着直接使用,如果不存在则new) } }
所以,以上代码有缺陷的地方,感兴趣的同学可以针对优化建议作出相应修改
注意: HTablePool是HBase连接池的老用法,该类在0.94,0.95和0.96中已经不建议使用,在0.98.1版本以后已经移除,仅供参考
0.94,0.95和0.96HConnectionManager替代了HTablePool,实例代码如下:
package fulong.bigdata.hbase; import java.io.IOException; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.CellUtil; import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.client.HConnection; import org.apache.hadoop.hbase.client.HConnectionManager; import org.apache.hadoop.hbase.client.HTableInterface; import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.client.ResultScanner; import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.hbase.util.Bytes; public class ConnectionPoolTest { private static final String QUORUM = "FBI001,FBI002,FBI003"; private static final String CLIENTPORT = "2181"; private static final String TABLENAME = "rd_ns:itable"; private static Configuration conf = null; private static HConnection conn = null; static{ try { conf = HBaseConfiguration.create(); conf.set("hbase.zookeeper.quorum", QUORUM); conf.set("hbase.zookeeper.property.clientPort", CLIENTPORT); conn = HConnectionManager.createConnection(conf); } catch (IOException e) { e.printStackTrace(); } } public static void main(String[] args) throws IOException { HTableInterface htable = ConnectionPoolTest.conn.getTable(TABLENAME); try { Scan scan = new Scan(); ResultScanner rs = htable.getScanner(scan); for (Result r : rs.next(5)) { for (Cell cell : r.rawCells()) { System.out.println("Rowkey : " + Bytes.toString(r.getRow()) + " Familiy:Quilifier : " + Bytes.toString(CellUtil.cloneQualifier(cell)) + " Value : " + Bytes.toString(CellUtil.cloneValue(cell)) + " Time : " + cell.getTimestamp()); } } } finally { htable.close(); } } }