package org.apache.hadoop.hbase;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.filter.PrefixFilter;
import org.apache.hadoop.hbase.util.Bytes;
public class HbaseClientTest {

    /**
     * Get an HTable instance by table name
     */
    public static HTable getTable(String name) throws Exception {
        // get the HBase configuration instance
        Configuration conf = HBaseConfiguration.create();
        // get the HBase table instance
        HTable table = new HTable(conf, name);
        return table;
    }
    /**
     * Get data from the HBase table.
     *
     * Shell equivalent: get 'tbname','rowkey','cf:col'
     *
     * Prints column family -> qualifier -> value -> timestamp
     */
    public static void getData(HTable table) throws Exception {
        Get get = new Get(Bytes.toBytes("20161119_10003"));
        // configure the Get: fetch a single column or a whole column family
        //get.addColumn(Bytes.toBytes("info"), Bytes.toBytes("name"));
        get.addFamily(Bytes.toBytes("info"));
        // execute the Get
        Result rs = table.get(get);
        // print the data
        for (Cell cell : rs.rawCells()) {
            System.out.println(
                Bytes.toString(CellUtil.cloneFamily(cell))
                + "->" +
                Bytes.toString(CellUtil.cloneQualifier(cell))
                + "->" +
                Bytes.toString(CellUtil.cloneValue(cell))
                + "->" +
                cell.getTimestamp()
            );
            System.out.println("------------------------------");
        }
    }
    /**
     * Put data into the HBase table.
     *
     * Shell equivalent: put 'tbname','rowkey','cf:col','value'
     */
    public static void putData(HTable table) throws Exception {
        // create the Put for a row key
        Put put = new Put(Bytes.toBytes("20161119_10003"));
        // configure the Put: family, qualifier, value
        put.add(
            Bytes.toBytes("info"),
            Bytes.toBytes("age"),
            Bytes.toBytes("20")
        );
        // write the Put to the table
        table.put(put);
        // print the row to verify
        getData(table);
    }
    /**
     * Delete data from the HBase table.
     *
     * Shell equivalent: delete 'tbname','rowkey','cf:col'
     */
    public static void deleteData(HTable table) throws Exception {
        // create the Delete for a row key
        Delete del = new Delete(Bytes.toBytes("20161119_10003"));
        // configure the Delete: deleteColumn removes only the latest version,
        // deleteColumns removes all versions of the column
        //del.deleteColumn(Bytes.toBytes("info"), Bytes.toBytes("age"));
        del.deleteColumns(Bytes.toBytes("info"), Bytes.toBytes("age"));
        // apply the Delete
        table.delete(del);
        // print the row to verify
        getData(table);
    }
    /**
     * Scan the whole table.
     *
     * Shell equivalent: scan 'tbname'
     */
    public static void scanData(HTable table) throws Exception {
        // create the Scan
        Scan scan = new Scan();
        // run the Scan
        ResultScanner rsscan = table.getScanner(scan);
        for (Result rs : rsscan) {
            System.out.println(Bytes.toString(rs.getRow()));
            for (Cell cell : rs.rawCells()) {
                System.out.println(
                    Bytes.toString(CellUtil.cloneFamily(cell))
                    + "->" +
                    Bytes.toString(CellUtil.cloneQualifier(cell))
                    + "->" +
                    Bytes.toString(CellUtil.cloneValue(cell))
                    + "->" +
                    cell.getTimestamp()
                );
            }
            System.out.println("------------------------------");
        }
        rsscan.close();
    }
    /**
     * Scan the table over a limited row range.
     *
     * Shell equivalent: scan 'tbname',{STARTROW => 'row1',STOPROW => 'row2'}
     */
    public static void rangeData(HTable table) throws Exception {
        // create the Scan
        Scan scan = new Scan();
        // configure the Scan: restrict columns, families, or the row range
        //scan.addColumn(Bytes.toBytes("info"), Bytes.toBytes("name"));
        //scan.addFamily(family);
        //scan.setStartRow(Bytes.toBytes("20161119_10002"));
        //scan.setStopRow(Bytes.toBytes("20161119_10003"));
        Filter filter = new PrefixFilter(Bytes.toBytes("2016111"));
        scan.setFilter(filter);
        // scan tuning
        // whether this scan uses the block cache
        scan.setCacheBlocks(true);
        // number of rows fetched per RPC (scanner caching)
        scan.setCaching(100);
        // maximum number of cells returned per next() call (batch size)
        scan.setBatch(10);
        // caching and batch together determine how many RPCs the scan issues
        // run the Scan
        ResultScanner rsscan = table.getScanner(scan);
        for (Result rs : rsscan) {
            System.out.println(Bytes.toString(rs.getRow()));
            for (Cell cell : rs.rawCells()) {
                System.out.println(
                    Bytes.toString(CellUtil.cloneFamily(cell))
                    + "->" +
                    Bytes.toString(CellUtil.cloneQualifier(cell))
                    + "->" +
                    Bytes.toString(CellUtil.cloneValue(cell))
                    + "->" +
                    cell.getTimestamp()
                );
            }
            System.out.println("------------------------------");
        }
        rsscan.close();
    }
    public static void main(String[] args) throws Exception {
        HTable table = getTable("test:tb1");
        getData(table);
        putData(table);
        deleteData(table);
        scanData(table);
        rangeData(table);
        table.close();
    }
}
Data stored in an HBase table can be used as the input of a MapReduce job, and MapReduce results can be written back into an HBase table. As an example, we run one of the MapReduce programs that ship with HBase:
$ export HBASE_HOME=/opt/modules/hbase-0.98.6-hadoop2
$ export HADOOP_HOME=/opt/modules/hadoop-2.5.0
# $HBASE_HOME/bin/hbase mapredcp prints the jars HBase needs when running on YARN
$ export HADOOP_CLASSPATH=`$HBASE_HOME/bin/hbase mapredcp`
$ $HADOOP_HOME/bin/yarn jar lib/hbase-server-0.98.6-hadoop2.jar rowcounter test:tb1
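For a custom job, HBase's TableMapReduceUtil helper wires a table in as the MapReduce input. Below is a minimal sketch of such a job against the HBase 0.98 API used in the client code above; the class names and the counter are illustrative, and only the table name test:tb1 comes from the example.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;

public class HbaseCellCounter {

    // Mapper that receives one HBase row per call and counts its cells
    public static class CellCountMapper extends TableMapper<ImmutableBytesWritable, Result> {
        @Override
        protected void map(ImmutableBytesWritable rowKey, Result columns, Context context)
                throws IOException, InterruptedException {
            context.getCounter("hbase", "cells").increment(columns.rawCells().length);
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        Job job = Job.getInstance(conf, "hbase-cell-counter");
        job.setJarByClass(HbaseCellCounter.class);

        Scan scan = new Scan();
        scan.setCaching(100);        // rows fetched per RPC for the scan feeding the mappers
        scan.setCacheBlocks(false);  // block cache is usually disabled for MapReduce scans

        // use the HBase table as the job input
        TableMapReduceUtil.initTableMapperJob(
                "test:tb1", scan, CellCountMapper.class,
                ImmutableBytesWritable.class, Result.class, job);

        // map-only job, results are reported through counters
        job.setNumReduceTasks(0);
        job.setOutputFormatClass(NullOutputFormat.class);

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

Such a job is submitted the same way as the rowcounter example, with HADOOP_CLASSPATH set from hbase mapredcp.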
HBase data typically originates in log files or in an RDBMS and has to be migrated into HBase tables. Three approaches are common: (1) the HBase Put API; (2) HBase's bulk-load tools; (3) a custom MapReduce job (a sketch of such a job is shown above).
importtsv is the MapReduce-based bulk import tool that ships with HBase, exposed as a command-line utility. With a single command it loads delimiter-separated data files stored on HDFS (tab-delimited by default) into an HBase table.
A quick test:
[wulei@bigdata-00 datas]$ cat tb1.tsv
10001 zhangsan 20
10002 lisi 22
10003 wangwu 30
$ bin/hdfs dfs -put /opt/datas/tb1.tsv /
hbase> create 'student','info'
$ $HADOOP_HOME/bin/yarn jar lib/hbase-server-0.98.6-hadoop2.jar importtsv -Dimporttsv.separator=$'\t' -Dimporttsv.columns=HBASE_ROW_KEY,info:name,info:age student /tb1.tsv
-Dimporttsv.separator sets the field separator (a tab here, which is also the default; the $'\t' quoting makes the shell pass a literal tab character), and -Dimporttsv.columns maps each TSV field to the row key (HBASE_ROW_KEY) and the target columns. The result in the HBase shell:

hbase(main):010:0> scan 'student'
ROW COLUMN+CELL
10001 column=info:age, timestamp=1480123167099, value=20
10001 column=info:name, timestamp=1480123167099, value=zhangsan
10002 column=info:age, timestamp=1480123167099, value=22
10002 column=info:name, timestamp=1480123167099, value=lisi
2 row(s) in 0.8210 seconds
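importtsv also covers the bulk-load approach mentioned above: with -Dimporttsv.bulk.output it writes HFiles to an HDFS directory instead of issuing Puts, and the completebulkload tool then moves those files into the table. A sketch of that two-step flow (the /user/wulei/hfileout output path is only illustrative):

$ $HADOOP_HOME/bin/yarn jar lib/hbase-server-0.98.6-hadoop2.jar importtsv -Dimporttsv.bulk.output=/user/wulei/hfileout -Dimporttsv.columns=HBASE_ROW_KEY,info:name,info:age student /tb1.tsv
$ $HADOOP_HOME/bin/yarn jar lib/hbase-server-0.98.6-hadoop2.jar completebulkload /user/wulei/hfileout student

Because the generated HFiles are moved into the table directly, this path bypasses the write-ahead log and MemStore and is usually the faster option for large initial loads.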