HBase Java API编程实验
编程环境:
开发工具使用Eclipse,简单好用利于初学者
方式一(必学):创建一个普通Java Project,直接导入hbase-1.2.0-cdh5.7.0.tar.gz压缩包的lib子目录中与HBaseAPI编程有关的所有jar包,建议初学者使用
操作步骤:Eclipse项目=》右键菜单=》Build Path =》Configure Build Path =》Libraries =》Add External Jars =》选中所有Jar包 =》Apply
方式二(选学):创建Maven Project,修改POM.xml文件,增加与HBase API编程相关的依赖配置参数,操作麻烦依赖互联网,不建议初学者使用
HBase集群环境:HBase Java开发环境一般采用Hadoop伪分布式+HBase伪分布式(使用内置ZooKeeper),只需要一台虚拟机,调试运行程序对实体主机的性能内存要求不高
记得修改win7的C:\Windows\System32\drivers\etc\hosts文件,增加IP和主机名映射,例如192.168.56.20 hadoop
启动虚拟机后,依次执行脚本命令start-dfs.sh 和 start-hbase.sh,启动hadoop和hbase
1) 指定一个表名,判断该表在HBase上是否存在
执行命令 hbase shell 进入HBase Shell
执行命令list 查看存在哪些表;再执行exists命令判断指定的表是否存在(下面以表“students”为例,后面的Java示例检查的是表“testTable”)
hbase(main):006:0> exists 'students'
Table students does exist
0 row(s) in 0.2580 seconds
实现一个查看指定表名的表是否存在的Java源码程序
示例源码1:
package testHBase;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.TableName;
/**
 * Example 1: checks whether a table with a given name exists in HBase.
 */
public class testHBase
{
    /**
     * Connects to HBase through ZooKeeper and prints whether the table
     * "testTable" exists.
     *
     * @param args command-line arguments (unused)
     * @throws IOException if the connection cannot be created or the existence check fails
     */
    public static void main(String[] args) throws IOException
    {
        // HBaseConfiguration.create() builds the Configuration object used by the client.
        Configuration configuration = HBaseConfiguration.create();
        // Point the client at the ZooKeeper quorum. The host name "hadoop" has to be
        // resolvable on the development machine (add it to the hosts file).
        configuration.set("hbase.zookeeper.quorum", "hadoop");
        // Further settings that this example leaves disabled:
        //configuration.set("hbase.rootdir", "hdfs://hadoop:8020/hbase");
        //configuration.set("hbase.zookeeper.property.clientPort", "2181");

        // Connection and Admin are both Closeable. try-with-resources releases them
        // (admin first, then connection) even when the check throws.
        try (Connection connection = ConnectionFactory.createConnection(configuration);
             Admin admin = connection.getAdmin())
        {
            // TableName.valueOf turns the plain string into the TableName object the API expects.
            TableName tableName = TableName.valueOf("testTable");
            // tableExists returns true when the table is present in HBase.
            boolean isExists = admin.tableExists(tableName);
            if (isExists)
            {
                System.out.println("Table exists");
            }
            else
            {
                System.out.println("Table dose not exists");
            }
        }
    }
}
示例源码2(把主业务逻辑重构为一个类方法,方便代码复用)
package testHBase;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.TableName;
/**
 * Example 2: the existence check refactored into a reusable static method.
 */
public class testHBase
{
    /** Client configuration shared by all methods of this class. */
    public static Configuration configuration;

    // Build the configuration once and set the ZooKeeper quorum host.
    static
    {
        configuration = HBaseConfiguration.create();
        configuration.set("hbase.zookeeper.quorum", "hadoop");
    }

    /**
     * Tells whether a table exists in HBase.
     *
     * <p>Each call opens its own connection, so the connection and the Admin
     * handle are closed before the method returns instead of being leaked.
     *
     * @param tableName name of the table to look up
     * @return {@code true} if the table exists, {@code false} otherwise
     * @throws IOException if the connection cannot be created or the lookup fails
     */
    public static boolean isTableExist(String tableName) throws IOException
    {
        try (Connection connection = ConnectionFactory.createConnection(configuration);
             Admin admin = connection.getAdmin())
        {
            return admin.tableExists(TableName.valueOf(tableName));
        }
    }

    /**
     * Prints whether the table "testTable1" exists.
     *
     * @param args command-line arguments (unused)
     * @throws IOException if the existence check fails
     */
    public static void main(String[] args) throws IOException
    {
        boolean isExists = isTableExist("testTable1");
        if (isExists)
        {
            System.out.println("Table exists");
        }
        else
        {
            System.out.println("Table dose not exists");
        }
    }
}
2) 指定一个表名,在HBase上创建一个新表
示例源码:
package testHBase;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.TableName;
/**
 * Example: creates a new HBase table with one or several column families.
 */
public class testHBase
{
    /** Client configuration shared by all methods of this class. */
    public static Configuration configuration;
    /** Connection to the HBase cluster, opened once when the class is loaded. */
    public static Connection connection;
    /** Admin handle used for table management operations. */
    public static Admin admin;

    // Build the configuration, then open the connection and the Admin handle once
    // so that every method of the class can reuse them.
    static
    {
        configuration = HBaseConfiguration.create();
        configuration.set("hbase.zookeeper.quorum", "hadoop");
        try
        {
            connection = ConnectionFactory.createConnection(configuration);
            admin = connection.getAdmin();
        }
        catch (IOException e)
        {
            // Fail fast and keep the cause: continuing with null fields would only
            // surface later as an unrelated NullPointerException.
            throw new ExceptionInInitializerError(e);
        }
    }

    /**
     * Creates a table that has exactly one column family.
     *
     * @param tName  name of the table to create
     * @param cfName name of the single column family
     * @throws IOException if HBase rejects or fails the request
     */
    public static void createTable(String tName, String cfName) throws IOException
    {
        createTable(tName, new String[] {cfName});
    }

    /**
     * Creates a table with the given column families.
     *
     * @param tName   name of the table to create
     * @param cfNames names of the column families; must contain at least one name
     * @throws IllegalArgumentException if {@code cfNames} is null or empty
     * @throws IOException if HBase rejects or fails the request
     */
    public static void createTable(String tName, String[] cfNames) throws IOException
    {
        if (cfNames == null || cfNames.length == 0)
        {
            throw new IllegalArgumentException("At least one column family is required for table " + tName);
        }
        HTableDescriptor tableDescriptor = new HTableDescriptor(TableName.valueOf(tName));
        for (String cfName : cfNames)
        {
            tableDescriptor.addFamily(new HColumnDescriptor(cfName));
        }
        admin.createTable(tableDescriptor);
    }

    /** Closes the Admin handle and then the connection, even if the first close fails. */
    private static void closeResources() throws IOException
    {
        try
        {
            admin.close();
        }
        finally
        {
            connection.close();
        }
    }

    /**
     * Creates the table "testTable2" with three column families.
     *
     * @param args command-line arguments (unused)
     * @throws IOException if the table cannot be created
     */
    public static void main(String[] args) throws IOException
    {
        try
        {
            // Single-family variant:
            // createTable("testTable2", "columnFamily1");
            // Multi-family variant:
            String[] columnFamilies = new String[] {"columnFamily1", "columnFamily2", "columnFamily3"};
            createTable("testTable2", columnFamilies);
        }
        finally
        {
            closeResources();
        }
    }
}
3) 指定一个表名,在HBase上删除该表
示例源码:
package testHBase;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.TableName;
/**
 * Example: deletes an HBase table by name.
 */
public class testHBase
{
    /** Client configuration shared by all methods of this class. */
    public static Configuration configuration;
    /** Connection to the HBase cluster, opened once when the class is loaded. */
    public static Connection connection;
    /** Admin handle used for table management operations. */
    public static Admin admin;

    // Build the configuration, then open the connection and the Admin handle once
    // so that every method of the class can reuse them.
    static
    {
        configuration = HBaseConfiguration.create();
        configuration.set("hbase.zookeeper.quorum", "hadoop");
        try
        {
            connection = ConnectionFactory.createConnection(configuration);
            admin = connection.getAdmin();
        }
        catch (IOException e)
        {
            // Fail fast and keep the cause: continuing with null fields would only
            // surface later as an unrelated NullPointerException.
            throw new ExceptionInInitializerError(e);
        }
    }

    /**
     * Deletes a table if it exists.
     *
     * @param tName name of the table to delete
     * @return {@code true} if the table was deleted, {@code false} if it did not exist
     * @throws IOException if disabling or deleting the table fails
     */
    public static boolean deleteTable(String tName) throws IOException
    {
        TableName tableName = TableName.valueOf(tName);
        // Nothing to delete: report it and return without touching HBase further.
        if (!admin.tableExists(tableName))
        {
            System.out.println("Table dose not exists");
            return false;
        }
        // A table has to be disabled before it can be deleted.
        if (!admin.isTableDisabled(tableName))
        {
            admin.disableTable(tableName);
        }
        admin.deleteTable(tableName);
        return true;
    }

    /** Closes the Admin handle and then the connection, even if the first close fails. */
    private static void closeResources() throws IOException
    {
        try
        {
            admin.close();
        }
        finally
        {
            connection.close();
        }
    }

    /**
     * Deletes the table "testTable2" and prints the outcome.
     *
     * @param args command-line arguments (unused)
     * @throws IOException if the deletion fails
     */
    public static void main(String[] args) throws IOException
    {
        try
        {
            boolean isDeleted = deleteTable("testTable2");
            if (isDeleted)
            {
                System.out.println("Table deleted");
            }
            else
            {
                System.out.println("Table does not deleted");
            }
        }
        finally
        {
            closeResources();
        }
    }
}
4) 列出HBase上的所有表名
示例源码:
package testHBase;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.TableName;
public class testHBase
{
//定义类的静态成员类对象
public static Configuration configuration;
public static Connection connection;
public static Admin admin;
//将创建配置类和连接类的代码重构为静态代码块,有利于代码的复用
//使用 HBaseConfiguration 的静态方法创建一个Configuration类的对象
static
{
configuration = HBaseConfiguration.create();
configuration.set("hbase.zookeeper.quorum", "hadoop");
}
//使用 ConnectionFactory的静态方法创建一个Connection类的对象
static
{
try
{
connection = ConnectionFactory.createConnection(configuration);
admin = connection.getAdmin();
}
catch (IOException e)
{
e.printStackTrace();
}
}
//实现列出所有的表名
public static void listTables() throws IOException
{
//返回表描述器的数组
HTableDescriptor[] tableDescriptor = admin.listTables();
// 循环打印所有的表名
for (int i=0; i
5) 修改表:增加、删除、修改表的列族
示例源码:
方法1:给指定表增加一个列族
方法2:删除指定表的指定列族
方法3:修改指定表的指定列族的属性(最大版本数VERSIONS 、压缩模式COMPRESSION ),修改后在HBase Shell执行desc命令查看表的列族属性:
{NAME => 'newColumnFamily', BLOOMFILTER => 'ROW', VERSIONS => '2147483647', IN_MEMORY => 'false', KEEP_DELETED_CELLS => 'FALSE', DATA_BLOCK_ENCODING => 'NONE', TTL => 'FOREVER', COMPRESSION => 'GZ', MIN_VERSIONS => '0', BLOCKCACHE => 'true', BLOCKSIZE => '65536', REPLICATION_SCOPE => '0'}
package testHBase;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.TableName;
public class testHBase
{
//定义类的静态成员类对象
public static Configuration configuration;
public static Connection connection;
public static Admin admin;
//将创建配置类和连接类的代码重构为静态代码块,有利于代码的复用
//使用 HBaseConfiguration 的静态方法创建一个Configuration类的对象
static
{
configuration = HBaseConfiguration.create();
configuration.set("hbase.zookeeper.quorum", "hadoop");
}
//使用 ConnectionFactory的静态方法创建一个Connection类的对象
static
{
try
{
connection = ConnectionFactory.createConnection(configuration);
admin = connection.getAdmin();
}
catch (IOException e)
{
e.printStackTrace();
}
}
//实现修改表的方法一:为表增加一个新列族
public static void alterTable(String tName, String newColumnFamily) throws IOException
{
//获取表名对象
TableName tableName = TableName.valueOf(tName);
//获取表的描述器
HTableDescriptor tableDescriptor = admin.getTableDescriptor(tableName);
//创建一个新的列族描述器
HColumnDescriptor columnDescriptor = new HColumnDescriptor(newColumnFamily);
//添加新列族描述器到表描述器
tableDescriptor.addFamily(columnDescriptor);
//提交修改表的操作
admin.modifyTable(tableName, tableDescriptor);
}
//实现修改表的方法二:删除表的一个指定列族
public static void alterTable1(String tName, String columnFamily) throws IOException
{
//获取表名对象
TableName tableName = TableName.valueOf(tName);
//获取表的描述器
HTableDescriptor tableDescriptor = admin.getTableDescriptor(tableName);
//表描述器中删除指定列族
tableDescriptor.removeFamily(Bytes.toBytes(columnFamily));
//提交修改表的操作
admin.modifyTable(tableName, tableDescriptor);
}
//实现修改表的方法三:修改表的列族的属性(最大版本数,压缩方式)
public static void alterTable2(String tName, String columnFamily) throws IOException
{
//获取表名对象
TableName tableName = TableName.valueOf(tName);
//获取表的描述器
HTableDescriptor tableDescriptor = admin.getTableDescriptor(tableName);
//创建一个新的列族描述器
HColumnDescriptor columnDescriptor = new HColumnDescriptor(columnFamily);
//修改压缩模式为GZ
columnDescriptor.setCompressionType(Algorithm.GZ);
//修改最大版本数为ALL_VERSIONS
columnDescriptor.setMaxVersions(HConstants.ALL_VERSIONS);
//修改列族
tableDescriptor.modifyFamily(columnDescriptor);
//提交修改表的操作
admin.modifyTable(tableName, tableDescriptor);
}
public static void main(String[] args) throws IOException
{
// 执行更改的方法,增加一个列族
alterTable("testTable", "newColumnFamily");
// 执行更改的方法,删除一个列族
// alterTable1("testTable", "newColumnFamily");
// //执行更改的方法,修改表的列族的属性:最大版本数和压缩模式
// alterTable2("testTable", "newColumnFamily");
}
}
6) 插入新的一列
指定表名、行键、列族名、新列名、列值,插入新的一列,插入后在HBase Shell执行scan命令查看新增加的列
hbase(main):007:0* scan "testTable"
ROW COLUMN+CELL
row1 column=userinfo:age, timestamp=1586160189608, value=20
row1 column=userinfo:name, timestamp=1586160189595, value=Jack
row1 column=userinfo:sex, timestamp=1586160189613, value=male
row2 column=userinfo:age, timestamp=1586160189624, value=19
row2 column=userinfo:name, timestamp=1586160189620, value=Tom
package testHBase;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.TableName;
/**
 * Example: inserts cell values (columns) into a table.
 */
public class testHBase
{
    /** Client configuration shared by all methods of this class. */
    public static Configuration configuration;
    /** Connection to the HBase cluster, opened once when the class is loaded. */
    public static Connection connection;
    /** Admin handle used for table management operations. */
    public static Admin admin;

    // Build the configuration, then open the connection and the Admin handle once
    // so that every method of the class can reuse them.
    static
    {
        configuration = HBaseConfiguration.create();
        configuration.set("hbase.zookeeper.quorum", "hadoop");
        try
        {
            connection = ConnectionFactory.createConnection(configuration);
            admin = connection.getAdmin();
        }
        catch (IOException e)
        {
            // Fail fast and keep the cause: continuing with null fields would only
            // surface later as an unrelated NullPointerException.
            throw new ExceptionInInitializerError(e);
        }
    }

    /**
     * Writes one cell into a table.
     *
     * @param tName        name of the target table
     * @param rowKey       row key of the row to write
     * @param columnFamily column family of the cell
     * @param column       column qualifier of the cell
     * @param cellValue    value to store
     * @throws IOException if the table cannot be opened or the put fails
     */
    public static void insertColulmn(String tName, String rowKey, String columnFamily, String column, String cellValue) throws IOException
    {
        // Table is Closeable; close it after the put instead of leaking one per call.
        try (Table table = connection.getTable(TableName.valueOf(tName)))
        {
            // A Put is addressed by row key and carries family, qualifier and value.
            Put put = new Put(Bytes.toBytes(rowKey));
            put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes(column), Bytes.toBytes(cellValue));
            table.put(put);
        }
    }

    /** Closes the Admin handle and then the connection, even if the first close fails. */
    private static void closeResources() throws IOException
    {
        try
        {
            admin.close();
        }
        finally
        {
            connection.close();
        }
    }

    /**
     * Inserts sample user data for rows "row1" and "row2" into "testTable".
     * The column family "userinfo" is expected to exist already.
     *
     * @param args command-line arguments (unused)
     * @throws IOException if an insert fails
     */
    public static void main(String[] args) throws IOException
    {
        try
        {
            // Arguments: table name, row key, column family, column name, value.
            insertColulmn("testTable", "row1", "userinfo", "name", "Jack");
            insertColulmn("testTable", "row1", "userinfo", "age", "20");
            insertColulmn("testTable", "row1", "userinfo", "sex", "male");
            insertColulmn("testTable", "row2", "userinfo", "name", "Tom");
            insertColulmn("testTable", "row2", "userinfo", "age", "19");
        }
        finally
        {
            closeResources();
        }
    }
}
7) 查询、扫描表
示例源码:
方法1:根据表名、行键查询某行所包含的所有列值
方法2:根据表名、行键、列族名、列名,查询指定列的单元格值
方法3:指定表名,扫描整个表
方法4:指定表名、列族名,扫描表的指定列族
package testHBase;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
import org.apache.hadoop.hbase.util.Bytes;
/**
 * Example: reads data with Get (single row / single cell) and Scan
 * (whole table / one column family).
 */
public class testHBase
{
    /** Client configuration shared by all methods of this class. */
    public static Configuration configuration;
    /** Connection to the HBase cluster, opened once when the class is loaded. */
    public static Connection connection;
    /** Admin handle used for table management operations. */
    public static Admin admin;

    // Build the configuration, then open the connection and the Admin handle once
    // so that every method of the class can reuse them.
    static
    {
        configuration = HBaseConfiguration.create();
        configuration.set("hbase.zookeeper.quorum", "hadoop");
        try
        {
            connection = ConnectionFactory.createConnection(configuration);
            admin = connection.getAdmin();
        }
        catch (IOException e)
        {
            // Fail fast and keep the cause: continuing with null fields would only
            // surface later as an unrelated NullPointerException.
            throw new ExceptionInInitializerError(e);
        }
    }

    /**
     * Prints every cell of one row.
     *
     * @param tName  name of the table to read
     * @param rowKey row key of the row to fetch
     * @throws IOException if the table cannot be opened or the get fails
     */
    public static void getValues(String tName, String rowKey) throws IOException
    {
        try (Table table = connection.getTable(TableName.valueOf(tName)))
        {
            // A Get without further restrictions returns all columns of the row.
            Get get = new Get(Bytes.toBytes(rowKey));
            printCells(table.get(get));
        }
    }

    /**
     * Prints the cell of one specific column of one row.
     *
     * @param tName        name of the table to read
     * @param rowKey       row key of the row to fetch
     * @param columnFamily column family of the wanted cell
     * @param column       column qualifier of the wanted cell
     * @throws IOException if the table cannot be opened or the get fails
     */
    public static void getCellValue(String tName, String rowKey, String columnFamily, String column) throws IOException
    {
        try (Table table = connection.getTable(TableName.valueOf(tName)))
        {
            Get get = new Get(Bytes.toBytes(rowKey));
            // Restrict the Get to the requested family and qualifier.
            get.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes(column));
            printCells(table.get(get));
        }
    }

    /**
     * Scans a whole table and prints every cell.
     *
     * @param tableName name of the table to scan
     * @throws IOException if the table cannot be opened or the scan fails
     */
    public static void scanTable(String tableName) throws IOException
    {
        printScan(tableName, new Scan());
    }

    /**
     * Scans one column family of a table and prints every cell.
     *
     * @param tableName    name of the table to scan
     * @param columnFamily column family the scan is restricted to
     * @throws IOException if the table cannot be opened or the scan fails
     */
    public static void scanTable(String tableName, String columnFamily) throws IOException
    {
        Scan scan = new Scan();
        scan.addFamily(Bytes.toBytes(columnFamily));
        printScan(tableName, scan);
    }

    /** Runs the scan against the table and prints all returned cells; closes table and scanner. */
    private static void printScan(String tableName, Scan scan) throws IOException
    {
        try (Table table = connection.getTable(TableName.valueOf(tableName));
             ResultScanner resultScanner = table.getScanner(scan))
        {
            for (Result result : resultScanner)
            {
                printCells(result);
            }
        }
    }

    /** Prints row key, column family, qualifier and value of every cell in the result. */
    private static void printCells(Result result)
    {
        if (result.isEmpty())
        {
            return;
        }
        for (Cell cell : result.rawCells())
        {
            System.out.println("rowkey:" + Bytes.toString(CellUtil.cloneRow(cell)) + ", column family:"
                    + Bytes.toString(CellUtil.cloneFamily(cell)) + ", column:"
                    + Bytes.toString(CellUtil.cloneQualifier(cell)) + ", value:"
                    + Bytes.toString(CellUtil.cloneValue(cell)));
        }
    }

    /** Closes the Admin handle and then the connection, even if the first close fails. */
    private static void closeResources() throws IOException
    {
        try
        {
            admin.close();
        }
        finally
        {
            connection.close();
        }
    }

    /**
     * Prints all cells of row "row1" in "testTable".
     *
     * @param args command-line arguments (unused)
     * @throws IOException if the read fails
     */
    public static void main(String[] args) throws IOException
    {
        try
        {
            getValues("testTable", "row1");
            // Other read variants:
            // getCellValue("testTable", "row1", "userinfo", "name");
            // scanTable("testTable");
            // scanTable("testTable", "userinfo");
        }
        finally
        {
            closeResources();
        }
    }
}
8) 删除表的列值
示例源码:
方法1:根据表名、行键、列族名、列名删除指定的列值
方法2:根据表名、行键删除指定行的所有值
package testHBase;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.TableName;
/**
 * Example: deletes a single column or a whole row from a table.
 */
public class testHBase
{
    /** Client configuration shared by all methods of this class. */
    public static Configuration configuration;
    /** Connection to the HBase cluster, opened once when the class is loaded. */
    public static Connection connection;
    /** Admin handle used for table management operations. */
    public static Admin admin;

    // Build the configuration, then open the connection and the Admin handle once
    // so that every method of the class can reuse them.
    static
    {
        configuration = HBaseConfiguration.create();
        configuration.set("hbase.zookeeper.quorum", "hadoop");
        try
        {
            connection = ConnectionFactory.createConnection(configuration);
            admin = connection.getAdmin();
        }
        catch (IOException e)
        {
            // Fail fast and keep the cause: continuing with null fields would only
            // surface later as an unrelated NullPointerException.
            throw new ExceptionInInitializerError(e);
        }
    }

    /**
     * Deletes one column of one row.
     *
     * @param tableName    name of the table to modify
     * @param rowKey       row key of the affected row
     * @param columnFamily column family of the column to delete
     * @param column       column qualifier of the column to delete
     * @throws IOException if the table cannot be opened or the delete fails
     */
    public static void deleteColulmn(String tableName, String rowKey, String columnFamily, String column) throws IOException
    {
        // Table is Closeable; close it after the delete instead of leaking one per call.
        try (Table table = connection.getTable(TableName.valueOf(tableName)))
        {
            Delete delete = new Delete(Bytes.toBytes(rowKey));
            // Restrict the delete to the given family and qualifier
            // (addColumns targets all versions of that column).
            delete.addColumns(Bytes.toBytes(columnFamily), Bytes.toBytes(column));
            table.delete(delete);
        }
    }

    /**
     * Deletes all columns of one row.
     *
     * @param tableName name of the table to modify
     * @param rowKey    row key of the row to delete
     * @throws IOException if the table cannot be opened or the delete fails
     */
    public static void deleteRow(String tableName, String rowKey) throws IOException
    {
        try (Table table = connection.getTable(TableName.valueOf(tableName)))
        {
            // A Delete that names only the row key removes the whole row.
            Delete delete = new Delete(Bytes.toBytes(rowKey));
            table.delete(delete);
        }
    }

    /** Closes the Admin handle and then the connection, even if the first close fails. */
    private static void closeResources() throws IOException
    {
        try
        {
            admin.close();
        }
        finally
        {
            connection.close();
        }
    }

    /**
     * Deletes column "userinfo:name" of row "row1" in "testTable".
     *
     * @param args command-line arguments (unused)
     * @throws IOException if the delete fails
     */
    public static void main(String[] args) throws IOException
    {
        try
        {
            deleteColulmn("testTable", "row1", "userinfo", "name");
        }
        finally
        {
            closeResources();
        }
    }
}
9) 过滤器的用法
示例源码:
方法1:根据表名、行键,过滤出指定行键的所有列值
package testHBase;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.filter.BinaryComparator;
import org.apache.hadoop.hbase.filter.CompareFilter;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.filter.PrefixFilter;
import org.apache.hadoop.hbase.filter.RowFilter;
import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.TableName;
/**
 * Example: uses a row-key filter to select the cells of one row during a scan.
 */
public class testHBase
{
    /** Client configuration shared by all methods of this class. */
    public static Configuration configuration;
    /** Connection to the HBase cluster, opened once when the class is loaded. */
    public static Connection connection;
    /** Admin handle used for table management operations. */
    public static Admin admin;

    // Build the configuration, then open the connection and the Admin handle once
    // so that every method of the class can reuse them.
    static
    {
        configuration = HBaseConfiguration.create();
        configuration.set("hbase.zookeeper.quorum", "hadoop");
        try
        {
            connection = ConnectionFactory.createConnection(configuration);
            admin = connection.getAdmin();
        }
        catch (IOException e)
        {
            // Fail fast and keep the cause: continuing with null fields would only
            // surface later as an unrelated NullPointerException.
            throw new ExceptionInInitializerError(e);
        }
    }

    /**
     * Scans a table with a RowFilter and prints every cell whose row key
     * equals the given key.
     *
     * @param tableName name of the table to scan
     * @param rowKey    row key to match
     * @throws IOException if the table cannot be opened or the scan fails
     */
    public static void rowFilter(String tableName, String rowKey) throws IOException
    {
        Scan scan = new Scan();
        // Row filter: EQUAL comparison of the row key using a binary comparator.
        Filter rowFilter = new RowFilter(CompareFilter.CompareOp.EQUAL, new BinaryComparator(Bytes.toBytes(rowKey)));
        scan.setFilter(rowFilter);
        // Alternative: match row keys by prefix instead of by equality.
        // Filter prefixFilter = new PrefixFilter(Bytes.toBytes(rowKey));
        // scan.setFilter(prefixFilter);

        // Table and ResultScanner are Closeable; close both when the scan is done.
        try (Table table = connection.getTable(TableName.valueOf(tableName));
             ResultScanner resultScanner = table.getScanner(scan))
        {
            for (Result result : resultScanner)
            {
                for (Cell cell : result.rawCells())
                {
                    System.out.println("rowkey:" + Bytes.toString(CellUtil.cloneRow(cell)) + ", column family:"
                            + Bytes.toString(CellUtil.cloneFamily(cell)) + ", column:"
                            + Bytes.toString(CellUtil.cloneQualifier(cell)) + ", value:"
                            + Bytes.toString(CellUtil.cloneValue(cell)) + ", timestamp:"
                            + cell.getTimestamp());
                }
            }
        }
    }

    /** Closes the Admin handle and then the connection, even if the first close fails. */
    private static void closeResources() throws IOException
    {
        try
        {
            admin.close();
        }
        finally
        {
            connection.close();
        }
    }

    /**
     * Prints all cells of row "row1" in table "students".
     *
     * @param args command-line arguments (unused)
     * @throws IOException if the scan fails
     */
    public static void main(String[] args) throws IOException
    {
        try
        {
            rowFilter("students", "row1");
        }
        finally
        {
            closeResources();
        }
    }
}
方法2:根据表名、列名前缀,过滤出前缀匹配的列的所有列值
方法3:根据表名、列族名、列名、列值,过滤出指定列值的列
package testHBase;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.filter.BinaryComparator;
import org.apache.hadoop.hbase.filter.ColumnPrefixFilter;
import org.apache.hadoop.hbase.filter.CompareFilter;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.filter.PrefixFilter;
import org.apache.hadoop.hbase.filter.RowFilter;
import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.TableName;
public class testHBase
{
//定义类的静态成员类对象
public static Configuration configuration;
public static Connection connection;
public static Admin admin;
//将创建配置类和连接类的代码重构为静态代码块,有利于代码的复用
//使用 HBaseConfiguration 的静态方法创建一个Configuration类的对象
static
{
configuration = HBaseConfiguration.create();
configuration.set("hbase.zookeeper.quorum", "hadoop");
}
//使用 ConnectionFactory的静态方法创建一个Connection类的对象
static
{
try
{
connection = ConnectionFactory.createConnection(configuration);
admin = connection.getAdmin();
}
catch (IOException e)
{
e.printStackTrace();
}
}
//根据表名、列名前缀,过滤出前缀匹配的列的所有列值
public static void columnFilter(String tableName, String column) throws IOException
{
Table table = connection.getTable(TableName.valueOf(tableName)); //获取操作表的Table对象
Scan scan = new Scan(); //创建扫描器对象
Filter columnPrefixFilter = new ColumnPrefixFilter(Bytes.toBytes("Eng"));//创建列前缀过滤器
scan.setFilter(columnPrefixFilter);//设置过滤器为列前缀过滤器
ResultScanner resultScanner = table.getScanner(scan);//获得用于检索数据的ResultScanner对象
for (Result result : resultScanner)
{
Cell[] cells = result.rawCells();//返回单元格Cell数组
for(Cell cell : cells)
{
System.out.println("rowkey:" + Bytes.toString(CellUtil.cloneRow(cell)) + ", column family:"
+ Bytes.toString(CellUtil.cloneFamily(cell)) + ", column:"
+ Bytes.toString(CellUtil.cloneQualifier(cell)) + ", value:"
+ Bytes.toString(CellUtil.cloneValue(cell)) + ", timestamp:"
+ cell.getTimestamp());
}
}
}
//根据表名、列族名、列名、列值,过滤出指定列值的列
public static void singleColumnFilter(String tableName, String columnFamily, String column, String value) throws IOException
{
Table table = connection.getTable(TableName.valueOf(tableName)); //获取操作表的Table对象
Scan scan = new Scan(); //创建扫描器对象
SingleColumnValueFilter singleColumnValueFilter = new SingleColumnValueFilter(Bytes.toBytes(columnFamily), Bytes.toBytes(column), CompareFilter.CompareOp.EQUAL, Bytes.toBytes(value));//创建单列值前缀过滤器,二进制比较,完全相等
// SingleColumnValueFilter singleColumnValueFilter = new SingleColumnValueFilter(Bytes.toBytes(columnFamily), Bytes.toBytes(column), CompareFilter.CompareOp.EQUAL, new SubstringComparator(value));//创建单列值前缀过滤器,子字符串比较
singleColumnValueFilter.setFilterIfMissing(true);
scan.setFilter(singleColumnValueFilter);//设置过滤器为列前缀过滤器
scan.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes(column));//指定要扫描的列族名、列值
ResultScanner resultScanner = table.getScanner(scan);//获得用于检索数据的ResultScanner对象
for (Result result : resultScanner)
{
Cell[] cells = result.rawCells();//返回单元格Cell数组
for(Cell cell : cells)
{
System.out.println("rowkey:" + Bytes.toString(CellUtil.cloneRow(cell)) + ", column family:"
+ Bytes.toString(CellUtil.cloneFamily(cell)) + ", column:"
+ Bytes.toString(CellUtil.cloneQualifier(cell)) + ", value:"
+ Bytes.toString(CellUtil.cloneValue(cell)) + ", timestamp:"
+ cell.getTimestamp());
}
}
}
public static void main(String[] args) throws IOException
{
columnFilter("students", "Eng");
//singleColumnFilter("students", "info", "name", "Jack");
}
}