近些日,想通过Docker深入了解下Hbase,但是发现,这方面的博文内容分散各处,良莠不齐,故把近期的学习心得总结如下,以飨读者。
本博文侧重于实验理解,原理可以参考:
https://blog.csdn.net/qq_26803795/article/details/106239302
准备工作:Docker
Windows安装Docker:直接官网下载;
Linux安装Docker:https://www.jianshu.com/p/2dae7b13ce2f
WSL2安装Docker:https://blog.csdn.net/qq_36743482/article/details/107623321
docker search hbase
docker pull harisekhon/hbase
docker run -d -p 2181:2181 -p 8080:8080 -p 8085:8085 -p 9090:9090 -p 9095:9095 -p 16000:16000 -p 16010:16010 -p 16201:16201 -p 16301:16301 -p 16030:16030 -p 16020:16020 --name hbase001 harisekhon/hbase
这里需要提下,一定要做宿主机和镜像端口间的映射,这样才可以在本机正常访问。这里做这么多端口的映射是为了后续通过Java API操作Hbase(这也是我踩了很多坑才总结出来的)。
访问localhost:16010,界面如下:
这里有个问题,一定要注意下,我们直接点击上图红框中的ServerName,看下是否能正常访问。
一般情况下不能正常访问,需要在hosts文件(Win:C:\Windows\System32\drivers\etc\hosts Linux:/etc/hosts)中配置如下信息(重要):
127.0.0.1 上图红框中的字符串(容器ID)
注意:如果你是搭建在远程Docker上的,你需要做的是在本地hosts配置
远程主机IP 容器ID
再次刷新,发现可以正常访问,至此,算是完成了Docker Hbase的安装。
Hbase说到底,是数据库,因而增删改查是必须的。
首先通过终端进入Docker:
docker exec -it hbase001 bash
进入Hbase shell
hbase shell
我们可以看到进入了Hbase shell,如下图:
在进入"增"这个环节之前,我们需要先创建一张表,通过 help 'create' 命令可以查看create命令的详情。
下面,我们创建一个namespace为default,表名为test,列族column family为cf的表:
create 'test','cf'
通过 list 命令可以查看所有的表:
删除表:
首先需要disable table,然后才能删除表(后续示例仍使用test表,若已删除请重新创建):
disable 'test'
drop 'test'
put 'test','1','cf:name','arvin'
put 'test','1','cf:age','18'
put 'test','2','cf:name','hbase'
put 'test','2','cf:age','20'
put 'test','3','cf:name','hadoop'
put 'test','3','cf:age','22'
put 'test','11','cf:name','spark'
put 'test','11','cf:age','24'
put 'test','21','cf:name','hive'
put 'test','21','cf:age','30'
deleteall 'test','3'  # 删除该rowkey下的所有数据
delete 'test','3','cf:name'  # 删除某一cell的数据
改即为增
Hbase的查询有两种方式
get 'test','1';
get 'test','1','cf:name'
scan 'test'
单个字段
scan 'test',{COLUMNS=>'cf:age'}
多个字段
scan 'test',{COLUMNS=>['cf:name','cf:age']}
rowkey以 1 开头的:
scan 'test',{FILTER=>"PrefixFilter('1')"}
rowkey包含1的:
scan 'test',{FILTER=>"RowFilter(=,'substring:1')"}
查找字段n开头,且值包含r的数据:
scan 'test',{FILTER=>"ColumnPrefixFilter('n') AND ValueFilter(=,'substring:r')"}
根据rowkey范围查询:
rowkey 属于 [‘1’,‘11’) 范围的(这里rowkey是字符串,故按字典序排序)。
scan 'test',{STARTROW=>'1',ENDROW=>'11'}
根据时间戳查询(TimestampsFilter匹配的是列出的具体时间戳,而不是区间;按时间范围查询可用 scan 'test',{TIMERANGE=>[起始时间戳,结束时间戳]}):
scan 'test',{FILTER=>"TimestampsFilter(1595994749984,1595994750032)"}
scan 'test',{LIMIT=>2,REVERSED=>true}
以上仅仅是基本介绍,详细操作,可去Hbase官网学习。
下面进入激动人心的Java API访问Hbase环节。
<dependency>
    <groupId>org.apache.hbase</groupId>
    <artifactId>hbase-client</artifactId>
    <version>2.1.3</version>
</dependency>
主类
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
/**
 * Small demo of basic HBase operations (create/drop table, put, delete, scan, get)
 * through the HBase Java client.
 *
 * <p>Every operation opens its own {@link Connection} via {@link #initHbase()} and
 * closes it before returning, so no connection outlives the call that created it.
 *
 * @author arvin
 */
public class HbaseTest {
    private static final String COLUMNS_FAMILY_1 = "cf1";
    private static final String COLUMNS_FAMILY_2 = "cf2";
    private static final String QUALIFIER_NAME = "name";
    private static final String QUALIFIER_AGE = "age";

    /**
     * Opens a new connection to the HBase instance reachable through ZooKeeper on
     * {@code 127.0.0.1:2181}.
     *
     * @return a new connection; the caller owns it and must close it
     * @throws IOException if the connection cannot be created
     */
    public static Connection initHbase() throws IOException {
        Configuration configuration = HBaseConfiguration.create();
        configuration.set("hbase.zookeeper.quorum", "127.0.0.1");
        configuration.set("hbase.zookeeper.property.clientPort", "2181");
        // 16000 is the master port published by the docker run command of this setup
        // (60000 is not mapped to the host).
        configuration.set("hbase.master", "127.0.0.1:16000");
        return ConnectionFactory.createConnection(configuration);
    }

    /**
     * Creates a table with the given column families unless it already exists.
     *
     * @param tableName table to create
     * @param cols      names of the column families to add
     * @throws IOException if talking to HBase fails
     */
    public static void createTable(TableName tableName, String[] cols) throws IOException {
        try (Connection connection = initHbase(); Admin admin = connection.getAdmin()) {
            if (admin.tableExists(tableName)) {
                System.out.println("Table Already Exists!");
                return;
            }
            HTableDescriptor hTableDescriptor = new HTableDescriptor(tableName);
            for (String col : cols) {
                hTableDescriptor.addFamily(new HColumnDescriptor(col));
            }
            admin.createTable(hTableDescriptor);
            System.out.println("Table Create Successful");
        }
    }

    /**
     * Converts a plain table name into a {@link TableName}.
     *
     * @param tableName table name as a string
     * @return the corresponding {@link TableName}
     */
    public static TableName getTbName(String tableName) {
        return TableName.valueOf(tableName);
    }

    /**
     * Disables and then deletes the table if it exists.
     *
     * @param tableName table to drop
     * @throws IOException if talking to HBase fails
     */
    public static void deleteTable(TableName tableName) throws IOException {
        try (Connection connection = initHbase(); Admin admin = connection.getAdmin()) {
            if (admin.tableExists(tableName)) {
                // A table has to be disabled before it can be deleted.
                admin.disableTable(tableName);
                admin.deleteTable(tableName);
                System.out.println("Table Delete Successful");
            } else {
                System.out.println("Table does not exist!");
            }
        }
    }

    /**
     * Writes the student's name and age into column family {@code cf1}, using the
     * student's id as the row key.
     *
     * @param tableName target table
     * @param student   record to store
     * @throws IOException if the put fails
     */
    public static void insertData(TableName tableName, Student student) throws IOException {
        byte[] family = Bytes.toBytes(COLUMNS_FAMILY_1);
        Put put = new Put(Bytes.toBytes(student.getId()));
        put.addColumn(family, Bytes.toBytes(QUALIFIER_NAME), Bytes.toBytes(student.getName()));
        put.addColumn(family, Bytes.toBytes(QUALIFIER_AGE), Bytes.toBytes(student.getAge()));
        try (Connection connection = initHbase()) {
            connection.getTable(tableName).put(put);
        }
        System.out.println("Data insert success:" + student.toString());
    }

    /**
     * Deletes every column stored under the given row key.
     *
     * @param tableName target table
     * @param rowKey    row to delete
     * @throws IOException if the delete fails
     */
    public static void deleteData(TableName tableName, String rowKey) throws IOException {
        // No column is added to the Delete, so the whole row is removed.
        Delete delete = new Delete(Bytes.toBytes(rowKey));
        try (Connection connection = initHbase()) {
            connection.getTable(tableName).delete(delete);
        }
        System.out.println("Delete Success");
    }

    /**
     * Scans column family {@code cf1} of the table and maps each row to a {@link Student}.
     *
     * @param tableName table to scan
     * @return one student per scanned row, in scan order; empty if the scan yields no rows
     * @throws IOException if the scan fails
     */
    public static List<Student> allScan(TableName tableName) throws IOException {
        Scan scan = new Scan().addFamily(Bytes.toBytes(COLUMNS_FAMILY_1));
        List<Student> students = new ArrayList<>();
        try (Connection connection = initHbase();
             ResultScanner results = connection.getTable(tableName).getScanner(scan)) {
            for (Result result : results) {
                Student student = new Student();
                student.setId(Bytes.toString(result.getRow()));
                fillStudent(student, result);
                students.add(student);
            }
        }
        return students;
    }

    /**
     * Fetches a single row and maps it to a {@link Student}.
     *
     * @param tableName table to read
     * @param rowKey    row to fetch
     * @return a student whose id is {@code rowKey}; name and age are set only for the
     *         columns present in the returned row
     * @throws IOException if the get fails
     */
    public static Student singleGet(TableName tableName, String rowKey) throws IOException {
        Student student = new Student();
        student.setId(rowKey);
        Get get = new Get(Bytes.toBytes(rowKey));
        try (Connection connection = initHbase()) {
            fillStudent(student, connection.getTable(tableName).get(get));
        }
        System.out.println(student.toString());
        return student;
    }

    /** Copies the "name" and "age" cells of {@code result} into {@code student}. */
    private static void fillStudent(Student student, Result result) {
        if (result.isEmpty()) {
            // Nothing to copy; also avoids iterating a cell array that may be absent.
            return;
        }
        for (Cell cell : result.rawCells()) {
            String colName = Bytes.toString(
                    cell.getQualifierArray(), cell.getQualifierOffset(), cell.getQualifierLength());
            String value = Bytes.toString(
                    cell.getValueArray(), cell.getValueOffset(), cell.getValueLength());
            switch (colName) {
                case QUALIFIER_NAME:
                    student.setName(value);
                    break;
                case QUALIFIER_AGE:
                    student.setAge(value);
                    break;
                default:
                    System.out.println("unknown columns");
            }
        }
    }

    /**
     * Reads the value of a single cell as a string.
     *
     * <p>This is best-effort: an {@link IOException} raised while reading is reported on
     * standard output and {@code null} is returned instead of propagating it.
     *
     * @param tableName table to read
     * @param rowKey    row of the cell
     * @param cf        column family of the cell
     * @param column    column qualifier of the cell
     * @return the cell value, or {@code null} if no value was returned or the read failed
     * @throws IOException declared for interface compatibility
     */
    public static String getCell(TableName tableName, String rowKey, String cf, String column) throws IOException {
        byte[] family = Bytes.toBytes(cf);
        byte[] qualifier = Bytes.toBytes(column);
        Get get = new Get(Bytes.toBytes(rowKey)).addColumn(family, qualifier);
        String rst = null;
        try (Connection connection = initHbase()) {
            Result result = connection.getTable(tableName).get(get);
            rst = Bytes.toString(result.getValue(family, qualifier));
        } catch (IOException exception) {
            // Keep the original best-effort behaviour, but surface the cause.
            System.out.println("columnFamily or column does not exists: " + exception.getMessage());
        }
        System.out.println("Value is: " + rst);
        return rst;
    }

    public static void main(String[] args) throws IOException {
        Student student = new Student();
        student.setId("1");
        student.setName("Arvin");
        student.setAge("18");
        String table = "student";
        // Uncomment the operation you want to try:
        // createTable(getTbName(table), new String[]{COLUMNS_FAMILY_1, COLUMNS_FAMILY_2});
        // deleteTable(getTbName(table));
        // insertData(getTbName(table), student);
        // deleteData(getTbName(table), "1");
        // singleGet(getTbName(table), "2");
        getCell(getTbName(table), "2", "cf1", "name");
    }
}
Student类
/**
 * Mutable value holder for a student record: an id, a name and an age, all kept as strings.
 *
 * @author Arvin
 */
public class Student {
    private String id;
    private String name;
    private String age;

    /** @return the student's id, or {@code null} if none has been set */
    public String getId() {
        return this.id;
    }

    /** @param id the student's id */
    public void setId(String id) {
        this.id = id;
    }

    /** @return the student's name, or {@code null} if none has been set */
    public String getName() {
        return this.name;
    }

    /** @param name the student's name */
    public void setName(String name) {
        this.name = name;
    }

    /** @return the student's age as text, or {@code null} if none has been set */
    public String getAge() {
        return this.age;
    }

    /** @param age the student's age as text */
    public void setAge(String age) {
        this.age = age;
    }

    /**
     * Renders the record as {@code Student{id='..', name='..', age=..}}; id and name are
     * wrapped in single quotes, age is not.
     */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("Student{");
        text.append("id='").append(id).append('\'');
        text.append(", name='").append(name).append('\'');
        text.append(", age=").append(age);
        text.append('}');
        return text.toString();
    }
}