Hbase学习笔记(三)——Shell + Java API

Hbase——Shell + Java API

  • 一、Hbase的shell操作
  • 二、Hbase的Java API操作
    • 1. 创建maven工程,导入jar包
    • 2. DDL操作
    • 3. DML操作
      • 3.1 Get
      • 3.2 Put
      • 3.3 Delete
      • 3.4 Scan
    • 4. 过滤器

一、Hbase的shell操作

命令 描述
Group name:general
help ‘命令名’ 查看命令的使用描述
whoami 查看当前用户
version 查看hbase版本信息
status 查看hbase集群的状态信息
table_help 查看表的帮助文档
Group name:namespace
list_namespace 列举当前所有的namespace
create_namespace 创建namespace
drop_namespace 删除namespace
describe_namespace 查看namespace的信息
alter_namespace 修改namespace
list_namespace_tables 列举指定namespace下的所有表
Group name:DDL
alter 修改列族
create 创建表
describe 查看表相关的详细信息
enable 启用表
disable 禁用表
is_enabled 判断表是否被启用
is_disabled 判断表是否被禁用
drop 删除表
exists 判断表是否存在
list 列举表
Group name: DML
put 用于向表中插入 [一列] 数据
get 从表中获取一条rowkey数据
scan 从表中获取多条rowkey数据
delete 删除一条数据
  • 注意事项
  1. 命令结尾不要加分号
  2. shell 命令行默认是向后删除,Ctrl+backspace 是向前删除
  3. 一旦不小心进入错误命令行,需要马上输入一条正确的指令回到正确的命令行,否则需要强制终止进程,重新进入

二、Hbase的Java API操作

1. 创建maven工程,导入jar包

	<repositories>
        <repository>
            <id>cloudera</id>
            <url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
        </repository>
    </repositories>

    <dependencies>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>2.6.0-mr1-cdh5.14.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-client</artifactId>
            <version>1.2.0-cdh5.14.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-server</artifactId>
            <version>1.2.0-cdh5.14.0</version>
        </dependency>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.12</version>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>org.testng</groupId>
            <artifactId>testng</artifactId>
            <version>6.14.3</version>
            <scope>test</scope>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.0</version>
                <configuration>
                    <source>1.8</source>
                    <target>1.8</target>
                    <encoding>UTF-8</encoding>
                </configuration>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-shade-plugin</artifactId>
                <version>2.2</version>
                <executions>
                    <execution>
                        <phase>package</phase>
                        <goals>
                            <goal>shade</goal>
                        </goals>
                        <configuration>
                            <filters>
                                <filter>
                                    <artifact>*:*</artifact>
                                    <excludes>
                                        <exclude>META-INF/*.SF</exclude>
                                        <exclude>META-INF/*.DSA</exclude>
                                        <exclude>META-INF/*.RSA</exclude>
                                    </excludes>
                                </filter>
                            </filters>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>

2. DDL操作

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import java.io.IOException;

/**
 * DDL demo: creates a namespace and then a table through the HBase admin API.
 *
 * <p>Fix vs. the original: the Connection and HBaseAdmin were closed with plain
 * calls at the end of main(), so any exception thrown by a DDL operation leaked
 * both resources. try-with-resources now guarantees cleanup on every path.
 *
 * @author AidenBrett
 */
public class HbaseDDL {
    public static void main(String[] args) throws IOException {
        // Configuration object managing this client's settings
        Configuration conf = HBaseConfiguration.create();
        // Address of the cluster: the ZooKeeper quorum HBase registers with
        conf.set("hbase.zookeeper.quorum", "node-1:2181,node-2:2181,node-3:2181");
        // Both Connection and HBaseAdmin are Closeable; close in reverse order automatically
        try (Connection conn = ConnectionFactory.createConnection(conf);
             HBaseAdmin admin = (HBaseAdmin) conn.getAdmin()) {
            // shell: create_namespace 'hbase'
            NamespaceDescriptor descriptor = NamespaceDescriptor.create("hbase").build();
            admin.createNamespace(descriptor);

            // shell: create 'hbase:tbname','cf'
            String tbName = "hbase:tbname";
            // If the table already exists, drop it first (a table must be disabled before deletion)
            if (admin.tableExists(tbName)) {
                admin.disableTable(tbName);
                admin.deleteTable(tbName);
            }
            // Table descriptor: table name plus its column families
            HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(tbName));
            HColumnDescriptor family = new HColumnDescriptor("cf".getBytes());
            family.setMaxVersions(3);          // keep up to 3 versions per cell
            family.setBlockCacheEnabled(true); // cache this family's blocks on read
            desc.addFamily(family);
            admin.createTable(desc);
        }
    }
}
  • 注意事项:一般而言,DDL操作都是在shell客户端完成

3. DML操作

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.jupiter.api.Test;

import java.io.IOException;

/**
 * DML demo skeleton: one shared HBase connection plus open/close helpers
 * used by the get/put/delete/scan examples.
 *
 * <p>Fix vs. the original: close(Table) called {@code connect().close()}, which
 * opened a brand-new Connection just to close it immediately, while the
 * Connection actually backing the table was never closed (a leak). The
 * Connection is now cached in a static field and closed for real.
 *
 * @author AidenBrett
 */
public class HbaseDML {
    // Shared cluster connection; created lazily by connect(), released by close()
    private static Connection conn;

    /**
     * Opens (or reuses) the cluster connection and returns the demo table.
     *
     * @return a Table handle for "ns:tbname"
     * @throws IOException if the connection cannot be established
     */
    private static Table connect() throws IOException {
        if (conn == null || conn.isClosed()) {
            // Configuration object managing this client's settings
            Configuration conf = HBaseConfiguration.create();
            // Address of the cluster: the ZooKeeper quorum
            conf.set("hbase.zookeeper.quorum", "node-1:2181,node-2:2181,node-3:2181");
            conn = ConnectionFactory.createConnection(conf);
        }
        return conn.getTable(TableName.valueOf("ns:tbname"));
    }

	/**
     * Closes the table handle and the shared connection.
     *
     * @param table the table handle returned by {@link #connect()}
     * @throws IOException if closing either resource fails
     */
    private void close(Table table) throws IOException {
        table.close();
        if (conn != null) {
            conn.close();
        }
    }

    //get
    //put
    //delete
    //scan
}

3.1 Get

    /**
     * Reads a single row, mirroring the shell commands:
     * 		get 'ns:tbname','rowkey'
     * 		get 'ns:tbname','rowkey','cf'
     * 		get 'ns:tbname','rowkey','cf:col'
     *
     * @throws IOException on connection or read failure
     */
    @Test
    public void get() throws IOException {
        Table table = connect();
        // A Get fetches exactly one row, addressed by its rowkey
        Get request = new Get("rowkey".getBytes());
        // Optional narrowing — uncomment as needed:
//        request.addFamily("cf".getBytes());//restrict to one column family
//        request.addColumn("cf".getBytes(), "col".getBytes());//restrict to a single column
        // The Result holds every cell of that rowkey
        Result row = table.get(request);
        // One Cell per column: print family, qualifier, value and timestamp
        for (Cell cell : row.rawCells()) {
            String family = Bytes.toString(CellUtil.cloneFamily(cell));
            String qualifier = Bytes.toString(CellUtil.cloneQualifier(cell));
            String value = Bytes.toString(CellUtil.cloneValue(cell));
            System.out.println(family + "\t" + qualifier + "\t" + value + "\t" + cell.getTimestamp());
        }
        close(table);
    }

3.2 Put

	/**
     * Writes one cell, mirroring the shell command:
     * 		put 'ns:tbname','rowkey','cf:col','value'
     *
     * @throws IOException on connection or write failure
     */
    @Test
    public void put() throws IOException {
        Table table = connect();
        // A Put targets one row; addColumn sets cf:col = value on that row
        Put mutation = new Put("rowkey".getBytes());
        mutation.addColumn("cf".getBytes(), "col".getBytes(), "value".getBytes());
        table.put(mutation);
        close(table);
    }

3.3 Delete

	/**
     * Deletes a column, mirroring the shell command:
     * 		delete 'ns:tbname','rowkey','cf:col'
     *
     * @throws IOException on connection or write failure
     */
    @Test
    public void delete() throws IOException {
        Table table = connect();
        Delete removal = new Delete("rowkey".getBytes());
        // addColumn marks only the newest version of cf:col for deletion
        removal.addColumn("cf".getBytes(), "col".getBytes());
        // addColumns marks every version of cf:col (demo shows both variants)
        removal.addColumns("cf".getBytes(), "col".getBytes());
        table.delete(removal);
        close(table);
    }

3.4 Scan

    /**
     * Scans the whole table, mirroring the shell commands:
     * 		scan 'tbname'
     * 		scan + filter
     *
     * <p>Fix vs. the original: the ResultScanner was never closed (leaking the
     * region-server scanner lease) and — unlike every sibling method — the
     * table/connection were never released via close(table).
     *
     * @throws IOException on connection or read failure
     */
    @Test
    public void scan() throws IOException {
        Table table = connect();
        // A Scan with no filters/ranges reads every row
        Scan scan = new Scan();
        // ResultScanner is a stream of Result objects, one per rowkey;
        // try-with-resources releases the server-side scanner lease
        try (ResultScanner scanner = table.getScanner(scan)) {
            for (Result result : scanner) {
                // Print the rowkey first...
                System.out.println(Bytes.toString(result.getRow()));
                // ...then every column (cell) of that row
                for (Cell cell : result.rawCells()) {
                    System.out.println(
                            Bytes.toString(CellUtil.cloneFamily(cell)) + "\t" +
                                    Bytes.toString(CellUtil.cloneQualifier(cell)) + "\t" +
                                    Bytes.toString(CellUtil.cloneValue(cell)) + "\t" +
                                    cell.getTimestamp()
                    );
                }
                System.out.println();
            }
        }
        close(table);
    }

4. 过滤器

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.*;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.jupiter.api.Test;

import java.io.IOException;

/**
 * Filter demo: builds the common HBase filter types and runs a filtered scan.
 *
 * <p>Fixes vs. the original: the shared Connection is now cached and really
 * closed (close() used to call {@code connect().close()}, closing a brand-new
 * connection and leaking the live one), and the ResultScanner is closed via
 * try-with-resources.
 *
 * @author AidenBrett
 */
public class HbaseFilter {
    // Shared cluster connection; created lazily by connect(), released by close()
    private static Connection conn;

    /**
     * Opens (or reuses) the cluster connection and returns the demo table.
     *
     * @return a Table handle for "student:stu_info"
     * @throws IOException if the connection cannot be established
     */
    private static Table connect() throws IOException {
        if (conn == null || conn.isClosed()) {
            // Configuration object managing this client's settings
            Configuration conf = HBaseConfiguration.create();
            // Address of the cluster: the ZooKeeper quorum
            conf.set("hbase.zookeeper.quorum", "cluster01:2181,cluster02:2181,cluster03:2181");
            conn = ConnectionFactory.createConnection(conf);
        }
        return conn.getTable(TableName.valueOf("student:stu_info"));
    }

    /**
     * Demonstrates every common filter, then runs a scan with the row filter:
     * 		scan 'tbname'
     * 		scan + filter
     *
     * <p>NOTE(review): most filters below (including filterList) are built for
     * demonstration only — exactly one, rowFilter, is applied to the scan,
     * matching the original example's behavior.
     *
     * @throws IOException on connection or read failure
     */
    @Test
    public void scan() throws IOException {
        Table table = connect();
        Scan scan = new Scan();

        // Simplest "filtering": a rowkey range — start is inclusive, stop exclusive
        scan.setStartRow("rowkey".getBytes());
        scan.setStopRow("rowkey".getBytes());

        /**
         * Kind 1: comparison filters — compare rowkey, family, qualifier or value.
         *      They return only the cells matching the condition.
         */
        // rowkey filter
        Filter rowFilter = new RowFilter(CompareFilter.CompareOp.EQUAL, new BinaryComparator("20191027_001".getBytes()));
        // column-family filter
        Filter familyFilter = new FamilyFilter(CompareFilter.CompareOp.NOT_EQUAL, new BinaryComparator("basic".getBytes()));
        // column-qualifier filter
        Filter columnFilter = new QualifierFilter(CompareFilter.CompareOp.EQUAL, new SubstringComparator("a"));
        // cell-value filter
        ValueFilter valueFilter = new ValueFilter(CompareFilter.CompareOp.EQUAL, new SubstringComparator("lao"));

        /**
         * Kind 2: dedicated filters — return every matching whole row.
         */
        // Single-column-value filter: rows whose cf:col matches the given value
        Filter singleColumnValueExcludeFilter = new SingleColumnValueExcludeFilter(
                "basic".getBytes(),           // column family
                "name".getBytes(),            // column qualifier
                CompareFilter.CompareOp.EQUAL,// comparison operator
                "lao".getBytes()              // value to match
        );

        // Multiple-column-prefix filter: keep only columns whose name starts with one of these
        byte[][] prefixes = {
                "name".getBytes(),
                "age".getBytes()
        };
        Filter multipleColumnPrefixFilter = new MultipleColumnPrefixFilter(prefixes);
        // rowkey prefix filter
        PrefixFilter prefixFilter = new PrefixFilter("201910".getBytes());

        // Page filter: in practice always combined with an explicit start row
        scan.setStartRow("20191027_001".getBytes());
        Filter pageFilter = new PageFilter(3);

        // Filter list: combine several filters into one
        FilterList filterList = new FilterList();
        filterList.addFilter(singleColumnValueExcludeFilter);
        filterList.addFilter(multipleColumnPrefixFilter);

        // Attach the filter actually used by this demo scan
        scan.setFilter(rowFilter);
        // ResultScanner must be closed to release the server-side scanner lease
        try (ResultScanner scanner = table.getScanner(scan)) {
            for (Result result : scanner) {
                // Print the rowkey first...
                System.out.println(Bytes.toString(result.getRow()));
                // ...then every column (cell) of that row
                for (Cell cell : result.rawCells()) {
                    System.out.println(
                            Bytes.toString(CellUtil.cloneFamily(cell)) + "\t" +
                                    Bytes.toString(CellUtil.cloneQualifier(cell)) + "\t" +
                                    Bytes.toString(CellUtil.cloneValue(cell)) + "\t" +
                                    cell.getTimestamp()
                    );
                }
                System.out.println();
            }
        }
        close(table);
    }

    /**
     * Closes the table handle and the shared connection.
     *
     * @param table the table handle returned by {@link #connect()}
     * @throws IOException if closing either resource fails
     */
    private void close(Table table) throws IOException {
        table.close();
        if (conn != null) {
            conn.close();
        }
    }
}

你可能感兴趣的:(Hbase,Hbase,学习笔记)