HBase常用基础知识
1.hbase常用的shell命令
## 查看所有namespace(HBase有两个内置的namespace:default 和 hbase)
list_namespace
##创建namespace
create_namespace "test"
## 删除 namespace
drop_namespace 'test_ns'
##查询指定namespace下面的表
list_namespace_tables 'dingtalk'
##查询指定namespace信息
describe_namespace "dingtalk"
##查询指定namespace下面表的数据
scan 'dingtalk:website'
##查询表行数 一般不用
count 'dingtalk:website'
## 数据量大的时候使用mapreduce查询数量
$HBASE_HOME/bin/hbase org.apache.hadoop.hbase.mapreduce.RowCounter 'HtableName'
## 查看表结构:
desc 'dingtalk:website'
## 按照rowKey查询数据
get 'dingtalk:website','b9cb51e146794ffba8d838335ca300fb'
get 'dingtalk:website','b9cb51e146794ffba8d838335ca300fb',{
FORMATTER=>'toString'}
## 按照rowKey查询数据 解决乱码
get 'my_test','0011','info:ch:toString'
## 清空整张表: hbase是先将表disable掉,然后drop掉后重建表来实现truncate的功能的
truncate 'dingtalk:website'
## 禁用hbase表
disable 'dingtalk:website'
## 启用hbase表
enable 'dingtalk:website'
## 修改列簇info保留的最大版本数(VERSIONS)
alter 'dingtalk:website', NAME => 'info', VERSIONS => 9999
## 删除hbase表
drop 'dingtalk:website'
## 判断表是否enable:
is_enabled 'dingtalk:website'
## 判断表是否disable:
is_disabled 'dingtalk:website'
## 创建表并且指定region
create 'dingtalk:website',{
NAME=>'info',VERSIONS=>3, TTL => 2592000,COMPRESSION => 'SNAPPY'},{
NUMREGIONS=>10,SPLITALGO=>'HexStringSplit',REGION_REPLICATION=>2,MIN_VERSIONS => '0'}
## 创建表并且指定region
create 'dingtalk:website1',{
NAME=>'info',VERSIONS=>3,BLOCKCACHE=>true,BLOOMFILTER=>'ROW',COMPRESSION=>'SNAPPY'},{
SPLITS => ['20','40','60','80']}
## 将表在memstore中的数据刷写(flush)到磁盘
flush 'dingtalk:website'
disable 'table'
enable 'table'
## 修改列簇info保留的最大版本数(VERSIONS)
alter 'dingtalk:trade_mark', NAME => 'info', VERSIONS => 9999
## 查询5条数据
scan 'dingtalk:patent',{
FORMATTER=>'toString',LIMIT=>5}
## 查询指定列数据
scan 'dingtalk:patent',{
FORMATTER=>'toString',LIMIT=>5,COLUMNS=>['info:name','info:age']}
## 使用scan 按照rowkey前缀匹配
scan 'dingtalk:patent',{
ROWPREFIXFILTER=>'0151099104887566931',FORMATTER=>'toString',LIMIT=>5,COLUMNS=>['info:name','info:age']}
## 前缀匹配
scan 'dingtalk:patent',{
FILTER=>"PrefixFilter('0151099104887566931')"}
## 范围查询
scan 'dingtalk:patent',{
STARTROW=>'7CN2103801',STOPROW=>'7CN2103802'}
## 单列过滤器
scan 'dingtalk:patent',{
FILTER=>"SingleColumnValueFilter('info','name',=,'binary:张三')",FORMATTER=>'toString'}
## 单列组合过滤
scan 'dingtalk:patent',{
FILTER=>"SingleColumnValueFilter('info','name',=,'binary:你好2') AND SingleColumnValueFilter('info','age',>,'binary:18')",FORMATTER=>'toString'}
## 增加数据
put 'test:user_info','001001001','F1:real_name','我是HBase'
## 扫描表user_info的COLUMNS=F1的前5条数据
scan 'test:user_info',{
COLUMNS=>'F1',LIMIT=>5}
## 扫描表user_info,每个单元格最多返回最近5个版本的数据
scan 'test:user_info',{
VERSIONS=>5}
## 最近两个版本 删除的数据也会显示出
scan 'my_test',{
RAW=>TRUE,VERSIONS=>2}
## 删除表user_info中rowkey=’00200000011’,column=’F1:address’的数据
delete 'test:user_info','00200000011','F1:address'
## 删除行数据(deleteall)
deleteall 'test:user_info','00200000011'
## 删除表的所有数据(truncate) 具体过程是:disable table -> drop table -> create table
truncate 'test:user_info'
## Add column family 'c2' to an existing table.
## `create` would try to create the table itself; changing the families of an
## existing table is done with `alter`.
alter 'test:user_info', NAME => 'c2'
## Delete column family 'c2' from the table (its data is removed with it).
alter 'test:user_info', NAME => 'c2', METHOD => 'delete'
## 快照
1) 查看所有快照 list_snapshots
2) 查看指定表的所有快照 list_table_snapshots 'test:user_info'
3) 创建快照 snapshot 'test:user_info', 'snapshot.user_info.20190726'
4)删除快照 delete_snapshot 'snapshot.user_info.20190726'
## 在shell窗口执行hbase脚本
hbase shell /export/software/hbaseScript.rb
2. hbase bulkload
## bulkload分两步:先用ImportTsv生成hfile,再用completebulkload导入hbase
## 1) Convert the CSV file into HFiles.
## Runs ImportTsv with ',' as the field separator and writes HFiles to
## /tmp/test/hfile (importtsv.bulk.output) for the later bulk load.
## Columns map, in order, to the row key, info:en and info:ch of table my_test;
## the input is read from /tmp/test/test.tsv.
hbase org.apache.hadoop.hbase.mapreduce.ImportTsv \
'-Dimporttsv.separator=,' \
-Dimporttsv.bulk.output=/tmp/test/hfile \
-Dimporttsv.columns=HBASE_ROW_KEY,info:en,info:ch my_test /tmp/test/test.tsv
## 2) Load the generated HFiles from /tmp/test/hfile into table default:my_test.
## The MapReduce classpath is taken from `hbase mapredcp`.
## Both the hbase launcher and the job jar are now resolved through the same
## /opt/cloudera/parcels/CDH path, so they cannot come from different parcel
## versions.
## NOTE(review): presumably this path is the symlink to the active parcel;
## confirm on the target host.
HADOOP_CLASSPATH=$(/opt/cloudera/parcels/CDH/lib/hbase/bin/hbase mapredcp) \
hadoop jar /opt/cloudera/parcels/CDH/lib/hbase/hbase-mapreduce.jar \
completebulkload /tmp/test/hfile default:my_test
3.hive映射Hbase
-- Hive external table backed by the HBase table dingtalk:website.
-- Columns are bound positionally to the entries of "hbase.columns.mapping":
-- the first column is the HBase row key, the rest are qualifiers in family info.
CREATE EXTERNAL TABLE `hbase_website` (
    `key`         STRING,  -- :key (HBase row key)
    `ocid`        STRING,  -- info:ocid
    `companyname` STRING,  -- info:companyname
    `createtime`  BIGINT,  -- info:createtime
    `updatetime`  BIGINT,  -- info:updatetime
    `sitename`    STRING,  -- info:sitename
    `number`      STRING,  -- info:number
    `homeurl`     STRING,  -- info:homeurl
    `checkdate`   STRING,  -- info:checkdate
    `domain`      STRING   -- info:domain
)
STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
WITH SERDEPROPERTIES (
    -- Kept on a single line: the mapping string is reproduced exactly as in the original.
    "hbase.columns.mapping" = ":key,info:ocid,info:companyname,info:createtime,info:updatetime,info:sitename,info:number,info:homeurl,info:checkdate,info:domain"
)
TBLPROPERTIES (
    "hbase.table.name" = "dingtalk:website"
);