1.hbase数据关联hive外部表
-- Expose the existing HBase table "hbase_table" to Hive as an external table.
-- The first mapping entry (:key) binds the HBase row key to `id`; the remaining
-- entries bind qualifiers in column family `info` to the other columns, in order.
CREATE EXTERNAL TABLE hive_hbase_test1 (
    id      string,
    address string,
    age     int,
    gender  string,
    name    string
)
STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
WITH SERDEPROPERTIES (
    "hbase.columns.mapping"=":key,info:address,info:age,info:gender,info:name"
)
TBLPROPERTIES (
    "hbase.table.name"="hbase_table"
);
2.hive beeline导出到本地文件
# Export the query result to the local file f2.txt, one row per line, fields separated by '|'.
#   --showHeader=false     omit the column-header row so the file contains data only
#   --outputformat=dsv     delimiter-separated values ("txt" is not a beeline output format;
#                          an unknown format falls back to the bordered "table" layout)
#   --delimiterForDSV='|'  use '|' as the field separator, matching the staging table
#                          definition (fields terminated by '|')
beeline -u jdbc:hive2://192.168.21.110:2166 -d org.apache.hive.jdbc.HiveDriver --showHeader=false --outputformat=dsv --delimiterForDSV='|' -e "select * from hive_hbase_test1" > f2.txt
3.本地文件上传hdfs
# Copy the exported local file into the HDFS directory /tmp/ (it becomes /tmp/f2.txt).
hadoop fs -put /home/user/f2.txt /tmp/
4.hdfs导入hive
先创建表(注意指定表的数据格式)STORED AS TEXTFILE
-- Staging table for the '|'-delimited export file.
-- STORED AS TEXTFILE is shorthand for the TextInputFormat /
-- HiveIgnoreKeyTextOutputFormat pair, so LOAD DATA can move the plain-text
-- file in without any conversion.
CREATE TABLE test_2000 (
    id           string,
    collect_time string,
    create_time  string,
    status       string,
    value        string
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
STORED AS TEXTFILE;
load命令导入hive
# Move the uploaded file from HDFS into the staging table. LOAD DATA INPATH takes an
# HDFS path, so it must be the location written by `hadoop fs -put ... /tmp/`, i.e. /tmp/f2.txt.
# NOTE(review): host/port here differ from the export command (192.168.21.110:2166) —
# confirm which HiveServer2 endpoint is correct.
beeline -u jdbc:hive2://192.168.1.110:2066 -e "load data inpath '/tmp/f2.txt' into table test_2000;"
hive表数据格式参考:https://blog.csdn.net/TC_HaoShuai/article/details/84303140
5.hive创建中间表关联hbase
-- Hive-managed table backed by the HBase table "test_2000".
-- Because the table is not EXTERNAL, Hive owns the HBase table: dropping this
-- Hive table also drops "test_2000" in HBase.
-- :key binds the HBase row key to `id`; each remaining column is stored under a
-- qualifier of the same name in column family `info`.
CREATE TABLE hive_hbase_test_2000 (
    id           string,
    collect_time string,
    create_time  string,
    status       string,
    value        string
)
STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
WITH SERDEPROPERTIES (
    "hbase.columns.mapping" = ":key,info:collect_time,info:create_time,info:status,info:value"
)
TBLPROPERTIES (
    "hbase.table.name" = "test_2000",
    -- target HBase table for writes issued through Hive
    "hbase.mapred.output.outputtable" = "test_2000"
);
6.hive原数据往hive中间表导入
-- Copy every row of the staging table into the HBase-backed table.
-- Columns are listed in the staging table's declared order.
INSERT INTO TABLE hive_hbase_test_2000
SELECT
    id,
    collect_time,
    create_time,
    status,
    value
FROM test_2000;
查看hbase表也有了数据 scan 'test_2000' ,但是这种情况下hive表是内部表(managed table),删除该hive表时hbase表也会被一并删除
7.避免hbase表与hive强关联
可以先创建hbase表
# HBase shell: create table 'test_2000' with a single column family 'info'.
create 'test_2000','info'
然后hive外部关联hbase表,
-- External Hive table over the pre-created HBase table "test_2000".
-- Being EXTERNAL, dropping this Hive table removes only the Hive metadata and
-- leaves the HBase table in place.
-- :key binds the HBase row key to `id`; each remaining column is stored under a
-- qualifier of the same name in column family `info`.
CREATE EXTERNAL TABLE hive_hbase_test_2000 (
    id           string,
    collect_time string,
    create_time  string,
    status       string,
    value        string
)
STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
WITH SERDEPROPERTIES (
    "hbase.columns.mapping" = ":key,info:collect_time,info:create_time,info:status,info:value"
)
TBLPROPERTIES (
    "hbase.table.name" = "test_2000",
    -- target HBase table for writes issued through Hive
    "hbase.mapred.output.outputtable" = "test_2000"
);