问题
1,创建带分区的表
CREATE TABLE hbase_table_2(key int, value string) PARTITIONED BY (date String) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key,cf1:val") TBLPROPERTIES ("hbase.table.name" = "xyz2");
(1) , insert into hbase_table_2 partition (date) select body_bytes_sent,remotea_ddr,date from access_log limit 10;
不报错 ,但表没数据
(2),insert into hbase_table_2 partition (date = '201212') select body_bytes_sent,remotea_ddr from access_log limit 10;
表中有数据,但 hbase 中没有分区字段 date 的数据
(3),在 hbase 中向 xyz2 插入一条数据:put 'xyz2', 123, 'cf1:val', '1.1.1.1',之后 hive 的所有分区中都能查到这条数据
实例 ,2种情况
[root@fuze245 ~]# beeline
Beeline version 1.1.0-cdh5.5.0 by Apache Hive
beeline> !connect jdbc:hive2://fuze245:10000
一,创建 hive 表时,hbase 表会自动创建
创建
0: jdbc:hive2://fuze245:10000> CREATE TABLE hbase_table(key int, value string)STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key,cf1:val") TBLPROPERTIES ("hbase.table.name" = "xyz");
准备的数据
0: jdbc:hive2://fuze245:10000> select body_bytes_sent,remotea_ddr from access_log limit 10;
+------------------+------------------+--+
| body_bytes_sent | remotea_ddr |
+------------------+------------------+--+
| 1103 | 121.201.34.136 |
| 397 | 113.208.116.106 |
| 2165 | 121.201.34.136 |
| 587 | 113.208.116.106 |
| 318 | 121.201.34.136 |
| 318 | 113.208.116.106 |
| 60 | 121.201.34.136 |
| 60 | 113.208.116.106 |
| 318 | 121.201.34.136 |
| 9566 | 120.24.89.125 |
+------------------+------------------+--+
插入
0: jdbc:hive2://fuze245:10000> insert into hbase_table select body_bytes_sent,remotea_ddr from access_log limit 10;
查询:相同 rowkey 的记录会被覆盖(去重),所以插入的 10 条只剩 7 条
0: jdbc:hive2://fuze245:10000> select * from hbase_table;
+------------------+--------------------+--+
| hbase_table.key | hbase_table.value |
+------------------+--------------------+--+
| 1103 | 121.201.34.136 |
| 2165 | 121.201.34.136 |
| 318 | 121.201.34.136 |
| 397 | 113.208.116.106 |
| 587 | 113.208.116.106 |
| 60 | 121.201.34.136 |
| 9566 | 120.24.89.125 |
+------------------+--------------------+--+
7 rows selected (0.865 seconds)
hbase 查询
hbase(main):014:0> scan 'xyz'
ROW COLUMN+CELL
1103 column=cf1:val, timestamp=1456286326149, value=121.201.34.136
2165 column=cf1:val, timestamp=1456286326149, value=121.201.34.136
318 column=cf1:val, timestamp=1456286326149, value=121.201.34.136
397 column=cf1:val, timestamp=1456286326149, value=113.208.116.106
587 column=cf1:val, timestamp=1456286326149, value=113.208.116.106
60 column=cf1:val, timestamp=1456286326149, value=121.201.34.136
9566 column=cf1:val, timestamp=1456286326149, value=120.24.89.125
7 row(s) in 0.0800 seconds
插入测试 略
hbase(main):015:0> put 'xyz', 123, 'cf1:val', '1.1.1.1'
二,hbase 表已存在时,创建 hive 表必须使用 EXTERNAL
CREATE EXTERNAL TABLE hbase_table_3(key int,fi map<string,string>) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' COLLECTION ITEMS TERMINATED BY ',' MAP KEYS TERMINATED BY ':' STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' WITH SERDEPROPERTIES ("hbase.columns.mapping" = "fi:") TBLPROPERTIES ("hbase.table.name" = "hbase_1");
插入数据
hbase(main):032:0> put 'hbase_1', '123' ,'fi:a', '1.1.1.1'
0 row(s) in 0.0260 seconds
hbase(main):033:0> put 'hbase_1', '123' ,'fi:b', '1.1.1.1'
0 row(s) in 0.0100 seconds
hbase(main):034:0> put 'hbase_1', '123', 'fi:c', '1.1.1.1'
0 row(s) in 0.0160 seconds
hbase(main):035:0> scan 'hbase_1'
ROW COLUMN+CELL
123 column=fi:a, timestamp=1456299052170, value=1.1.1.1
123 column=fi:b, timestamp=1456299052242, value=1.1.1.1
123 column=fi:c, timestamp=1456299053714, value=1.1.1.1
1 row(s) in 0.2050 seconds
hive 查询(注:查询前还向 hbase_1 写入了 fi:d,该 put 未在上面列出,可见后面 scan 结果中的 fi:d)
0: jdbc:hive2://fuze245:10000> select * from hbase_table_3;
+--------------------+------------------------------------------------------------+--+
| hbase_table_3.key | hbase_table_3.fi |
+--------------------+------------------------------------------------------------+--+
| 123 | {"a":"1.1.1.1","b":"1.1.1.1","c":"1.1.1.1","d":"1.1.1.1"} |
+--------------------+------------------------------------------------------------+--+
load 数据报错
0: jdbc:hive2://fuze245:10000> load data local inpath '/root/test' into table hbase_table_3;
Error: Error while compiling statement: FAILED: SemanticException [Error 10101]: A non-native table cannot be used as target for LOAD
insert into 可以
0: jdbc:hive2://fuze245:10000> select * from test3;
+-----------+-----------------------------------------+--+
| test3.id | test3.perf |
+-----------+-----------------------------------------+--+
| 234 | {"job":"80","team":"60","person":"70"} |
+-----------+-----------------------------------------+--+
1 row selected (0.206 seconds)
0: jdbc:hive2://fuze245:10000> insert into table hbase_table_3 select * from test3;
结果
0: jdbc:hive2://fuze245:10000> select * from hbase_table_3;
+--------------------+------------------------------------------------------------+--+
| hbase_table_3.key | hbase_table_3.fi |
+--------------------+------------------------------------------------------------+--+
| 123 | {"a":"1.1.1.1","b":"1.1.1.1","c":"1.1.1.1","d":"1.1.1.1"} |
| 234 | {"job":"80","person":"70","team":"60"} |
+--------------------+------------------------------------------------------------+--+
hbase(main):038:0> scan 'hbase_1'
ROW COLUMN+CELL
123 column=fi:a, timestamp=1456299052170, value=1.1.1.1
123 column=fi:b, timestamp=1456299052242, value=1.1.1.1
123 column=fi:c, timestamp=1456299053714, value=1.1.1.1
123 column=fi:d, timestamp=1456299336322, value=1.1.1.1
234 column=fi:job, timestamp=1456300023549, value=80
234 column=fi:person, timestamp=1456300023549, value=70
234 column=fi:team, timestamp=1456300023549, value=60
2 row(s) in 0.0380 seconds