Hive集成HBase(一)
Hive集成HBase(二)
# Launch the Hive CLI with the HBase storage-handler jars on the auxiliary
# classpath and hbase.master set to hbase.yoyodyne.com:60000.
# --auxpath takes a single comma-separated value; a space after a comma would
# make the shell split the list into separate arguments.
hive --auxpath /root/lib/hive-hbase-handler-0.9.0.jar,/root/lib/hbase-0.92.0.jar,/root/lib/zookeeper-3.3.4.jar,/root/lib/guava-r09.jar --hiveconf hbase.master=hbase.yoyodyne.com:60000
-- Register the HBase storage-handler jars for the current Hive session,
-- one resource per statement, then set hbase.master for the session.
ADD JAR /root/lib/hive-hbase-handler-0.9.0.jar;
ADD JAR /root/lib/hbase-0.92.0.jar;
ADD JAR /root/lib/zookeeper-3.3.4.jar;
ADD JAR /root/lib/guava-r09.jar;
SET hbase.master=hbase.yoyodyne.com:60000;
# Launch the Hive CLI with the HBase storage-handler jars on the auxiliary
# classpath and hbase.zookeeper.quorum set to the three zk*.yoyodyne.com hosts.
# --auxpath takes a single comma-separated value; a space after a comma would
# make the shell split the list into separate arguments.
hive --auxpath /root/lib/hive-hbase-handler-0.9.0.jar,/root/lib/hbase-0.92.0.jar,/root/lib/zookeeper-3.3.4.jar,/root/lib/guava-r09.jar --hiveconf hbase.zookeeper.quorum=zk1.yoyodyne.com,zk2.yoyodyne.com,zk3.yoyodyne.com
-- Register the HBase storage-handler jars for the current Hive session,
-- one resource per statement, then set the ZooKeeper quorum for the session.
ADD JAR /root/lib/hive-hbase-handler-0.9.0.jar;
ADD JAR /root/lib/hbase-0.92.0.jar;
ADD JAR /root/lib/zookeeper-3.3.4.jar;
ADD JAR /root/lib/guava-r09.jar;
SET hbase.zookeeper.quorum=zk1.yoyodyne.com,zk2.yoyodyne.com,zk3.yoyodyne.com;
-- Delimited-text source table: fields are separated by commas and each
-- record ends with a newline.
-- The closing semicolon terminates the statement so it can be followed by
-- further statements when run from a script file.
CREATE TABLE grades (
    id   INT,
    name STRING,
    age  INT
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ','
    LINES TERMINATED BY '\n';
hive> SELECT * FROM grades; OK 1 Tom 24 2 Bill 25 3 Alice 24 Time taken: 0.17 seconds, Fetched: 3 row(s)
-- Hive table stored in the HBase table "xyz" through the HBase storage handler.
-- key maps to the HBase row key; name and age map to qualifiers in column
-- family cf1.
-- The entries of hbase.columns.mapping are written without whitespace because
-- Hive interprets whitespace between entries as part of the column name.
CREATE TABLE hbase_table (
    key  INT,
    name STRING,
    age  INT
)
STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
WITH SERDEPROPERTIES (
    "hbase.columns.mapping" = ":key,cf1:name,cf1:age"
)
TBLPROPERTIES (
    "hbase.table.name" = "xyz",
    "hbase.mapred.output.outputtable" = "xyz"
);
-- Load the rows of grades into hbase_table.
-- The source columns are named explicitly (instead of SELECT *) so the
-- statement keeps supplying exactly these three columns, in this order, even
-- if grades later gains or reorders columns.
INSERT OVERWRITE TABLE hbase_table
SELECT
    id,
    name,
    age
FROM grades;
-- Hive table stored in the HBase table "xyz", spread over two column families.
-- key maps to the HBase row key, name and age to cf1:name and cf1:age, and
-- txt to cf2:age.
-- The mapping needs one entry per Hive column, each pointing at a distinct
-- HBase column, and the string must be closed with a double quote.
-- NOTE(review): the original listed cf1:name twice; cf1:age is assumed to be
-- the intended target for the age column - confirm.
-- Entries are written without whitespace because Hive interprets whitespace
-- between entries as part of the column name.
CREATE TABLE hbase_table (
    key  INT,
    name STRING,
    age  INT,
    txt  STRING
)
STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
WITH SERDEPROPERTIES (
    "hbase.columns.mapping" = ":key,cf1:name,cf1:age,cf2:age"
)
TBLPROPERTIES (
    "hbase.table.name" = "xyz",
    "hbase.mapred.output.outputtable" = "xyz"
);
-- Load grades into hbase_table, supplying age a second time as a string
-- for the fourth target column.
INSERT OVERWRITE TABLE hbase_table
SELECT
    id,
    name,
    age,
    CAST(age AS STRING)
FROM grades;
-- Hive table stored in the HBase table "xyz" that exposes a whole column
-- family as a map: key maps to the HBase row key and txt maps to every
-- qualifier of family cf (the mapping entry "cf:" names the family only).
-- The mapping string must be closed with a double quote, and its entries are
-- written without whitespace because Hive interprets whitespace between
-- entries as part of the column name.
CREATE TABLE hbase_table (
    key INT,
    txt MAP<STRING, STRING>
)
STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
WITH SERDEPROPERTIES (
    "hbase.columns.mapping" = ":key,cf:"
)
TBLPROPERTIES (
    "hbase.table.name" = "xyz",
    "hbase.mapred.output.outputtable" = "xyz"
);
hive> source hive_hbase.hql; OK Time taken: 0.041 seconds OK Time taken: 3.953 seconds hive> show tables; OK grades hbase_table Time taken: 0.021 seconds, Fetched: 2 row(s) hive> select * from hbase_table; OK Time taken: 1.122 seconds
hbase(main):001:0> list TABLE 0 row(s) in 0.6350 seconds => []
-- Load grades into hbase_table, packing name and age (as a string) into a
-- single map value keyed by 'name' and 'age'.
INSERT OVERWRITE TABLE hbase_table
SELECT
    id,
    MAP(
        'name', name,
        'age',  CAST(age AS STRING)
    )
FROM grades;
hive> source insert.hql; OK Time taken: 0.014 seconds FAILED: SemanticException [Error 10044]: Line 3:23 Cannot insert into target table because column number/types are different 'hbase_table': Table insclause-0 has 2 columns, but query has 3 columns. hive> source insert.hql; OK Time taken: 0.04 seconds Query ID = root_20160316001818_8ea198e2-f1ca-424d-b8c3-c329fc497c81 Total jobs = 1 Launching Job 1 out of 1 Number of reduce tasks is set to 0 since there's no reduce operator Starting Job = job_1458112443618_0001, Tracking URL = http://slave-1:8088/proxy/application_1458112443618_0001/ Kill Command = /root/install/hadoop-2.4.1/bin/hadoop job -kill job_1458112443618_0001 Hadoop job information for Stage-0: number of mappers: 1; number of reducers: 0 2016-03-16 00:19:16,175 Stage-0 map = 0%, reduce = 0% 2016-03-16 00:19:28,328 Stage-0 map = 100%, reduce = 0%, Cumulative CPU 2.75 sec MapReduce Total cumulative CPU time: 2 seconds 750 msec Ended Job = job_1458112443618_0001 MapReduce Jobs Launched: Stage-Stage-0: Map: 1 Cumulative CPU: 2.75 sec HDFS Read: 255 HDFS Write: 0 SUCCESS Total MapReduce CPU Time Spent: 2 seconds 750 msec OK Time taken: 38.091 seconds hive> select * from hbase_table; OK 1 {"age":"24","name":"Tom"} 2 {"age":"25","name":"Bill"} 3 {"age":"24","name":"Alice"} Time taken: 0.181 seconds, Fetched: 3 row(s)
hbase(main):004:0> scan 'xyz' ROW COLUMN+CELL 1 column=cf:age, timestamp=1458112767571, value=24 1 column=cf:name, timestamp=1458112767571, value=Tom 2 column=cf:age, timestamp=1458112767571, value=25 2 column=cf:name, timestamp=1458112767571, value=Bill 3 column=cf:age, timestamp=1458112767571, value=24 3 column=cf:name, timestamp=1458112767571, value=Alice 3 row(s) in 0.3680 seconds
-- External Hive table over the HBase table "xyz": id maps to the HBase row
-- key and txt maps to every qualifier of column family cf.
-- The mapping is written without leading or embedded whitespace because Hive
-- interprets whitespace in hbase.columns.mapping as part of the column name.
CREATE EXTERNAL TABLE hbase_table (
    id  INT,
    txt MAP<STRING, STRING>
)
STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
WITH SERDEPROPERTIES (
    "hbase.columns.mapping" = ":key,cf:"
)
TBLPROPERTIES (
    "hbase.table.name" = "xyz",
    "hbase.mapred.output.outputtable" = "xyz"
);
-- Write each grades row into hbase_table as (id, map): the map carries the
-- name under key 'name' and the age, cast to a string, under key 'age'.
INSERT OVERWRITE TABLE hbase_table
SELECT
    id,
    MAP('name', name, 'age', CAST(age AS STRING))
FROM grades;
-- External Hive table over the HBase table "xyz": id maps to the HBase row
-- key and txt maps to every qualifier of column family cf.
-- The mapping is written without leading or embedded whitespace because Hive
-- interprets whitespace in hbase.columns.mapping as part of the column name.
CREATE EXTERNAL TABLE hbase_table_1 (
    id  INT,
    txt MAP<STRING, STRING>
)
STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
WITH SERDEPROPERTIES (
    "hbase.columns.mapping" = ":key,cf:"
)
TBLPROPERTIES (
    "hbase.table.name" = "xyz",
    "hbase.mapred.output.outputtable" = "xyz"
);
hive> select * from hbase_table_1 where id > 1; OK 2 {"age":"25","name":"Bill"} 3 {"age":"24","name":"Alice"} Time taken: 0.211 seconds, Fetched: 2 row(s)