The idea is to make the HBase jars visible to Hive by symlinking them into the Hive installation, with the paths built from environment variables:
export HBASE_HOME=/opt/modules/hbase-0.98.6-cdh5.3.0
export HIVE_HOME=/opt/modules/hive-0.13.1-bin
ln -s $HBASE_HOME/lib/hbase-server-0.98.6-cdh5.3.0.jar $HIVE_HOME/hbase-server-0.98.6-cdh5.3.0.jar
ln -s $HBASE_HOME/lib/hbase-client-0.98.6-cdh5.3.0.jar $HIVE_HOME/hbase-client-0.98.6-cdh5.3.0.jar
ln -s $HBASE_HOME/lib/hbase-protocol-0.98.6-cdh5.3.0.jar $HIVE_HOME/hbase-protocol-0.98.6-cdh5.3.0.jar
ln -s $HBASE_HOME/lib/hbase-it-0.98.6-cdh5.3.0.jar $HIVE_HOME/hbase-it-0.98.6-cdh5.3.0.jar
ln -s $HBASE_HOME/lib/htrace-core-2.04.jar $HIVE_HOME/htrace-core-2.04.jar
ln -s $HBASE_HOME/lib/hbase-hadoop2-compat-0.98.6-cdh5.3.0.jar $HIVE_HOME/hbase-hadoop2-compat-0.98.6-cdh5.3.0.jar
ln -s $HBASE_HOME/lib/hbase-hadoop-compat-0.98.6-cdh5.3.0.jar $HIVE_HOME/hbase-hadoop-compat-0.98.6-cdh5.3.0.jar
ln -s $HBASE_HOME/lib/high-scale-lib-1.1.1.jar $HIVE_HOME/high-scale-lib-1.1.1.jar
ln -s $HBASE_HOME/lib/hbase-common-0.98.6-cdh5.3.0.jar $HIVE_HOME/hbase-common-0.98.6-cdh5.3.0.jar
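A quick sanity check (assuming the commands above were run as-is) is to list the links and confirm each one resolves to a file under $HBASE_HOME/lib:
ls -l $HIVE_HOME/hbase-*.jar $HIVE_HOME/htrace-core-2.04.jar $HIVE_HOME/high-scale-lib-1.1.1.jar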
CREATE EXTERNAL TABLE weblog(
id string,
datatime string,
userid string,
searchname string,
retorder string,
cliorder string,
cliurl string
)
STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key,info:datatime,info:userid,info:searchname,info:retorder,info:cliorder,info:cliurl")
TBLPROPERTIES("hbase.table.name" = "weblog");
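Note: because the table is declared EXTERNAL, the HBase table named in hbase.table.name must already exist (Hive only creates the HBase table for non-external tables). A minimal sketch of creating it beforehand, assuming a single column family info as used in the mapping above:
echo "create 'weblog', 'info'" | hbase shell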
The following problem came up when executing the CREATE TABLE statement above:
2018-02-15 20:07:08,593 ERROR [main]: exec.DDLTask (DDLTask.java:execute(481)) - java.lang.NoClassDefFoundError: org/apache/hadoop/hbase/HBaseConfiguration
at org.apache.hadoop.hive.hbase.HBaseStorageHandler.setConf(HBaseStorageHandler.java:279)
at org.apache.hadoop.util.ReflectionUtils.setConf(ReflectionUtils.java:73)
at org.apache.hadoop.util.ReflectionUtils.newInstance(ReflectionUtils.java:133)
at org.apache.hadoop.hive.ql.metadata.HiveUtils.getStorageHandler(HiveUtils.java:304)
at org.apache.hadoop.hive.ql.metadata.Table.getStorageHandler(Table.java:301)
at org.apache.hadoop.hive.ql.exec.DDLTask.createTable(DDLTask.java:4107)
at org.apache.hadoop.hive.ql.exec.DDLTask.execute(DDLTask.java:284)
at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:153)
at org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:85)
at org.apache.hadoop.hive.ql.Driver.launchTask(Driver.java:1554)
at org.apache.hadoop.hive.ql.Driver.execute(Driver.java:1321)
at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:1139)
at org.apache.hadoop.hive.ql.Driver.run(Driver.java:962)
at org.apache.hadoop.hive.ql.Driver.run(Driver.java:952)
at org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:269)
at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:221)
at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:431)
at org.apache.hadoop.hive.cli.CliDriver.executeDriver(CliDriver.java:800)
at org.apache.hadoop.hive.cli.CliDriver.run(CliDriver.java:694)
at org.apache.hadoop.hive.cli.CliDriver.main(CliDriver.java:633)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:606)
at org.apache.hadoop.util.RunJar.main(RunJar.java:212)
Caused by: java.lang.ClassNotFoundException: org.apache.hadoop.hbase.HBaseConfiguration
at java.net.URLClassLoader$1.run(URLClassLoader.java:366)
at java.net.URLClassLoader$1.run(URLClassLoader.java:355)
at java.security.AccessController.doPrivileged(Native Method)
at java.net.URLClassLoader.findClass(URLClassLoader.java:354)
at java.lang.ClassLoader.loadClass(ClassLoader.java:425)
at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
at java.lang.ClassLoader.loadClass(ClassLoader.java:358)
... 25 more
The error shows that some HBase classes cannot be found, so I configured the HBase environment variables in hive-env.sh:
# Set HADOOP_HOME to point to a specific hadoop install directory
export HADOOP_HOME=/opt/modules/hadoop-2.5.0
export HBASE_HOME=/opt/modules/hbase-0.98.6-cdh5.3.0
# Hive Configuration Directory can be controlled by:
export HIVE_CONF_DIR=/opt/modules/hive-0.13.1-cdh5.3.0/conf
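With HBASE_HOME exported in hive-env.sh, restarting the Hive CLI lets the CREATE TABLE statement above run successfully. A quick, hedged way to verify the mapping end to end (the row key and value below are made up for illustration):
echo "put 'weblog', '00001', 'info:searchname', 'hadoop'" | hbase shell
hive -e "SELECT id, searchname FROM weblog LIMIT 5;"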
An HBase tutorial: https://www.yiibai.com/hbase/hbase_create_data.html
Because HBase only indexes the row key (rowKey), complex conditional queries are not possible.
HBase is a good fit for data with the following characteristics:
1 Semi-structured or unstructured data. Structured data should live in a database such as MySQL; semi-structured data benefits from HBase's easily extensible columns, and unstructured data does not need complex conditional queries, so both store well in HBase.
2 Sparse records. Columns that are empty are simply not stored rather than holding a null, which saves space.
3 Multiple versions.
As mentioned above, the value located by a row key and column key can keep any number of versions, with the timestamp as the version number. An update in HBase is really an insert under a new timestamp, so HBase is very convenient for data whose change history needs to be kept (see the sketch after this list).
4 Very large data volumes.
HBase splits regions automatically, and queries stay fast even over massive amounts of data.
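A minimal sketch of the multi-version behaviour in the HBase shell (the table t1, column family f, and values below are made up for illustration; the family is created with VERSIONS => 3 so HBase keeps the last three timestamps):
hbase shell <<'EOF'
create 't1', {NAME => 'f', VERSIONS => 3}
put 't1', 'row1', 'f:q', 'v1'
put 't1', 'row1', 'f:q', 'v2'
get 't1', 'row1', {COLUMN => 'f:q', VERSIONS => 3}   # returns both v2 and v1, newest first
EOF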