Append the following to /etc/profile:
export JAVA_HOME=/usr/local/jdk1.8.0_171
export JRE_HOME=$JAVA_HOME/jre
export CLASSPATH=.:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar:$JRE_HOME/lib:$CLASSPATH
export PATH=$JAVA_HOME/bin:$PATH
export HADOOP_HOME=/data/hadoop
export HIVE_HOME=/data/hive-3.1.0
export ZOOKEEPER_HOME=/data/zookeeper-3.4.13
export PATH=$PATH:$HADOOP_HOME/bin:$ZOOKEEPER_HOME/bin:$HIVE_HOME/bin
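Then apply the changes to the current shell:
source /etc/profile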
1. MySQL serves as the metastore database, so the MySQL connector jar must be placed in, or symlinked into, the $HIVE_HOME/lib directory.
yum install mysql-connector-java
ln -s /usr/share/java/mysql-connector-java.jar /data/hive-3.1.0/lib/mysql-connector-java.jar
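A quick sanity check that the link resolves:
ls -l /data/hive-3.1.0/lib/mysql-connector-java.jar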
2. Edit hive-site.xml
cd /data/hive-3.1.0/conf
cp hive-env.sh.template hive-env.sh
mkdir /data/hive-3.1.0/tmp
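If hive-site.xml does not exist yet, it can first be created from the bundled template, the same approach used in the old-configuration section below:
cp hive-default.xml.template hive-site.xml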
Embedded mode: the Hive service and the metastore service run in the same process, and the embedded Derby database runs in that process as well.
This mode needs no special configuration.
Local mode: the Hive service and the metastore service run in the same process, while MySQL runs as a separate process, either on the same machine or on a remote one.
This mode only requires pointing ConnectionURL in hive-site.xml at MySQL and configuring the driver class name and the database credentials, as sketched below:
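A minimal sketch of the relevant hive-site.xml properties for local mode; the host, database name, and credentials here are placeholders, not values from this deployment:
<property>
  <name>javax.jdo.option.ConnectionURL</name>
  <value>jdbc:mysql://localhost:3306/hive?createDatabaseIfNotExist=true</value>
</property>
<property>
  <name>javax.jdo.option.ConnectionDriverName</name>
  <value>com.mysql.jdbc.Driver</value>
</property>
<property>
  <name>javax.jdo.option.ConnectionUserName</name>
  <value>hive</value>
</property>
<property>
  <name>javax.jdo.option.ConnectionPassword</name>
  <value>hive</value>
</property>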
Remote mode: the Hive service and the metastore run in separate processes, possibly on different machines. Server configuration (hive-site.xml):
<property>
  <name>hive.metastore.db.type</name>
  <value>mysql</value>
  <description>Expects one of [derby, oracle, mysql, mssql, postgres]. Type of database used by the metastore. Information schema &amp; JDBCStorageHandler depend on it.</description>
</property>
<property>
  <name>hive.metastore.warehouse.dir</name>
  <value>/hive/warehouse</value>
</property>
<property>
  <name>hive.metastore.uris</name>
  <value>thrift://namenode1:9083</value>
  <description>Thrift uri for the remote metastore. Used by metastore client to connect to remote metastore.</description>
</property>
<property>
  <name>javax.jdo.option.ConnectionURL</name>
  <value>jdbc:mysql://172.16.99.40:3306/hive?createDatabaseIfNotExist=true</value>
</property>
<property>
  <name>javax.jdo.option.ConnectionDriverName</name>
  <value>com.mysql.jdbc.Driver</value>
</property>
<property>
  <name>javax.jdo.option.ConnectionUserName</name>
  <value>hive</value>
</property>
<property>
  <name>javax.jdo.option.ConnectionPassword</name>
  <value>hive</value>
</property>
<property>
  <name>hive.exec.local.scratchdir</name>
  <value>/data/hive-3.1.0/tmp</value>
  <description>Local scratch space for Hive jobs</description>
</property>
<property>
  <name>hive.downloaded.resources.dir</name>
  <value>/data/hive-3.1.0/tmp</value>
  <description>Temporary local directory for added resources in the remote file system.</description>
</property>
Client configuration (hive-site.xml):
<property>
  <name>hive.metastore.warehouse.dir</name>
  <value>/hive/warehouse</value>
</property>
<property>
  <name>hive.metastore.local</name>
  <value>false</value>
</property>
<property>
  <name>hive.metastore.uris</name>
  <value>thrift://namenode2:9083</value>
</property>
3. Initialize the metastore database
/data/hive-3.1.0/bin/schematool -dbType mysql -initSchema
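If the initialization succeeds, the metastore tables should now exist in MySQL. A quick check, assuming the hive/hive account and the 172.16.99.40 host from the configuration above:
mysql -h 172.16.99.40 -uhive -phive -e 'USE hive; SHOW TABLES;'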
4. Resolve the logging jar conflict
Keep /data/hadoop/share/hadoop/common/lib/slf4j-log4j12-1.7.25.jar and remove Hive's duplicate SLF4J binding:
rm /data/hive-3.1.0/lib/log4j-slf4j-impl-2.10.0.jar
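Afterwards only Hadoop's binding should remain; to confirm Hive's copy is gone (the command should print nothing):
ls /data/hive-3.1.0/lib | grep log4j-slf4j-impl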
5. Check the environment
hadoop version
Print just the version number:
hadoop version | awk '{if (NR == 1) {print $2;}}'
6. Start the server (metastore)
$HIVE_HOME/bin/hive --service metastore &
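The metastore listens on port 9083 by default, matching hive.metastore.uris above; one way to confirm it is up:
netstat -lnt | grep 9083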
7. On a machine outside the Hadoop cluster, deploy and start the client
scp -r -P 21860 /data/hadoop root@mysql1:/data/
scp -r -P 21860 /data/hive-3.1.0 root@mysql1:/data/
source /etc/profile
hive
8. Start HiveServer2 on the server (see https://blog.csdn.net/zhanglh046/article/details/78572926)
./hiveserver2 &
On the client, use Beeline:
# beeline
Beeline version 1.2.1 by Apache Hive
beeline> !connect jdbc:hive2://namenode2:10000
Connecting to jdbc:hive2://namenode2:10000
Enter username for jdbc:hive2://namenode2:10000: hive_user
Enter password for jdbc:hive2://namenode2:10000: ******
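Beeline can also connect non-interactively; a sketch assuming the same hive_user account (the password here is a placeholder):
beeline -u jdbc:hive2://namenode2:10000 -n hive_user -p hive_password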
----------------------------------------------------------------------------
Old configuration (Hive 2.0.0)
----------------------------------------------------------------------------
http://apache.fayea.com/hive/hive-2.0.0/
wget http://apache.fayea.com/hive/hive-2.0.0/apache-hive-2.0.0-bin.tar.gz
tar zxvf apache-hive-2.0.0-bin.tar.gz
mv apache-hive-2.0.0-bin /home/hadoop/hive
cd /home/hadoop/hive
cd conf
cp hive-default.xml.template hive-site.xml
<property>
  <name>hive.metastore.warehouse.dir</name>
  <value>/home/hadoop/hive-warehouse</value>
</property>
<property>
  <name>javax.jdo.option.ConnectionURL</name>
  <value>jdbc:mysql://hadoop-master:3306/hive?createDatabaseIfNotExist=true</value>
</property>
<property>
  <name>javax.jdo.option.ConnectionDriverName</name>
  <value>com.mysql.jdbc.Driver</value>
</property>
<property>
  <name>javax.jdo.option.ConnectionUserName</name>
  <value>root</value>
</property>
<property>
  <name>javax.jdo.option.ConnectionPassword</name>
  <value>hive</value>
</property>
<property>
  <name>hive.metastore.local</name>
  <value>false</value>
</property>
<property>
  <name>hive.metastore.uris</name>
  <value>thrift://hadoop-master:9083</value>
</property>
<property>
  <name>datanucleus.readOnlyDatastore</name>
  <value>false</value>
</property>
<property>
  <name>datanucleus.fixedDatastore</name>
  <value>false</value>
</property>
<property>
  <name>datanucleus.autoCreateSchema</name>
  <value>true</value>
</property>
<property>
  <name>datanucleus.autoCreateTables</name>
  <value>true</value>
</property>
<property>
  <name>datanucleus.autoCreateColumns</name>
  <value>true</value>
</property>
hive-env.sh
# Hive Configuration Directory can be controlled by:
export HIVE_CONF_DIR=/home/hadoop/hive/conf
# Folder containing extra libraries required for hive compilation/execution can be controlled by:
export HIVE_AUX_JARS_PATH=/home/hadoop/hive/lib
Download the MySQL JDBC driver:
http://dev.mysql.com/downloads/file/?id=460362
Put mysql-connector-java-5.1.38-bin.jar into /home/hadoop/hive/lib.
Start the Hive metastore server:
hive --service metastore
On the client, simply run the hive command:
hive
To exit:
exit;
To kill a running MapReduce job:
hadoop job -kill <jobid>
create table test1 (col_1 string, col_2 string, col_3 string, col_4 string) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' STORED AS TEXTFILE;
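For reference, the table expects pipe-delimited rows, so a.txt could look like this hypothetical two-row sample:
echo '1|aa|bb|cc' > /home/hadoop/a.txt
echo '2|dd|ee|ff' >> /home/hadoop/a.txt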
Load from a local file:
LOAD DATA LOCAL INPATH '/home/hadoop/a.txt' OVERWRITE INTO TABLE test1;
Inside the Hive CLI, list the table's storage directory:
dfs -ls /home/hadoop/hive-warehouse/test1;
/home/hadoop/hive-warehouse is the warehouse path configured in hive-site.xml, i.e. a path in HDFS, not a local filesystem path.
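The same listing also works from an ordinary shell, since the warehouse directory lives in HDFS:
hadoop fs -ls /home/hadoop/hive-warehouse/test1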
Load from a file already in HDFS:
hadoop fs -put /home/hadoop/b.txt /data/input
hadoop fs -ls /data/input
LOAD DATA INPATH '/data/input/b.txt' OVERWRITE INTO TABLE test1;
Load from another table:
create table test2 (col_1 string, col_2 string, col_3 string) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' STORED AS TEXTFILE;
insert into table test2 select col_1, col_2, col_3 from test1;
create table test3 as select * from test1;
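To verify the loads and the copies, a couple of routine queries:
select count(*) from test1;
select * from test3 limit 10;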