I. Install Hive
1. Download and extract Hive
[root@node1 soft]# tar -zxf apache-hive-0.13.1-bin.tar.gz -C /usr/local/
[root@node1 soft]# ln -s /usr/local/apache-hive-0.13.1-bin/ /usr/local/hive
2. Configure the Hive environment variables
[root@node1 soft]# cat /etc/profile.d/hive.sh
HIVE_HOME=/usr/local/hive
PATH=$HIVE_HOME/bin:$PATH
[root@node1 soft]# . /etc/profile.d/hive.sh
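A quick check that the launcher is now on the PATH (the resolved path follows the symlink created above):

[root@node1 soft]# which hive
/usr/local/hive/bin/hive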
3. Install the MySQL database
[root@node1 soft]# yum -y install mysql-server
[root@node1 soft]# chkconfig mysqld on
[root@node1 soft]# service mysqld restart
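Before creating the metastore database, it is worth confirming that mysqld actually came up (output varies slightly by MySQL version):

[root@node1 soft]# service mysqld status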
4. Configure MySQL privileges
mysql> create database hive;
Query OK, 1 row affected (0.00 sec)
mysql> grant all on *.* to 'hive'@'%' identified by '123456';
Query OK, 0 rows affected (0.00 sec)
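A quick sanity check that the new account works; note the grant above is broader than strictly necessary, since the metastore only needs privileges on the hive database (grant all on hive.* would suffice):

[root@node1 soft]# mysql -h node1 -u hive -p123456 -e "show databases;"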
5. Copy the configuration file templates
[root@node1 conf]# pwd
/usr/local/hive/conf
[root@node1 conf]# cp hive-env.sh.template hive-env.sh
[root@node1 conf]# cp hive-default.xml.template hive-site.xml
[root@node1 conf]# cp hive-exec-log4j.properties.template hive-exec-log4j.properties
[root@node1 conf]# cp hive-log4j.properties.template hive-log4j.properties
6. Configure hive-env.sh
[root@node1 conf]# pwd
/usr/local/hive/conf
[root@node1 conf]# cat hive-env.sh |grep -v ^$ |grep -v ^#
HADOOP_HOME=/usr/local/hadoop
7. Configure hive-site.xml
[root@node1 conf]# pwd
/usr/local/hive/conf
[root@node1 conf]# cat hive-site.xml |grep -v ^$|grep -v ^#
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
  <property>
    <name>javax.jdo.option.ConnectionURL</name>
    <value>jdbc:mysql://node1:3306/hive?createDatabaseIfNotExist=true</value>
    <description>JDBC connect string for a JDBC metastore</description>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionDriverName</name>
    <value>com.mysql.jdbc.Driver</value>
    <description>Driver class name for a JDBC metastore</description>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionUserName</name>
    <value>hive</value>
    <description>username to use against metastore database</description>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionPassword</name>
    <value>123456</value>
    <description>password to use against metastore database</description>
  </property>
</configuration>
8. Copy the JDBC driver
[root@node1 soft]# cp mysql-connector-java-5.1.22-bin.jar /usr/local/hive/lib/
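With the driver in place, the metastore connection can be smoke-tested. Assuming HDFS and MySQL are already running, the first Hive statement will also populate the metastore schema in MySQL, which a second check can confirm (a sketch; the metastore tables such as DBS and TBLS are created by Hive itself):

[root@node1 soft]# hive -e "show databases;"
[root@node1 soft]# mysql -u hive -p123456 hive -e "show tables;"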
II. Install Shark
1. Download and extract Shark
[root@node1 soft]# tar -zxf shark-0.9.1-bin-hadoop2.tgz -C /usr/local/
[root@node1 soft]# ln -s /usr/local/shark-0.9.1-bin-hadoop2/ /usr/local/shark
2. Configure the Shark environment variables
[root@node1 soft]# cat /etc/profile.d/shark.sh
SHARK_HOME=/usr/local/shark
PATH=$SHARK_HOME/bin:$PATH
[root@node1 soft]# . /etc/profile.d/shark.sh
3. Configure shark-env.sh
[root@node1 conf]# pwd
/usr/local/shark/conf
[root@node1 conf]# cat shark-env.sh|grep -v ^$|grep -v ^#
export JAVA_HOME=/usr/java/latest
export SPARK_MEM=128m
export SHARK_MASTER_MEM=128m
export HADOOP_HOME="/usr/local/hadoop"
export HIVE_HOME="/usr/local/hive"
export HIVE_CONF_DIR="/usr/local/hive/conf"
export SCALA_HOME="/usr/local/scala"
export MASTER="spark://192.168.1.1:7077"
export SPARK_HOME="/usr/local/spark"
SPARK_JAVA_OPTS=" -Dspark.local.dir=/tmp "
SPARK_JAVA_OPTS+="-Dspark.kryoserializer.buffer.mb=10 "
SPARK_JAVA_OPTS+="-verbose:gc -XX:-PrintGCDetails -XX:+PrintGCTimeStamps "
export SPARK_JAVA_OPTS
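One detail worth double-checking: the MASTER URL must exactly match the spark:// URL reported on the Spark master's web UI (port 8080 by default), otherwise Shark will not register with the cluster. A quick, hypothetical check against this cluster's UI:

[root@node1 conf]# curl -s http://192.168.1.1:8080 | grep "spark://"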
4. Copy the Hive installation to all slaves
[root@node1 ~]# scp -r /usr/local/apache-hive-0.13.1-bin/ node2:/usr/local/
[root@node1 ~]# scp -r /usr/local/apache-hive-0.13.1-bin/ node3:/usr/local/
[root@node1 ~]# ssh node2 ln -s /usr/local/apache-hive-0.13.1-bin/ /usr/local/hive
[root@node1 ~]# ssh node3 ln -s /usr/local/apache-hive-0.13.1-bin/ /usr/local/hive
[root@node1 ~]# scp /etc/profile.d/hive.sh node2:/etc/profile.d/
hive.sh                                          100%   52     0.1KB/s   00:00
[root@node1 ~]# scp /etc/profile.d/hive.sh node3:/etc/profile.d/
hive.sh                                          100%   52     0.1KB/s   00:00
5. Copy the Shark directory to all slaves
[root@node1 ~]# scp -r /usr/local/shark-0.9.1-bin-hadoop2/ node2:/usr/local/
[root@node1 ~]# scp -r /usr/local/shark-0.9.1-bin-hadoop2/ node3:/usr/local/
[root@node1 ~]# ssh node2 ln -s /usr/local/shark-0.9.1-bin-hadoop2/ /usr/local/shark
[root@node1 ~]# ssh node3 ln -s /usr/local/shark-0.9.1-bin-hadoop2/ /usr/local/shark
[root@node1 ~]# scp /etc/profile.d/shark.sh node2:/etc/profile.d/
shark.sh                                         100%   55     0.1KB/s   00:00
[root@node1 ~]# scp /etc/profile.d/shark.sh node3:/etc/profile.d/
shark.sh                                         100%   55     0.1KB/s   00:00
6. Patch the jar "hive-exec-0.11.0-shark-0.9.1.jar"
cd $SHARK_HOME/lib_managed/jars/edu.berkeley.cs.shark/hive-exec
unzip hive-exec-0.11.0-shark-0.9.1.jar
rm -f com/google/protobuf/*
rm hive-exec-0.11.0-shark-0.9.1.jar
zip -r hive-exec-0.11.0-shark-0.9.1.jar *
rm -rf com hive-exec-log4j.properties javaewah/ javax/ javolution/ META-INF/ org/
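This repack is needed because hive-exec bundles its own, older protobuf classes, which conflict with the protobuf 2.5 that Hadoop 2 ships; deleting com/google/protobuf/* makes Hive fall back to Hadoop's copy. Also note that the Shark directory was copied to the slaves in the previous step, before the jar was patched, so the patched jar should be pushed out as well. A sketch, assuming identical paths on all nodes (the first command should print nothing if the strip worked):

unzip -l hive-exec-0.11.0-shark-0.9.1.jar | grep protobuf
scp hive-exec-0.11.0-shark-0.9.1.jar node2:$PWD/
scp hive-exec-0.11.0-shark-0.9.1.jar node3:$PWD/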
III. Integrate Shark with Tachyon
1. Modify shark-env.sh
Add the following two lines:

export TACHYON_MASTER="tachyon://node1:19998"
export TACHYON_WAREHOUSE_PATH=/sharktables
2. Sync the modified configuration to the other nodes
[root@node1 conf]# scp shark-env.sh node2:/usr/local/shark/conf/
shark-env.sh                                     100% 2253     2.2KB/s   00:00
[root@node1 conf]# scp shark-env.sh node3:/usr/local/shark/conf/
shark-env.sh                                     100% 2253     2.2KB/s   00:00
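Before launching Shark, it is sensible to verify that the Tachyon master configured above is reachable. A sketch using the tachyon CLI, assuming Tachyon is installed under /usr/local/tachyon and its master is already running on node1:

[root@node1 conf]# /usr/local/tachyon/bin/tachyon tfs ls /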
3. Start Shark
[root@node1 conf]# shark
shark> show tables;
OK
ssq
Time taken: 0.841 seconds
4. Cache a table in Tachyon with TBLPROPERTIES("shark.cache" = "tachyon")
shark> create table test1 TBLPROPERTIES("shark.cache" = "tachyon") as select * from ssq;
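After the CTAS completes, test1 behaves like any other table, but its data is stored in Tachyon under the TACHYON_WAREHOUSE_PATH configured earlier (/sharktables) rather than in the Shark JVM heap. An example query (output depends on the contents of ssq):

shark> select count(*) from test1;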
5. Cache a table in Tachyon by ending its name with _tachyon
shark> CREATE TABLE test_tachyon AS SELECT * FROM ssq;
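For context, this suffix convention mirrors Shark's in-memory caching: a table whose name ends in _cached is cached in Spark memory, while the _tachyon suffix stores it in Tachyon. A brief illustration reusing the ssq table from above (the table names here are hypothetical):

shark> CREATE TABLE ssq_cached AS SELECT * FROM ssq;
shark> CREATE TABLE ssq_tachyon AS SELECT * FROM ssq;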