Tachyon Basic Usage 10: Running Shark on Tachyon

I. Install Hive

1. Download and extract Hive

[root@node1 soft]# tar -zxf apache-hive-0.13.1-bin.tar.gz -C /usr/local/
[root@node1 soft]# ln -s /usr/local/apache-hive-0.13.1-bin/ /usr/local/hive
[root@node1 soft]#

2. Configure Hive environment variables

[root@node1 soft]# cat /etc/profile.d/hive.sh
HIVE_HOME=/usr/local/hive
PATH=$HIVE_HOME/bin:$PATH
[root@node1 soft]# . /etc/profile.d/hive.sh
[root@node1 soft]#

3. Install the MySQL database

[root@node1 soft]# yum -y install mysql-server
[root@node1 soft]# chkconfig mysqld on
[root@node1 soft]# service mysqld restart
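
Before creating the metastore database, it may be worth confirming that mysqld is actually up (a hypothetical check, not part of the original walkthrough; on a fresh install the MySQL root account has no password):

[root@node1 soft]# service mysqld status
[root@node1 soft]# mysql -u root -e "select version();"   # should print the server version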

4. Configure MySQL privileges

mysql> create database hive;
Query OK, 1 row affected (0.00 sec)
 
mysql> grant all on *.* to 'hive'@'%' identified by '123456';
Query OK, 0 rows affected (0.00 sec)
 
mysql>
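
To verify that the grant works for remote connections, you can try logging in as the new hive user from one of the slaves (a hypothetical check):

[root@node2 ~]# mysql -h node1 -u hive -p123456 -e "show databases;"   # the '%' host wildcard should allow this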

5. Copy the configuration file templates

[root@node1 conf]# pwd
/usr/local/hive/conf
[root@node1 conf]# cp hive-env.sh.template hive-env.sh
[root@node1 conf]# cp hive-default.xml.template hive-site.xml
[root@node1 conf]# cp hive-exec-log4j.properties.template hive-exec-log4j.properties
[root@node1 conf]# cp hive-log4j.properties.template hive-log4j.properties
[root@node1 conf]#

6. Configure hive-env.sh

[root@node1 conf]# pwd
/usr/local/hive/conf
[root@node1 conf]# cat hive-env.sh | grep -v ^$ | grep -v ^#
HADOOP_HOME=/usr/local/hadoop
[root@node1 conf]#

7. Configure hive-site.xml

[root@node1 conf]# pwd
/usr/local/hive/conf
[root@node1 conf]# cat hive-site.xml | grep -v ^$ | grep -v ^#
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
   <property>
     <name>javax.jdo.option.ConnectionURL</name>
      <value>jdbc:mysql://node1:3306/hive?createDatabaseIfNotExist=true</value>
      <description>JDBC connect string for a JDBC metastore</description>
   </property>
   <property>
     <name>javax.jdo.option.ConnectionDriverName</name>
      <value>com.mysql.jdbc.Driver</value>
      <description>Driver class name for a JDBC metastore</description>
   </property>
   <property>
     <name>javax.jdo.option.ConnectionUserName</name>
      <value>hive</value>
      <description>username to use against metastore database</description>
   </property>
   <property>
     <name>javax.jdo.option.ConnectionPassword</name>
      <value>123456</value>
      <description>password to use against metastore database</description>
   </property>
</configuration>
[root@node1 conf]#

8. Copy the MySQL JDBC driver

[root@node1 soft]# cp mysql-connector-java-5.1.22-bin.jar /usr/local/hive/lib/
[root@node1 soft]#
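
With the driver in place, the Hive CLI should now be able to reach the MySQL metastore; a quick smoke test (hypothetical, not from the original writeup; the schema is created on first use thanks to createDatabaseIfNotExist=true):

[root@node1 soft]# hive -e "show databases;"   # should print "default" and exit cleanly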

II. Install Shark

1. Download and extract Shark

[root@node1 soft]# tar -zxf shark-0.9.1-bin-hadoop2.tgz -C /usr/local/ 
[root@node1 soft]# ln -s /usr/local/shark-0.9.1-bin-hadoop2/ /usr/local/shark
[root@node1 soft]#

2. Configure Shark environment variables

[root@node1 soft]# cat /etc/profile.d/shark.sh
SHARK_HOME=/usr/local/shark
PATH=$SHARK_HOME/bin:$PATH
[root@node1 soft]# . /etc/profile.d/shark.sh
[root@node1 soft]#

3. Configure shark-env.sh

[root@node1 conf]# pwd
/usr/local/shark/conf
[root@node1 conf]# cat shark-env.sh | grep -v ^$ | grep -v ^#
export JAVA_HOME=/usr/java/latest
export SPARK_MEM=128m
export SHARK_MASTER_MEM=128m
export HADOOP_HOME="/usr/local/hadoop"
export HIVE_HOME="/usr/local/hive"
export HIVE_CONF_DIR="/usr/local/hive/conf"
export SCALA_HOME="/usr/local/scala"
export MASTER="spark://192.168.1.1:7077"
export SPARK_HOME="/usr/local/spark"
SPARK_JAVA_OPTS=" -Dspark.local.dir=/tmp "
SPARK_JAVA_OPTS+="-Dspark.kryoserializer.buffer.mb=10"
SPARK_JAVA_OPTS+="-verbose:gc -XX:-PrintGCDetails -XX:+PrintGCTimeStamps "
export SPARK_JAVA_OPTS
[root@node1 conf]#
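
Note that the trailing spaces inside the quoted SPARK_JAVA_OPTS fragments matter: the += lines are plain string concatenation, so without them adjacent JVM flags would run together. A quick way to inspect the assembled value (a hypothetical check):

[root@node1 conf]# bash -c 'source shark-env.sh; echo "$SPARK_JAVA_OPTS"'   # flags should be space-separated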

4. Copy Hive to all slaves

[root@node1 ~]# scp -r /usr/local/apache-hive-0.13.1-bin/ node2:/usr/local/
[root@node1 ~]# scp -r /usr/local/apache-hive-0.13.1-bin/ node3:/usr/local/
[root@node1 ~]# ssh node2 ln -s /usr/local/apache-hive-0.13.1-bin/ /usr/local/hive
[root@node1 ~]# ssh node3 ln -s /usr/local/apache-hive-0.13.1-bin/ /usr/local/hive
[root@node1 ~]# scp /etc/profile.d/hive.sh node2:/etc/profile.d/
hive.sh                                      100%   52     0.1KB/s  00:00    
[root@node1 ~]# scp /etc/profile.d/hive.sh node3:/etc/profile.d/
hive.sh                                      100%   52     0.1KB/s  00:00    
[root@node1 ~]#
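
A quick check that the slaves actually received the Hive tree and the symlink (hypothetical, not part of the original steps):

[root@node1 ~]# ssh node2 ls -ld /usr/local/hive /usr/local/apache-hive-0.13.1-bin   # both paths should exist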

5. Copy the Shark directory to all slaves

[root@node1 ~]# scp -r /usr/local/shark-0.9.1-bin-hadoop2/ node2:/usr/local/
[root@node1 ~]# scp -r /usr/local/shark-0.9.1-bin-hadoop2/ node3:/usr/local/
[root@node1 ~]# ssh node2 ln -s /usr/local/shark-0.9.1-bin-hadoop2/ /usr/local/shark
[root@node1 ~]# ssh node3 ln -s /usr/local/shark-0.9.1-bin-hadoop2/ /usr/local/shark
[root@node1 ~]# scp /etc/profile.d/shark.sh node2:/etc/profile.d/
shark.sh                                      100%   55     0.1KB/s  00:00    
[root@node1 ~]# scp /etc/profile.d/shark.sh node3:/etc/profile.d/
shark.sh                                      100%   55    0.1KB/s   00:00    
[root@node1 ~]#

6. Repackage hive-exec-0.11.0-shark-0.9.1.jar

The hive-exec jar bundles its own copy of the protobuf classes, which conflicts with the protobuf version Hadoop 2 ships; stripping those classes out and rebuilding the jar works around the clash:

cd $SHARK_HOME/lib_managed/jars/edu.berkeley.cs.shark/hive-exec
unzip hive-exec-0.11.0-shark-0.9.1.jar
rm -f com/google/protobuf/*
rm hive-exec-0.11.0-shark-0.9.1.jar
zip -r hive-exec-0.11.0-shark-0.9.1.jar *
rm -rf com hive-exec-log4j.properties javaewah/ javax/ javolution/ META-INF/ org/
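
To confirm the protobuf classes are really gone from the rebuilt jar, a hypothetical sanity check:

[root@node1 hive-exec]# unzip -l hive-exec-0.11.0-shark-0.9.1.jar | grep com/google/protobuf   # no output means they were removed

Since step 5 copied the Shark directory to the slaves before this fix, the same repackaging (or a copy of the fixed jar) is likely needed on node2 and node3 as well.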

III. Integrate Shark with Tachyon

1. Add the following two lines to shark-env.sh

export TACHYON_MASTER="tachyon://node1:19998"
export TACHYON_WAREHOUSE_PATH=/sharktables
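
Before restarting Shark, it may help to confirm the Tachyon master is reachable at that address (a hypothetical check, assuming Tachyon is installed under /usr/local/tachyon):

[root@node1 conf]# /usr/local/tachyon/bin/tachyon tfs ls /   # should list the Tachyon root without errors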

2. Sync the modified configuration to the other nodes

[root@node1 conf]# scp shark-env.sh node2:/usr/local/shark/conf/
shark-env.sh                                  100% 2253     2.2KB/s   00:00   
[root@node1 conf]# scp shark-env.sh node3:/usr/local/shark/conf/
shark-env.sh                                  100% 2253     2.2KB/s   00:00   
[root@node1 conf]#

3. Start Shark

[root@node1 conf]# shark
shark> show tables;
OK
ssq
Time taken: 0.841 seconds
shark>

4. Cache a table in Tachyon by specifying TBLPROPERTIES("shark.cache" = "tachyon")

shark> create table test1 TBLPROPERTIES("shark.cache" = "tachyon") as select * from ssq;
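
Once the table is created, its data files should appear under the Tachyon warehouse path configured above (a hypothetical check):

[root@node1 conf]# /usr/local/tachyon/bin/tachyon tfs ls /sharktables   # the test1 table data should be listed here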

5. Cache a table in Tachyon by giving it a name ending in _tachyon

shark> CREATE TABLE test_tachyon AS SELECT * FROM ssq;
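
The cached table can then be queried like any other; subsequent scans read from Tachyon memory rather than HDFS (a minimal usage sketch):

shark> SELECT COUNT(*) FROM test_tachyon;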


