Installing Spark on a cluster

1. Download Spark 1.2 from http://spark.apache.org/, extract it into the current directory, and cd into the spark-1.2 source directory.
2. Build command, compatible with Hadoop 2.5.2 (for Hadoop 2.4.x/2.5.x, Spark 1.2's build docs use the -Pyarn -Phadoop-2.4 profiles; -Pyarn-alpha only targets the old YARN alpha API).
export MAVEN_OPTS="-Xmx2g -XX:MaxPermSize=512M -XX:ReservedCodeCacheSize=512m"
mvn -Pyarn -Phadoop-2.4 -Dhadoop.version=2.5.2 -DskipTests clean package
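If the build succeeds, the assembly jar ends up under assembly/target; Spark's make-distribution.sh (in the source root) can also package a deployable tarball. A quick check, using the same profiles as above:

# confirm the assembly jar was built
ls assembly/target/scala-2.10/spark-assembly-*.jar
# optionally, build a ready-to-deploy tarball instead of a bare package
./make-distribution.sh --name hadoop2.5 --tgz -Pyarn -Phadoop-2.4 -Dhadoop.version=2.5.2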
 
3. Download Scala 2.10.4 from http://scala-lang.org/download/all.html
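For a non-interactive download, a direct fetch like the following should also work (the archive URL is an assumption about the scala-lang.org mirror layout, not taken from the original steps):

wget http://www.scala-lang.org/files/archive/scala-2.10.4.tgz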

4. On the cluster master node, cd /home/hadoop/setupSpark
scp [email protected]:/usr/local/houzhizhen/scala/scala-2.10.4.tgz .
scp [email protected]:/usr/local/spark/spark-1.2.0-bin-hadoop2.4.tgz .
scp [email protected]:/home/houzhizhen/git/architecture/hadoop/spark/setupSpark.sh .
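The remaining steps rely on a helper script, upgrade.sh, which is not reproduced in this post. A minimal stand-in with the same distribute/common semantics might look like the sketch below (assumptions: newslaves lists one slave hostname per line, and passwordless ssh to every slave is already configured):

#!/bin/bash
# upgrade.sh (minimal sketch):
#   distribute <hostfile> <file> <dest>   - copy a file to every host in <hostfile>
#   common <hostfile> "<command>"         - run a command on every host in <hostfile>
action=$1; hostfile=$2; shift 2
for host in $(cat "$hostfile"); do
  case "$action" in
    distribute) scp "$1" "$host:$2" ;;
    common)     ssh "$host" "$1" ;;
  esac
done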

./upgrade.sh distribute newslaves scala-2.10.4.tgz /tmp/
./upgrade.sh distribute newslaves spark-1.2.0-bin-hadoop2.4.tgz /tmp/

./upgrade.sh common newslaves "cd /tmp; tar -xzf scala-2.10.4.tgz"
./upgrade.sh common newslaves "rm -rf /data/hadoop/data1/usr/local/scala-2.10.4; mv /tmp/scala-2.10.4 /data/hadoop/data1/usr/local/; rm -rf /usr/local/scala; ln -s /data/hadoop/data1/usr/local/scala-2.10.4 /usr/local/scala"
./upgrade.sh common newslaves "rm -rf /tmp/scala-2.10.4.tgz"

./upgrade.sh common newslaves "cd /tmp; tar -xzf spark-1.2.0-bin-hadoop2.4.tgz"
./upgrade.sh common newslaves "rm -rf /data/hadoop/data1/usr/local/spark-1.2.0-bin-hadoop2.4; mv /tmp/spark-1.2.0-bin-hadoop2.4 /data/hadoop/data1/usr/local/; rm -rf /usr/local/spark; ln -s /data/hadoop/data1/usr/local/spark-1.2.0-bin-hadoop2.4 /usr/local/spark "
./upgrade.sh common newslaves "rm -rf /tmp/spark-1.2.0-bin-hadoop2.4.tgz"
./upgrade.sh common newslaves "chown -R hadoop:hadoop /data/hadoop/data1/usr/local/spark-1.2.0-bin-hadoop2.4"


#./upgrade.sh common newslaves "cp /usr/local/hadoop/lib/hadoop-gpl-compression-0.2.0-dev.jar /usr/local/spark/lib"

./upgrade.sh common newslaves "echo 'export SCALA_HOME=/usr/local/scala' >> /etc/profile"
./upgrade.sh common newslaves "echo 'export SPARK_HOME=/usr/local/spark' >> /etc/profile"
./upgrade.sh common newslaves "echo 'export PATH=/usr/local/scala/bin:/usr/local/spark/bin:$PATH'  >> /etc/profile"
 cp spark-1.2.0-bin-hadoop2.4/conf/spark-env.sh.template spark-1.2.0-bin-hadoop2.4/conf/spark-env.sh
vim spark-1.2.0-bin-hadoop2.4/conf/spark-env.sh
#add the following contents.
export JAVA_HOME=/usr/local/java
export SCALA_HOME=/usr/local/scala
export SPARK_MASTER_IP=10-140-60-85   # Spark standalone reads SPARK_MASTER_IP, not SPARK_MASTER
export SPARK_WORKER_MEMORY=50g
export HADOOP_CONF_DIR=/usr/local/hadoop/etc/hadoop
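If you prefer to script this step instead of editing the file in vim, the same lines can be appended non-interactively:

cat >> spark-1.2.0-bin-hadoop2.4/conf/spark-env.sh <<'EOF'
export JAVA_HOME=/usr/local/java
export SCALA_HOME=/usr/local/scala
export SPARK_MASTER_IP=10-140-60-85
export SPARK_WORKER_MEMORY=50g
export HADOOP_CONF_DIR=/usr/local/hadoop/etc/hadoop
EOF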

./upgrade.sh distribute newslaves spark-1.2.0-bin-hadoop2.4/conf/spark-env.sh /usr/local/spark/conf/

cat newslaves > spark-1.2.0-bin-hadoop2.4/conf/slaves
# add all worker nodes, one hostname per line
./upgrade.sh distribute newslaves spark-1.2.0-bin-hadoop2.4/conf/slaves /usr/local/spark/conf/

# On the node that also hosts the NameNode, lower SPARK_WORKER_MEMORY to 30g in spark-env.sh
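One way to make that change without an interactive edit (a sketch, assuming the NameNode runs on 10.140.60.85, the same host used as the Spark master below):

ssh 10.140.60.85 "sed -i 's/^export SPARK_WORKER_MEMORY=50g$/export SPARK_WORKER_MEMORY=30g/' /usr/local/spark/conf/spark-env.sh"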
su - hadoop
ssh 10.140.60.85 "/usr/local/spark/sbin/start-all.sh"
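Once start-all.sh returns, a few sanity checks (port 8080 for the master web UI and 7077 for the master RPC are Spark standalone defaults; jps assumes a JDK on the PATH):

# a Master process on the master host, a Worker process on every slave
ssh 10.140.60.85 "jps | grep -i master"
./upgrade.sh common newslaves "jps | grep -i worker"
# the web UI at http://10.140.60.85:8080 should list all workers
# run an interactive shell against the cluster
/usr/local/spark/bin/spark-shell --master spark://10-140-60-85:7077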
