Software preparation: from the Apache download archive's Index of /dist/spark, pick the build bundled for your Hadoop version (here spark-2.4.0-bin-hadoop2.6).
1. Extract (here under /opt, so the result matches SPARK_HOME=/opt/spark set below):
tar -zxvf spark-2.4.0-bin-hadoop2.6.tgz
mv spark-2.4.0-bin-hadoop2.6 spark
vim /etc/profile.d/bigdata-etc.sh
export SPARK_HOME=/opt/spark
export PATH=$PATH:$SPARK_HOME/bin:$SPARK_HOME/sbin
source /etc/profile.d/bigdata-etc.sh
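A quick check that the environment took effect (assuming the tarball really was unpacked under /opt as above):
echo $SPARK_HOME
spark-submit --version    # should print the Spark 2.4.0 version banner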
2. Create the config files from the templates (run inside the conf directory):
cd /opt/spark/conf
cp spark-env.sh.template spark-env.sh
cp slaves.template slaves
cp spark-defaults.conf.template spark-defaults.conf
3. vim /opt/spark/conf/spark-env.sh
export JAVA_HOME=/opt/jdk
export SCALA_HOME=/opt/scala
export SPARK_HOME=/opt/spark
export HADOOP_HOME=/opt/hadoop
export SPARK_WORKER_CORES=16          # set to the number of cores on your machine
export SPARK_WORKER_INSTANCES=1       # one worker instance per node; do not raise this
export SPARK_PID_DIR=/opt/spark/pid   # default is under /tmp, which gets cleaned up over time; stale PID files then make the stop scripts fail with "no worker to stop"
export SPARK_WORKER_MEMORY=45g
export HADOOP_CONF_DIR=/opt/hadoop/etc/hadoop
export YARN_CONF_DIR=/opt/hadoop/etc/hadoop
export SPARK_DAEMON_JAVA_OPTS="-Dspark.deploy.recoveryMode=ZOOKEEPER -Dspark.deploy.zookeeper.url=node01:2181,node02:2181,node03:2181,node04:2181,node05:2181 -Dspark.deploy.zookeeper.dir=/spark"
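The HA settings above only help if the ZooKeeper ensemble on node01-node05 is actually running; a quick sanity check on each ZooKeeper node (assuming ZooKeeper's bin directory is on the PATH there):
zkServer.sh status    # expect Mode: leader on one node and Mode: follower on the others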
4. vim /opt/spark/conf/slaves (the worker hosts)
node03
node04
node05
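start-all.sh starts the workers over SSH, so node01 needs passwordless login to every host listed in slaves. If that is not already in place, something like the following (assuming a key pair already exists under ~/.ssh):
ssh-copy-id root@node03
ssh-copy-id root@node04
ssh-copy-id root@node05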
5. vim /opt/spark/conf/spark-defaults.conf
spark.master spark://192.168.3.2:7077,192.168.3.3:7077
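Optionally, event logging can be turned on here as well so finished applications remain visible in the history server. A sketch, assuming an HDFS directory such as hdfs://node01:9000/spark-logs has been created for this (the path and NameNode port are placeholders, adjust to your cluster):
spark.eventLog.enabled           true
spark.eventLog.dir               hdfs://node01:9000/spark-logs
spark.history.fs.logDirectory    hdfs://node01:9000/spark-logs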
6. Distribute Spark and the environment file to the other nodes (every host in slaves plus the standby master node02 needs a copy):
scp -r /opt/spark root@node02:/opt
scp -r /opt/spark root@node03:/opt
scp -r /opt/spark root@node04:/opt
scp -r /opt/spark root@node05:/opt
scp /etc/profile.d/bigdata-etc.sh root@node02:/etc/profile.d/
scp /etc/profile.d/bigdata-etc.sh root@node03:/etc/profile.d/
scp /etc/profile.d/bigdata-etc.sh root@node04:/etc/profile.d/
scp /etc/profile.d/bigdata-etc.sh root@node05:/etc/profile.d/
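The updated PATH only takes effect in new login shells on those nodes; for an existing session, source the file there as well, for example:
ssh root@node02
source /etc/profile.d/bigdata-etc.sh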
7. Start the cluster
On node01: /opt/spark/sbin/start-all.sh (use the full path so it isn't confused with Hadoop's start-all.sh if that is also on the PATH)
On node02: /opt/spark/sbin/start-master.sh (brings up the standby master)
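To confirm the daemons are up, jps (shipped with the JDK) should show a Master process on node01 and node02 and a Worker process on node03-node05; the master web UIs on port 8080 should report one master as ALIVE and the other as STANDBY.
jps    # expect Master on node01/node02, Worker on node03-node05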
8. Verify
Run in local mode:
spark-submit --class org.apache.spark.examples.SparkPi --master local[*] /opt/spark/examples/jars/spark-examples_*.jar
Run against the standalone master:
spark-submit --class org.apache.spark.examples.SparkPi --master spark://node01:7077 /opt/spark/examples/jars/spark-examples_*.jar
Run on YARN (cluster deploy mode):
spark-submit --class org.apache.spark.examples.SparkPi --master yarn --deploy-mode cluster /opt/spark/examples/jars/spark-examples_*.jar
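In local and standalone mode the result ("Pi is roughly 3.14...") is printed straight to the console. In YARN cluster mode the driver runs inside a container, so the line ends up in the application logs instead; it can be pulled back with, for example:
yarn logs -applicationId <the application id printed by spark-submit>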