OS: CentOS 7
Hadoop version: 2.9.2
JDK version: 1.8.0_221
Spark version: 2.4.7
Cluster plan:
Hostname | IP |
---|---|
master | 192.168.1.121 |
slave1 | 192.168.1.122 |
slave2 | 192.168.1.123 |
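The hostnames above are assumed to resolve on every node (and password-free SSH from master to the workers is assumed, as it usually already is for a Hadoop cluster). A typical /etc/hosts entry, identical on all three machines, would be:
192.168.1.121 master
192.168.1.122 slave1
192.168.1.123 slave2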
Download address: https://mirrors.tuna.tsinghua.edu.cn/apache/spark/spark-2.4.7/
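If the tarball is not on the machine yet, it can be fetched directly from the mirror (assuming the release is still hosted there):
[root@master dev]# wget https://mirrors.tuna.tsinghua.edu.cn/apache/spark/spark-2.4.7/spark-2.4.7-bin-hadoop2.7.tgz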
First, extract the downloaded archive into the /home/apps/ directory.
[root@master dev]# mkdir -p /home/apps/
[root@master dev]# tar -zxvf spark-2.4.7-bin-hadoop2.7.tgz -C /home/apps
[root@master dev]# cd /home/apps/
[root@master apps]# ll
total 24
drwxr-xr-x. 7 root root 4096 Jan 23 10:33 flume
drwxr-xr-x. 7 root root 4096 Jan 21 17:13 hbase
drwxr-xr-x. 7 root root 4096 Jan 22 15:04 kafka
drwxr-xr-x. 15 centos centos 4096 Jan 23 14:38 spark-2.4.7-bin-hadoop2.7
drwxr-xr-x. 9 centos centos 4096 Dec 19 2017 sqoop
drwxr-xr-x. 8 root root 4096 Jan 15 20:58 zookeeper
Rename the directory to spark
[root@master apps]# mv spark-2.4.7-bin-hadoop2.7 spark
Spark needs the spark-env.sh and slaves files to be configured.
Copy spark-env.sh.template to spark-env.sh
Copy slaves.template to slaves
[root@master apps]# cd spark/conf/
[root@master conf]# cp spark-env.sh.template spark-env.sh
[root@master conf]# cp slaves.template slaves
Configure the spark-env.sh file
[root@master conf]# vi spark-env.sh
Append the following configuration at the end of the file:
#JAVA_HOME; adjust the path for your installation
export JAVA_HOME=/usr/local/jdk/jdk1.8.0_221/
#Hostname of the Master
export SPARK_MASTER_HOST=master
#Maximum amount of memory each Worker may use
export SPARK_WORKER_MEMORY=1g
#Maximum number of CPU cores each Worker may use
export SPARK_WORKER_CORES=1
#Port for submitting applications to the Master
export SPARK_MASTER_PORT=7077
#Spark jobs will most likely read files from HDFS, so point Spark at the Hadoop configuration directory; adjust the path
export HADOOP_CONF_DIR=/home/hadoop/hadoop-2.9.2/etc/hadoop
Configure the slaves file
[root@master conf]# vi slaves
Append the hostname of every node at the end (listing master here means the master node will also run a Worker):
master
slave1
slave2
Configure environment variables (this must be done on every node)
[root@master conf]# vi /etc/profile
Append the following at the end:
#Spark installation path
export SPARK_HOME=/home/apps/spark
export PATH=$SPARK_HOME/bin:$SPARK_HOME/sbin:$PATH
Use the source command to make the new environment variables take effect immediately
[root@master conf]# source /etc/profile
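A quick way to confirm the PATH change took effect is to query the Spark version (output omitted here):
[root@master conf]# spark-submit --version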
Repeat the same configuration on slave1 and slave2.
Note: Hadoop is also on the PATH and ships start-all.sh/stop-all.sh scripts with the same names as Spark's, so one set must be renamed (here we rename Spark's); otherwise the wrong script may be picked up and the cluster will very likely fail to start.
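To see which start-all.sh currently wins on the PATH, you can check (typically it resolves to Hadoop's copy):
[root@master conf]# which start-all.sh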
Rename the start/stop scripts in the sbin directory
[root@master conf]# cd /home/apps/spark/sbin
[root@master sbin]# ll
total 92
-rwxr-xr-x. 1 centos centos 2803 Sep 8 13:48 slaves.sh
-rwxr-xr-x. 1 centos centos 1429 Sep 8 13:48 spark-config.sh
-rwxr-xr-x. 1 centos centos 5689 Sep 8 13:48 spark-daemon.sh
-rwxr-xr-x. 1 centos centos 1262 Sep 8 13:48 spark-daemons.sh
-rwxr-xr-x. 1 centos centos 1274 Sep 8 13:48 start-history-server.sh
-rwxr-xr-x. 1 centos centos 2050 Sep 8 13:48 start-master.sh
-rwxr-xr-x. 1 centos centos 1877 Sep 8 13:48 start-mesos-dispatcher.sh
-rwxr-xr-x. 1 centos centos 1423 Sep 8 13:48 start-mesos-shuffle-service.sh
-rwxr-xr-x. 1 centos centos 1279 Sep 8 13:48 start-shuffle-service.sh
-rwxr-xr-x. 1 centos centos 3151 Sep 8 13:48 start-slave.sh
-rwxr-xr-x. 1 centos centos 1527 Sep 8 13:48 start-slaves.sh
-rwxr-xr-x. 1 centos centos 1190 Sep 8 13:48 start-all.sh
-rwxr-xr-x. 1 centos centos 1857 Sep 8 13:48 start-thriftserver.sh
-rwxr-xr-x. 1 centos centos 1056 Sep 8 13:48 stop-history-server.sh
-rwxr-xr-x. 1 centos centos 1080 Sep 8 13:48 stop-master.sh
-rwxr-xr-x. 1 centos centos 1227 Sep 8 13:48 stop-mesos-dispatcher.sh
-rwxr-xr-x. 1 centos centos 1084 Sep 8 13:48 stop-mesos-shuffle-service.sh
-rwxr-xr-x. 1 centos centos 1067 Sep 8 13:48 stop-shuffle-service.sh
-rwxr-xr-x. 1 centos centos 1557 Sep 8 13:48 stop-slave.sh
-rwxr-xr-x. 1 centos centos 1064 Sep 8 13:48 stop-slaves.sh
-rwxr-xr-x. 1 centos centos 1478 Sep 8 13:48 stop-all.sh
-rwxr-xr-x. 1 centos centos 1066 Sep 8 13:48 stop-thriftserver.sh
[root@master sbin]# mv start-all.sh start-spark-all.sh
[root@master sbin]# mv stop-all.sh stop-spark-all.sh
Copy the spark directory from master to the other nodes
[root@master sbin]# scp -r /home/apps/spark/ root@slave1:/home/apps/
[root@master sbin]# scp -r /home/apps/spark/ root@slave2:/home/apps/
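To confirm the copy landed on the workers, you can list the directory remotely (just a sanity check):
[root@master sbin]# ssh slave1 ls /home/apps/spark
[root@master sbin]# ssh slave2 ls /home/apps/spark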
Spark startup command
[root@master sbin]# start-spark-all.sh
starting org.apache.spark.deploy.master.Master, logging to /home/apps/spark/logs/spark-root-org.apache.spark.deploy.master.Master-1-master.out
slave2: starting org.apache.spark.deploy.worker.Worker, logging to /home/apps/spark/logs/spark-root-org.apache.spark.deploy.worker.Worker-1-slave2.out
slave1: starting org.apache.spark.deploy.worker.Worker, logging to /home/apps/spark/logs/spark-root-org.apache.spark.deploy.worker.Worker-1-slave1.out
master: starting org.apache.spark.deploy.worker.Worker, logging to /home/apps/spark/logs/spark-root-org.apache.spark.deploy.worker.Worker-1-master.out
Started successfully
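To verify, run jps on each node: master should show a Master plus a Worker (since master is also listed in slaves), and slave1/slave2 should each show a Worker. The Master web UI is served on port 8080 by default, e.g. http://master:8080.
[root@master sbin]# jps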
[root@master sbin]# cd /home/apps/spark/bin/
[root@master bin]# ./spark-shell
21/01/23 15:30:38 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
21/01/23 15:30:50 WARN util.Utils: Service 'SparkUI' could not bind on port 4040. Attempting port 4041.
Spark context Web UI available at http://master:4041
Spark context available as 'sc' (master = local[*], app id = local-1611387050873).
Spark session available as 'spark'.
Welcome to
      ____              __
     / __/__  ___ _____/ /__
    _\ \/ _ \/ _ `/ __/  '_/
   /___/ .__/\_,_/_/ /_/\_\   version 2.4.7
      /_/
Using Scala version 2.11.12 (Java HotSpot(TM) 64-Bit Server VM, Java 1.8.0_221)
Type in expressions to have them evaluated.
Type :help for more information.
scala>
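The shell above was started without --master, so it runs in local mode (note master = local[*] in the log). To attach to the standalone cluster instead, pass the master URL and run a small job as a sanity check; summing 1 to 100 should print 5050.0:
[root@master bin]# ./spark-shell --master spark://master:7077
scala> sc.parallelize(1 to 100).sum()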
Conclusion: this wraps up the Spark installation and configuration entry in the Big Data Hadoop notes series.