(1)下载压缩文件到/opt/software/目录下
https://archive.apache.org/dist/spark/spark-2.4.4/spark-2.4.4-bin-hadoop2.6.tgz
[root@nodefour install]# cd /opt/software/
[root@nodefour software]# ll
总用量 2549868
-rw-r--r-- 1 root root 412461182 12月 21 14:02 apache-phoenix-4.14.0-cdh5.14.2-bin.tar.gz
-rw-r--r-- 1 root root 433895552 12月 8 14:59 hadoop-2.6.0-cdh5.14.2.tar.gz
-rw-r--r-- 1 root root 267038262 12月 17 09:10 hbase-1.2.0-cdh5.14.2.tar.gz
-rw-r--r-- 1 root root 91 12月 14 15:08 jd.txt
-rw-r--r-- 1 root root 42136632 12月 24 16:27 kafka_2.11-0.11.0.2.gz
-rw-r--r-- 1 root root 203728858 1月 4 00:28 spark-2.2.0-bin-hadoop2.7.tgz
-rw-r--r-- 1 root root 228065807 12月 24 16:27 spark-2.4.4-bin-hadoop2.6.tgz
-rw-r--r-- 1 root root 30742669 12月 21 11:25 sqoop-1.4.6-cdh5.14.2.tar.gz
-rw-r--r-- 1 root root 992975720 12月 12 17:32 zeppelin-0.8.1-bin-all.tgz
(2)解压到文件夹/opt/install/
[root@nodefour software]# tar -zxf spark-2.4.4-bin-hadoop2.6.tgz -C /opt/install/
[root@nodefour software]# cd /opt/install/
[root@nodefour install]# ll
总用量 12
drwxr-xr-x 18 root root 4096 12月 24 10:09 hadoop
lrwxrwxrwx 1 root root 22 12月 17 09:15 hbase -> hbase-1.2.0-cdh5.14.2/
drwxr-xr-x 32 1106 4001 4096 12月 20 15:28 hbase-1.2.0-cdh5.14.2
lrwxrwxrwx 1 root root 34 12月 8 09:59 hive -> /opt/install/hive-1.1.0-cdh5.14.2/
drwxr-xr-x 12 1106 4001 209 12月 10 10:26 hive-1.1.0-cdh5.14.2
drwxr-xr-x 13 1000 1000 211 8月 28 2019 spark-2.4.4-bin-hadoop2.6
lrwxrwxrwx 1 root root 35 12月 21 11:29 sqoop -> /opt/install/sqoop-1.4.6-cdh5.14.2/
drwxr-xr-x 10 root root 4096 3月 28 2018 sqoop-1.4.6-cdh5.14.2
drwxr-xr-x 12 503 wheel 223 12月 14 14:51 zeppelin-0.8.1-bin-all
drwxr-xr-x 4 root root 51 11月 19 15:09 zookeeper
(3)创建软链接
[root@nodefour install]# ln -s spark-2.4.4-bin-hadoop2.6/ spark
[root@nodefour install]# ll
总用量 12
drwxr-xr-x 18 root root 4096 12月 24 10:09 hadoop
lrwxrwxrwx 1 root root 22 12月 17 09:15 hbase -> hbase-1.2.0-cdh5.14.2/
drwxr-xr-x 32 1106 4001 4096 12月 20 15:28 hbase-1.2.0-cdh5.14.2
lrwxrwxrwx 1 root root 34 12月 8 09:59 hive -> /opt/install/hive-1.1.0-cdh5.14.2/
drwxr-xr-x 12 1106 4001 209 12月 10 10:26 hive-1.1.0-cdh5.14.2
lrwxrwxrwx 1 root root 26 1月 4 00:29 spark -> spark-2.4.4-bin-hadoop2.6/
drwxr-xr-x 13 1000 1000 211 8月 28 2019 spark-2.4.4-bin-hadoop2.6
lrwxrwxrwx 1 root root 35 12月 21 11:29 sqoop -> /opt/install/sqoop-1.4.6-cdh5.14.2/
drwxr-xr-x 10 root root 4096 3月 28 2018 sqoop-1.4.6-cdh5.14.2
drwxr-xr-x 12 503 wheel 223 12月 14 14:51 zeppelin-0.8.1-bin-all
drwxr-xr-x 4 root root 51 11月 19 15:09 zookeeper
[root@nodefour install]# cd
[root@nodefour ~]# vi /etc/profile
在文件末尾添加如下内容(配置Spark环境变量)
export SPARK_HOME=/opt/install/spark-2.4.4-bin-hadoop2.6
export PATH=$SPARK_HOME/bin:$PATH
使配置生效
[root@nodefour ~]# source /etc/profile
(1)进入spark文件目录下
[root@nodefour ~]# cd $SPARK_HOME
(2)查看目录下文件
[root@nodefour spark-2.4.4-bin-hadoop2.6]# ll
总用量 104
drwxr-xr-x 2 1000 1000 4096 8月 28 2019 bin
drwxr-xr-x 2 1000 1000 230 8月 28 2019 conf
drwxr-xr-x 5 1000 1000 50 8月 28 2019 data
drwxr-xr-x 4 1000 1000 29 8月 28 2019 examples
drwxr-xr-x 2 1000 1000 12288 8月 28 2019 jars
drwxr-xr-x 4 1000 1000 38 8月 28 2019 kubernetes
-rw-r--r-- 1 1000 1000 21316 8月 28 2019 LICENSE
drwxr-xr-x 2 1000 1000 4096 8月 28 2019 licenses
-rw-r--r-- 1 1000 1000 42919 8月 28 2019 NOTICE
drwxr-xr-x 7 1000 1000 275 8月 28 2019 python
drwxr-xr-x 3 1000 1000 17 8月 28 2019 R
-rw-r--r-- 1 1000 1000 3952 8月 28 2019 README.md
-rw-r--r-- 1 1000 1000 164 8月 28 2019 RELEASE
drwxr-xr-x 2 1000 1000 4096 8月 28 2019 sbin
drwxr-xr-x 2 1000 1000 42 8月 28 2019 yarn
(3)复制文件spark-env.sh.template为spark-env.sh(保留原模板文件)
[root@nodefour spark-2.4.4-bin-hadoop2.6]# cp ./conf/spark-env.sh.template ./conf/spark-env.sh
[root@nodefour spark-2.4.4-bin-hadoop2.6]# vi ./conf/spark-env.sh
(4)在文件末尾添加如下内容(JAVA_HOME和SPARK_MASTER_HOST需换成自己的JDK路径和主机名,本例的主机名为nodefour)
export JAVA_HOME=/opt/install/jdk
export HADOOP_HOME=/opt/install/hadoop
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
#指定 master 的主机
export SPARK_MASTER_HOST=hadoop101
#指定 master 的端口
export SPARK_MASTER_PORT=7077
(1)首先启动Hadoop(HDFS和YARN)
(2)启动spark集群
[root@nodefour spark-2.4.4-bin-hadoop2.6]# sbin/start-all.sh
starting org.apache.spark.deploy.master.Master, logging to /opt/install/spark-2.4.4-bin-hadoop2.6/logs/spark-root-org.apache.spark.deploy.master.Master-1-nodefour.out
localhost: starting org.apache.spark.deploy.worker.Worker, logging to /opt/install/spark-2.4.4-bin-hadoop2.6/logs/spark-root-org.apache.spark.deploy.worker.Worker-1-nodefour.out
(3)查看状态,是否有Master和Worker进程
[root@nodefour spark-2.4.4-bin-hadoop2.6]# jps
2608 Master
2720 Jps
2161 ResourceManager
2258 NodeManager
2674 Worker
1829 DataNode
1976 SecondaryNameNode
1740 NameNode
(4)启动 spark-shell 测试 scala 交互式环境
[root@nodefour spark-2.4.4-bin-hadoop2.6]# spark-shell --master spark://192.168.202.204:7077
21/01/04 00:40:05 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
Spark context Web UI available at http://nodefour:4040
Spark context available as 'sc' (master = spark://192.168.202.204:7077, app id = app-20210104004019-0000).
Spark session available as 'spark'.
Welcome to
      ____              __
     / __/__  ___ _____/ /__
    _\ \/ _ \/ _ `/ __/  '_/
   /___/ .__/\_,_/_/ /_/\_\   version 2.4.4
      /_/
Using Scala version 2.11.12 (Java HotSpot(TM) 64-Bit Server VM, Java 1.8.0_241)
Type in expressions to have them evaluated.
Type :help for more information.
scala>