spark3.x 集群部署

一.zookeeper集群部署

# Append cluster hostname mappings. Use >> (append), not > — overwriting
# /etc/hosts would destroy the existing localhost entries and break
# anything that resolves "localhost". Replace the placeholder addresses
# with the real node IPs before running.
cat >> /etc/hosts <<'EOF'
*.*.*.222 spark01
*.*.*.220 spark02
*.*.*.253 spark03
EOF

# Passwordless SSH: the active and standby master nodes must reach every
# worker node without a password. Run on all 3 nodes (each node trusts
# every other node, including itself):
ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa
ssh-copy-id spark01
ssh-copy-id spark02
ssh-copy-id spark03

# Install the Java dependency (uncomment on first run; ZooKeeper 3.7
# requires Java 8 or newer).
#yum install -y epel-release
#yum install -y java-11-openjdk-devel

# Create the data and transaction-log directories referenced by zoo.cfg below
mkdir -p /opt/zookeeper/{data,logs}


# Download and unpack ZooKeeper into /opt/zookeeper.
# Check each step explicitly — an unnoticed download failure would make
# the tar step extract a stale or missing archive.
# NOTE(review): mirrors only keep current releases; if 3.7.0 has been
# archived, fall back to https://archive.apache.org/dist/zookeeper/
wget https://mirrors.aliyun.com/apache/zookeeper/zookeeper-3.7.0/apache-zookeeper-3.7.0-bin.tar.gz -P /tmp \
  || { echo "zookeeper download failed" >&2; exit 1; }
# --strip-components=1 drops the top-level apache-zookeeper-*-bin/ directory
tar -zxvf /tmp/apache-zookeeper-*-bin.tar.gz -C /opt/zookeeper --strip-components=1 \
  || { echo "zookeeper unpack failed" >&2; exit 1; }

# Publish ZK_HOME/PATH for all future login shells.
# The quoted 'EOF' delimiter keeps $ZK_HOME/$PATH literal in the file so
# they expand when the profile is sourced, not when it is written.
cat > /etc/profile.d/zookeeper.sh <<'EOF'
export ZK_HOME=/opt/zookeeper
export PATH=$ZK_HOME/bin:$PATH
EOF

# Load it into the current shell as well. Sourcing the specific file is
# more direct than re-executing all of /etc/profile.
source /etc/profile.d/zookeeper.sh


# Write zoo.cfg from scratch. (The original first copied zoo_sample.cfg
# to zoo.cfg and then immediately overwrote it with this heredoc — the
# copy was a dead step and has been removed.)
# server.N lines: N must match each host's myid; 2888 is the
# leader-follower sync port, 3888 the leader-election port.
cat > /opt/zookeeper/conf/zoo.cfg <<'EOF'
tickTime=2000
initLimit=10
syncLimit=5
dataDir=/opt/zookeeper/data
dataLogDir=/opt/zookeeper/logs
clientPort=2181
server.1=spark01:2888:3888
server.2=spark02:2888:3888
server.3=spark03:2888:3888
EOF

# Create the myid file. The id must be unique across the ensemble, with a
# value between 1 and 255, and must match this host's server.N entry in
# zoo.cfg. Run the matching command on each node (prompts show which host):

[root@spark01 ~]# echo '1' >/opt/zookeeper/data/myid
[root@spark02 ~]# echo '2' >/opt/zookeeper/data/myid
[root@spark03 ~]# echo '3' >/opt/zookeeper/data/myid


# Create a dedicated system account (-r) for running ZooKeeper; a real
# shell is kept because the systemd unit below invokes zkServer.sh as
# this user. Then hand the whole install tree to it.
useradd -r -s /bin/bash zookeeper
chown -R zookeeper:zookeeper /opt/zookeeper


# Manage the ZooKeeper service with systemd.
# Type=forking matches zkServer.sh, which daemonizes; PIDFile points at
# zkServer.sh's default pid location under dataDir.
# NOTE(review): JAVA_HOME is hard-coded to /usr/local/java, but the
# install step above suggests the java-11-openjdk yum package, which
# installs under /usr/lib/jvm — confirm the actual path on your hosts,
# otherwise zkServer.sh will fail to locate java.
cat > /usr/lib/systemd/system/zookeeper.service << EOF
[Unit]
Description=Zookeeper Service
After=network.target

[Service]
Type=forking
User=zookeeper
Group=zookeeper
Environment=JAVA_HOME=/usr/local/java
ExecStart=/opt/zookeeper/bin/zkServer.sh start /opt/zookeeper/conf/zoo.cfg
ExecStop=/opt/zookeeper/bin/zkServer.sh stop
PIDFile=/opt/zookeeper/data/zookeeper_server.pid
Restart=always
TimeoutSec=20
SuccessExitStatus=130 143


[Install]
WantedBy=multi-user.target
EOF


# Reload unit files so systemd picks up the new service
systemctl daemon-reload
# (optional) exercise the service lifecycle manually to verify the unit
systemctl start zookeeper
systemctl stop zookeeper
systemctl restart zookeeper
# Start the service and enable it at boot
systemctl enable --now zookeeper

# Show this node's role in the ensemble (leader / follower)
zkServer.sh status


# Only the current leader listens on 2888 (follower sync port); every
# node listens on 3888 (election port) and 2181 (client port).
ss -antlp | grep 888

在集群任意一个节点使用zkCli.sh即可连接当前节点的2181端口;连接以后会有一个zk的控制台
# Connect to a single node
zkCli.sh -server spark01:2181

# Or pass the whole ensemble so the client can fail over between nodes.
# (The original repeated spark01 three times — each node is listed once.)
zkCli.sh -server spark01:2181,spark02:2181,spark03:2181
# Inside the zk console, show the ensemble membership configuration
config

二.spark集群部署

# Publish SPARK_HOME/PATH for all future login shells.
cat > /etc/profile.d/spark.sh <<'EOF'
export SPARK_HOME=/opt/spark
export PATH=$SPARK_HOME/bin:$SPARK_HOME/sbin:$PATH
EOF

# Load it into the current shell as well. Sourcing the specific file is
# more direct than re-executing all of /etc/profile.
source /etc/profile.d/spark.sh

# Generate spark-env.sh. (The original copied spark-env.sh.template first
# and then overwrote it with this heredoc — the copy was a dead step.)
# SPARK_MASTER_HOST replaces SPARK_MASTER_IP, which has been deprecated
# since Spark 2.0. With ZooKeeper HA, set it to the LOCAL hostname on
# each master node (spark01 here; spark02 on the standby master).
# NOTE(review): confirm JAVA_HOME — the java step above suggests the yum
# openjdk package, which does not install to /usr/local/java.
cat > "$SPARK_HOME/conf/spark-env.sh" <<'EOF'
export JAVA_HOME=/usr/local/java
export SPARK_MASTER_HOST=spark01
export SPARK_MASTER_PORT=7077
export SPARK_MASTER_WEBUI_PORT=8090
export SPARK_WORKER_WEBUI_PORT=8091
# Keep the whole value on one line so no raw newlines end up inside the
# JVM options (the original spread it over three lines inside the quotes).
export SPARK_DAEMON_JAVA_OPTS="-Dspark.deploy.recoveryMode=ZOOKEEPER -Dspark.deploy.zookeeper.url=spark01:2181,spark02:2181,spark03:2181 -Dspark.deploy.zookeeper.dir=/spark"
EOF




# SPARK_MASTER_WEBUI_PORT 设置spark master webui端口,默认为8080
# SPARK_WORKER_WEBUI_PORT 设置spark worker webui端口,默认为8081
# spark.deploy.recoveryMode:设置Zookeeper去启动备用Master模式。
# spark.deploy.zookeeper.url: 指定ZooKeeper的Server地址。
# spark.deploy.zookeeper.dir: 保存集群元数据信息的文件、目录。

# Write the workers file: one worker hostname per line. (The original
# copied workers.template first and then overwrote it with this heredoc —
# the copy was a dead step and has been removed.)
cat > "$SPARK_HOME/conf/workers" <<'EOF'
spark02
spark03
EOF



# Ship the configured spark tree to spark02 and spark03 at the same path.
# cd is checked so a failure cannot silently copy from the wrong directory;
# $(pwd) replaces the deprecated backtick form and is quoted.
cd /opt || exit 1
scp -r spark spark02:"$(pwd)"
scp -r spark spark03:"$(pwd)"


# Create the spark user and take ownership of the install tree.
# Run on all three nodes (spark01, spark02, spark03).
useradd spark
chown -R spark:spark /opt/spark


# Start the primary master on spark01
/opt/spark/sbin/start-master.sh -h spark01

# For the ZooKeeper HA configured in spark-env.sh to be useful, a standby
# master must also be started, on spark02:
#   /opt/spark/sbin/start-master.sh -h spark02

# Run on spark02 and spark03: register each worker with EVERY master so
# it can fail over when the active master dies (the original listed only
# spark01, which defeats the HA setup).
/opt/spark/sbin/start-worker.sh spark://spark01:7077,spark02:7077

参考资料:
参考:https://blog.csdn.net/networken/article/details/116207969
参考:https://www.codenong.com/js4c594b949b91/

你可能感兴趣的:(Spark,spark,大数据,big,data)