上节解析了 start-master.sh 脚本的内容并进行了 debug(见《start-master.sh脚本解析》),这节分析 spark-daemon.sh 脚本的内容并进行 debug。
# Usage text printed whenever the invocation is invalid.
usage="Usage: spark-daemon.sh [--config <conf-dir>] (start|stop|submit|status) <spark-command> <spark-instance-number> <args...>"

# At least two arguments are required; with one or none, print the usage
# text and abort.
if [ $# -le 1 ]; then
  echo "$usage"
  exit 1
fi

# When the caller has not set SPARK_HOME, default it to the parent of the
# directory that contains this script.
if [ -z "${SPARK_HOME}" ]; then
  export SPARK_HOME="$(cd "$(dirname "$0")"/..; pwd)"
fi

# Source (not execute) spark-config.sh so the variables it sets stay visible
# in this shell. In the trace shown later in this post it exports
# SPARK_CONF_DIR, PYTHONPATH and PYSPARK_PYTHONPATH_SET.
. "${SPARK_HOME}/sbin/spark-config.sh"

# Optional leading "--config <conf-dir>": the value must be an existing
# directory, otherwise report the error and abort. On success it overrides
# SPARK_CONF_DIR. Both the flag and its value are consumed.
if [ "$1" == "--config" ]; then
  shift
  conf_dir="$1"
  if [ ! -d "$conf_dir" ]; then
    echo "ERROR : $conf_dir is not a directory"
    echo "$usage"
    exit 1
  else
    export SPARK_CONF_DIR="$conf_dir"
  fi
  shift
fi

# The next three positional arguments are, in order, the action
# (start|stop|submit|status), the Spark command/class name, and the instance
# number. Whatever remains in "$@" is passed through to the launched command.
option=$1
shift
command=$1
shift
instance=$1
shift
# Rotate a log file, keeping numbered backups.
# Arguments:
#   $1 - path of the log file to rotate
#   $2 - optional highest backup suffix to keep (default 5)
# Behaviour:
#   Does nothing when $1 is not an existing regular file. Otherwise, counting
#   k down from the backup count to 2, each existing "$1.(k-1)" is renamed to
#   "$1.k"; finally "$1" itself is renamed to "$1.<k>", where k has counted
#   down to 1 (or is the supplied count when that was already <= 1).
spark_rotate_log() {
  # Locals keep this helper from overwriting the caller's global $log
  # (and any global $num / $prev).
  local log=$1
  local num=5
  local prev
  if [ -n "$2" ]; then
    num=$2
  fi
  if [ -f "$log" ]; then
    while [ "$num" -gt 1 ]; do
      # Force base 10 so a zero-padded count such as 08 is not read as octal.
      prev=$((10#$num - 1))
      [ -f "$log.$prev" ] && mv "$log.$prev" "$log.$num"
      num=$prev
    done
    mv "$log" "$log.$num"
  fi
}
# Source load-spark-env.sh into the current shell. In the trace shown later
# in this post it loads conf/spark-env.sh and exports SPARK_ENV_LOADED,
# SPARK_CONF_DIR and SPARK_SCALA_VERSION.
. "${SPARK_HOME}/bin/load-spark-env.sh"

# The identity string used in log/pid file names falls back to the current user.
if [[ -z "$SPARK_IDENT_STRING" ]]; then
  export SPARK_IDENT_STRING="$USER"
fi

# Exported unconditionally for the processes launched by this script.
export SPARK_PRINT_LAUNCH_COMMAND="1"

# Log directory: default to $SPARK_HOME/logs and make sure it exists.
if [[ -z "$SPARK_LOG_DIR" ]]; then
  export SPARK_LOG_DIR="${SPARK_HOME}/logs"
fi
mkdir -p "$SPARK_LOG_DIR"

# Probe writability by creating a scratch file; clean it up on success,
# otherwise try to hand ownership of the directory to SPARK_IDENT_STRING.
touch "${SPARK_LOG_DIR}/.spark_test" &> /dev/null
TEST_LOG_DIR=$?
if (( TEST_LOG_DIR == 0 )); then
  rm -f "${SPARK_LOG_DIR}/.spark_test"
else
  chown "$SPARK_IDENT_STRING" "$SPARK_LOG_DIR"
fi

# Directory that holds pid files; defaults to /tmp.
[[ -n "$SPARK_PID_DIR" ]] || SPARK_PID_DIR=/tmp

# Per-instance log file and pid file paths.
log="${SPARK_LOG_DIR}/spark-${SPARK_IDENT_STRING}-${command}-${instance}-${HOSTNAME}.out"
pid="${SPARK_PID_DIR}/spark-${SPARK_IDENT_STRING}-${command}-${instance}.pid"

# Default scheduling priority (passed to `nice -n` when launching).
if [[ -z "$SPARK_NICENESS" ]]; then
  export SPARK_NICENESS=0
fi
# Run the given command line, by default as a background daemon.
# Arguments:
#   "$@" - the command to run and its arguments
# Globals:
#   SPARK_NO_DAEMONIZE (read) - when defined (even if empty), run in the foreground
#   log (read)     - file that receives the daemon's stdout/stderr
#   pid (read)     - file that receives the daemon's process id
#   newpid (written) - process id of the background job
execute_command() {
  local i
  # ${VAR+set} expands to "set" when VAR is defined (even if empty) and to
  # nothing when it is unset, so this branch runs only when
  # SPARK_NO_DAEMONIZE is unset.
  if [ -z "${SPARK_NO_DAEMONIZE+set}" ]; then
    # Start the command in the background, detached from the terminal.
    # "$log" is quoted so a path containing whitespace is not an
    # ambiguous redirect.
    nohup -- "$@" >> "$log" 2>&1 < /dev/null &
    # $! is the pid of the background job just started; record it.
    newpid="$!"
    echo "$newpid" > "$pid"

    # Poll up to 10 times, 0.5s apart (about 5s in total), until the
    # process's command name contains "java".
    for i in {1..10}; do
      if [[ $(ps -p "$newpid" -o comm=) =~ "java" ]]; then
        break
      fi
      sleep 0.5
    done

    # Give the process another 2 seconds before the final check.
    sleep 2
    # If the process is not (or no longer) a java process, the launch
    # failed: show the last two log lines and where the full log is.
    if [[ ! $(ps -p "$newpid" -o comm=) =~ "java" ]]; then
      echo "failed to launch: $*"
      tail -2 "$log" | sed 's/^/ /'
      echo "full log in $log"
    fi
  else
    # Foreground mode: run the command directly in this shell's context.
    "$@"
  fi
}
# Pre-flight checks followed by the actual launch.
# Arguments:
#   $1   - launch mode: "class" (via bin/spark-class) or "submit"
#          (via bin/spark-submit --class)
#   rest - extra arguments forwarded to the launched command
# Globals read: SPARK_PID_DIR, pid, command, log, SPARK_MASTER, SPARK_HOME,
#               SPARK_NICENESS
# Exits with status 1 when the pid file points at a live java process or
# when the mode is unknown.
run_command() {
  mode="$1"
  shift

  # Make sure the pid directory exists.
  mkdir -p "$SPARK_PID_DIR"

  # Refuse to start when the recorded pid still belongs to a java process.
  if [[ -f "$pid" ]]; then
    TARGET_ID="$(cat "$pid")"
    if [[ $(ps -p "$TARGET_ID" -o comm=) =~ "java" ]]; then
      echo "$command running as process $TARGET_ID. Stop it first."
      exit 1
    fi
  fi

  # When SPARK_MASTER is set, mirror its contents into SPARK_HOME over ssh
  # (deleting extraneous local files, skipping .svn and log directories).
  if [[ -n "$SPARK_MASTER" ]]; then
    echo "rsync from $SPARK_MASTER"
    rsync -a -e ssh --delete --exclude=.svn --exclude='logs/*' --exclude='contrib/hod/logs/*' "$SPARK_MASTER/" "${SPARK_HOME}"
  fi

  spark_rotate_log "$log"
  echo "starting $command, logging to $log"

  # Pick the launcher for the requested mode, then start it under nice.
  local -a launcher
  case "$mode" in
    class)
      launcher=("${SPARK_HOME}/bin/spark-class" "$command")
      ;;
    submit)
      launcher=(bash "${SPARK_HOME}/bin/spark-submit" --class "$command")
      ;;
    *)
      echo "unknown mode: $mode"
      exit 1
      ;;
  esac
  execute_command nice -n "$SPARK_NICENESS" "${launcher[@]}" "$@"
}
# Dispatch on the requested action.
case $option in

  # submit: launch through run_command in "submit" mode.
  (submit)
    run_command submit "$@"
    ;;

  # start: launch through run_command in "class" mode.
  (start)
    run_command class "$@"
    ;;

  # stop: if the pid file points at a java process, kill it and, when the
  # kill succeeds, remove the pid file.
  (stop)
    if [ -f "$pid" ]; then
      TARGET_ID="$(cat "$pid")"
      if [[ $(ps -p "$TARGET_ID" -o comm=) =~ "java" ]]; then
        echo "stopping $command"
        kill "$TARGET_ID" && rm -f "$pid"
      else
        echo "no $command to stop"
      fi
    else
      echo "no $command to stop"
    fi
    ;;

  # status: exit 0 when running, 1 when a pid file exists but the process
  # is not a java process, 2 when there is no pid file.
  (status)
    if [ -f "$pid" ]; then
      TARGET_ID="$(cat "$pid")"
      if [[ $(ps -p "$TARGET_ID" -o comm=) =~ "java" ]]; then
        echo "$command is running."
        exit 0
      else
        echo "$pid file is present but $command not running"
        exit 1
      fi
    else
      echo "$command not running."
      exit 2
    fi
    ;;

  # Anything else: print the usage text and fail.
  (*)
    echo "$usage"
    exit 1
    ;;

esac
因为 start-master.sh 脚本内部调用了 spark-daemon.sh 脚本并传入了参数,所以可以直接在外部以相同的参数运行 spark-daemon.sh 来进行 debug:
sh -x /usr/local/spark/sbin/spark-daemon.sh start org.apache.spark.deploy.master.Master 1 --host s101 --port 7077 --webui-port 8080
+ usage='Usage: spark-daemon.sh [--config <conf-dir>] (start|stop|submit|status) <spark-command> <spark-instance-number> <args...>'
# 如果参数个数不超过1个,则打印usage使用说明并中断脚本(此处共9个参数,继续执行)
+ '[' 9 -le 1 ']'
# 判断是否配置了SPARK_HOME,如果没有则手动设置
+ '[' -z /usr/local/spark ']'
# 通过.加上文件名临时执行spark-config.sh脚本
## 设置SPARK_HOME、SPARK_CONF_DIR以及python的一些环境变量
+ . /usr/local/spark/sbin/spark-config.sh
++ '[' -z /usr/local/spark ']'
++ export SPARK_CONF_DIR=/usr/local/spark/conf
++ SPARK_CONF_DIR=/usr/local/spark/conf
++ '[' -z '' ']'
++ export PYTHONPATH=/usr/local/spark/python:
++ PYTHONPATH=/usr/local/spark/python:
++ export PYTHONPATH=/usr/local/spark/python/lib/py4j-0.10.7-src.zip:/usr/local/spark/python:
++ PYTHONPATH=/usr/local/spark/python/lib/py4j-0.10.7-src.zip:/usr/local/spark/python:
++ export PYSPARK_PYTHONPATH_SET=1
++ PYSPARK_PYTHONPATH_SET=1
# 检查参数中是否包含--config,如果该参数的值不为目录则退出
+ '[' start == --config ']'
# 分别对option、command、instance进行赋值
+ option=start
+ shift
+ command=org.apache.spark.deploy.master.Master
+ shift
+ instance=1
+ shift
# 通过.加上文件名临时执行load-spark-env.sh脚本
## 设置SPARK_HOME、SPARK_SCALA_VERSION环境变量
+ . /usr/local/spark/bin/load-spark-env.sh
++ '[' -z /usr/local/spark ']'
++ '[' -z '' ']'
++ export SPARK_ENV_LOADED=1
++ SPARK_ENV_LOADED=1
++ export SPARK_CONF_DIR=/usr/local/spark/conf
++ SPARK_CONF_DIR=/usr/local/spark/conf
++ '[' -f /usr/local/spark/conf/spark-env.sh ']'
++ set -a
++ . /usr/local/spark/conf/spark-env.sh
+++ JAVA_HOME=/usr/local/jdk
+++ HADOOP_CONF_DIR=/usr/local/spark/hadoop/etc/hadoop
+++ SPARK_LOCAL_IP=s101
++ set +a
++ '[' -z '' ']'
++ ASSEMBLY_DIR2=/usr/local/spark/assembly/target/scala-2.11
++ ASSEMBLY_DIR1=/usr/local/spark/assembly/target/scala-2.12
++ [[ -d /usr/local/spark/assembly/target/scala-2.11 ]]
++ '[' -d /usr/local/spark/assembly/target/scala-2.11 ']'
++ export SPARK_SCALA_VERSION=2.12
++ SPARK_SCALA_VERSION=2.12
+ '[' '' = '' ']'
+ export SPARK_IDENT_STRING=hadoop
+ SPARK_IDENT_STRING=hadoop
# 设置SPARK_PRINT_LAUNCH_COMMAND环境变量
+ export SPARK_PRINT_LAUNCH_COMMAND=1
+ SPARK_PRINT_LAUNCH_COMMAND=1
# 日志相关
+ '[' '' = '' ']'
+ export SPARK_LOG_DIR=/usr/local/spark/logs
+ SPARK_LOG_DIR=/usr/local/spark/logs
+ mkdir -p /usr/local/spark/logs
+ touch /usr/local/spark/logs/.spark_test
+ TEST_LOG_DIR=0
+ '[' 0 = 0 ']'
+ rm -f /usr/local/spark/logs/.spark_test
+ '[' '' = '' ']'
# 设置spark进程目录
+ SPARK_PID_DIR=/tmp
# 定义log和pid变量
+ log=/usr/local/spark/logs/spark-hadoop-org.apache.spark.deploy.master.Master-1-s101.out
+ pid=/tmp/spark-hadoop-org.apache.spark.deploy.master.Master-1.pid
# 设置默认的调度优先级
+ '[' '' = '' ']'
+ export SPARK_NICENESS=0
+ SPARK_NICENESS=0
# 使用option参数进行匹配,调用方法,传入不同的参数
+ case $option in
# 调用run_command方法,传入class参数以及上个脚本传入的参数
+ run_command class --host s101 --port 7077 --webui-port 8080
# run_command方法内容
+ mode=class
+ shift
# 创建进程目录
+ mkdir -p /tmp
# 判断进程文件是否存在,如果存在则给出提示:进程已存在,请先停止
+ '[' -f /tmp/spark-hadoop-org.apache.spark.deploy.master.Master-1.pid ']'
+ '[' '' '!=' '' ']'
+ spark_rotate_log /usr/local/spark/logs/spark-hadoop-org.apache.spark.deploy.master.Master-1-s101.out
+ log=/usr/local/spark/logs/spark-hadoop-org.apache.spark.deploy.master.Master-1-s101.out
+ num=5
+ '[' -n '' ']'
+ '[' -f /usr/local/spark/logs/spark-hadoop-org.apache.spark.deploy.master.Master-1-s101.out ']'
+ '[' 5 -gt 1 ']'
++ expr 5 - 1
+ prev=4
+ '[' -f /usr/local/spark/logs/spark-hadoop-org.apache.spark.deploy.master.Master-1-s101.out.4 ']'
+ mv /usr/local/spark/logs/spark-hadoop-org.apache.spark.deploy.master.Master-1-s101.out.4 /usr/local/spark/logs/spark-hadoop-org.apache.spark.deploy.master.Master-1-s101.out.5
+ num=4
+ '[' 4 -gt 1 ']'
++ expr 4 - 1
+ prev=3
+ '[' -f /usr/local/spark/logs/spark-hadoop-org.apache.spark.deploy.master.Master-1-s101.out.3 ']'
+ mv /usr/local/spark/logs/spark-hadoop-org.apache.spark.deploy.master.Master-1-s101.out.3 /usr/local/spark/logs/spark-hadoop-org.apache.spark.deploy.master.Master-1-s101.out.4
+ num=3
+ '[' 3 -gt 1 ']'
++ expr 3 - 1
+ prev=2
+ '[' -f /usr/local/spark/logs/spark-hadoop-org.apache.spark.deploy.master.Master-1-s101.out.2 ']'
+ mv /usr/local/spark/logs/spark-hadoop-org.apache.spark.deploy.master.Master-1-s101.out.2 /usr/local/spark/logs/spark-hadoop-org.apache.spark.deploy.master.Master-1-s101.out.3
+ num=2
+ '[' 2 -gt 1 ']'
++ expr 2 - 1
+ prev=1
+ '[' -f /usr/local/spark/logs/spark-hadoop-org.apache.spark.deploy.master.Master-1-s101.out.1 ']'
+ mv /usr/local/spark/logs/spark-hadoop-org.apache.spark.deploy.master.Master-1-s101.out.1 /usr/local/spark/logs/spark-hadoop-org.apache.spark.deploy.master.Master-1-s101.out.2
+ num=1
+ '[' 1 -gt 1 ']'
+ mv /usr/local/spark/logs/spark-hadoop-org.apache.spark.deploy.master.Master-1-s101.out /usr/local/spark/logs/spark-hadoop-org.apache.spark.deploy.master.Master-1-s101.out.1
+ echo 'starting org.apache.spark.deploy.master.Master, logging to /usr/local/spark/logs/spark-hadoop-org.apache.spark.deploy.master.Master-1-s101.out'
starting org.apache.spark.deploy.master.Master, logging to /usr/local/spark/logs/spark-hadoop-org.apache.spark.deploy.master.Master-1-s101.out
# 使用mode值:class进行匹配
+ case "$mode" in
# 调用execute_command方法
+ execute_command nice -n 0 /usr/local/spark/bin/spark-class org.apache.spark.deploy.master.Master --host s101 --port 7077 --webui-port 8080
#execute_command方法内容
+ '[' -z ']'
+ newpid=3150
+ echo 3150
+ for i in '{1..10}'
++ ps -p 3150 -o comm=
# 调用spark-class脚本
+ nohup -- nice -n 0 /usr/local/spark/bin/spark-class org.apache.spark.deploy.master.Master --host s101 --port 7077 --webui-port 8080
+ [[ bash =~ java ]]
+ sleep 0.5
+ for i in '{1..10}'
++ ps -p 3150 -o comm=
+ [[ java =~ java ]]
+ break
+ sleep 2
++ ps -p 3150 -o comm=
+ [[ ! java =~ java ]]