Spark Source Code Walkthrough: 2.2 Analysis of the spark-daemon.sh Script

The previous section walked through the contents of start-master.sh and debugged it (see the start-master.sh analysis). This section analyzes the spark-daemon.sh script and debugs it in the same way.

1. Contents of the spark-daemon.sh script

usage="Usage: spark-daemon.sh [--config <conf-dir>] (start|stop|submit|status) <spark-command> <spark-instance-number> <args...>"

# If fewer than two arguments are given, print the usage message and exit
if [ $# -le 1 ]; then
  echo $usage
  exit 1
fi

# If SPARK_HOME is not set, derive it from this script's location
if [ -z "${SPARK_HOME}" ]; then
  export SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)"
fi

# Source spark-config.sh in the current shell (the leading '.')
## It sets SPARK_HOME, SPARK_CONF_DIR, and some Python-related environment variables
. "${SPARK_HOME}/sbin/spark-config.sh"

# If the first argument is --config, take its value as the conf dir; exit if it is not a directory
if [ "$1" == "--config" ]
then
  # Drop the --config argument
  shift
  conf_dir="$1"
  if [ ! -d "$conf_dir" ]
  then
    echo "ERROR : $conf_dir is not a directory"
    echo $usage
    exit 1
  else
    export SPARK_CONF_DIR="$conf_dir"
  fi
  shift
fi

# Assign option, command, and instance from the remaining arguments
option=$1
shift
command=$1
shift
instance=$1
shift

spark_rotate_log ()
{
    log=$1;
    num=5;
    if [ -n "$2" ]; then
	num=$2
    fi
    if [ -f "$log" ]; then # rotate logs
	while [ $num -gt 1 ]; do
	    prev=`expr $num - 1`
	    [ -f "$log.$prev" ] && mv "$log.$prev" "$log.$num"
	    num=$prev
	done
	mv "$log" "$log.$num";
    fi
}
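# Illustration (not in the original script): with the default num=5, a call to
#   spark_rotate_log "$log"
# renames $log.4 -> $log.5, $log.3 -> $log.4, ..., $log.1 -> $log.2 and finally
# $log -> $log.1, so at most five rotated copies are kept and the previous .5 is overwritten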


# Source load-spark-env.sh in the current shell (the leading '.')
## It sets the SPARK_HOME and SPARK_SCALA_VERSION environment variables

. "${SPARK_HOME}/bin/load-spark-env.sh"

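# SPARK_IDENT_STRING is used in the log and pid file names below; default to the current user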
if [ "$SPARK_IDENT_STRING" = "" ]; then
  export SPARK_IDENT_STRING="$USER"
fi

# Set SPARK_PRINT_LAUNCH_COMMAND so the final launch command is printed to the log
export SPARK_PRINT_LAUNCH_COMMAND="1"

# Log directory: default to ${SPARK_HOME}/logs, create it, and verify it is writable
if [ "$SPARK_LOG_DIR" = "" ]; then
  export SPARK_LOG_DIR="${SPARK_HOME}/logs"
fi
mkdir -p "$SPARK_LOG_DIR"
touch "$SPARK_LOG_DIR"/.spark_test > /dev/null 2>&1
TEST_LOG_DIR=$?
if [ "${TEST_LOG_DIR}" = "0" ]; then
  rm -f "$SPARK_LOG_DIR"/.spark_test
else
  chown "$SPARK_IDENT_STRING" "$SPARK_LOG_DIR"
fi

# Directory for the Spark pid files (defaults to /tmp)
if [ "$SPARK_PID_DIR" = "" ]; then
  SPARK_PID_DIR=/tmp
fi

# Define the log file and pid file paths
log="$SPARK_LOG_DIR/spark-$SPARK_IDENT_STRING-$command-$instance-$HOSTNAME.out"
pid="$SPARK_PID_DIR/spark-$SPARK_IDENT_STRING-$command-$instance.pid"

# Set the default scheduling priority (nice value)
if [ "$SPARK_NICENESS" = "" ]; then
    export SPARK_NICENESS=0
fi

# This function runs the given command, daemonized in the background by default,
# or in the foreground when SPARK_NO_DAEMONIZE is set
execute_command() {
  # ${SPARK_NO_DAEMONIZE+set} expands to "set" if the variable is declared (even if empty)
  # and to nothing otherwise, so this branch is taken when SPARK_NO_DAEMONIZE is unset
  # (see the short demo after the listing)
  if [ -z ${SPARK_NO_DAEMONIZE+set} ]; then
      # Run the command (all arguments passed to this function) in the background with nohup,
      # appending its output to the log
      nohup -- "$@" >> $log 2>&1 < /dev/null &
      # $! is the pid of the background process just launched; record it in the pid file
      newpid="$!"
		
      echo "$newpid" > "$pid"

      # Poll up to 10 times (about 5 seconds in total) for the process to show up as a java process
      for i in {1..10}
      do
        if [[ $(ps -p "$newpid" -o comm=) =~ "java" ]]; then
           break
        fi
        sleep 0.5
      done
      
      # Wait another 2 seconds
      sleep 2
      # If the process is no longer running as java, the launch failed: print the last lines of the log
      if [[ ! $(ps -p "$newpid" -o comm=) =~ "java" ]]; then
        echo "failed to launch: $@"
        tail -2 "$log" | sed 's/^/  /'
        echo "full log in $log"
      fi
  else
      "$@"
  fi
}

# This function checks whether the daemon may be started and does some preparation
# before dispatching to execute_command
run_command() {
  # The first argument is the mode (class or submit)
  mode="$1"
  shift
  # Create the pid directory
  mkdir -p "$SPARK_PID_DIR"
  
  # If a pid file exists and that process is still running, ask the user to stop it first and exit
  if [ -f "$pid" ]; then
    TARGET_ID="$(cat "$pid")"
    if [[ $(ps -p "$TARGET_ID" -o comm=) =~ "java" ]]; then
      echo "$command running as process $TARGET_ID.  Stop it first."
      exit 1
    fi
  fi
  # If SPARK_MASTER is set, rsync the Spark directory from that host into the local SPARK_HOME
  # (--delete removes local files missing on the master; logs are excluded)
  if [ "$SPARK_MASTER" != "" ]; then
    echo rsync from "$SPARK_MASTER"
    rsync -a -e ssh --delete --exclude=.svn --exclude='logs/*' --exclude='contrib/hod/logs/*' "$SPARK_MASTER/" "${SPARK_HOME}"
  fi

  spark_rotate_log "$log"
  echo "starting $command, logging to $log"

  # Match on the mode
  case "$mode" in
    (class)
      # For mode class, call execute_command to run the command through bin/spark-class
      execute_command nice -n "$SPARK_NICENESS" "${SPARK_HOME}"/bin/spark-class "$command" "$@"
      ;;

    (submit)
      # For mode submit, call execute_command to run the command through bin/spark-submit
      execute_command nice -n "$SPARK_NICENESS" bash "${SPARK_HOME}"/bin/spark-submit --class "$command" "$@"
      ;;

    (*)
      echo "unknown mode: $mode"
      exit 1
      ;;
  esac

}

# Match on the option
case $option in
  # For submit, call run_command with submit as the first argument
  (submit)
    run_command submit "$@"
    ;;
  # For start, call run_command with class as the first argument
  (start)
    run_command class "$@"
    ;;
  # For stop, kill the process recorded in the pid file and remove the file
  (stop)

    if [ -f $pid ]; then
      TARGET_ID="$(cat "$pid")"
      if [[ $(ps -p "$TARGET_ID" -o comm=) =~ "java" ]]; then
        echo "stopping $command"
        kill "$TARGET_ID" && rm -f "$pid"
      else
        echo "no $command to stop"
      fi
    else
      echo "no $command to stop"
    fi
    ;;
  # For status, report whether the process is running
  (status)

    if [ -f $pid ]; then
      TARGET_ID="$(cat "$pid")"
      if [[ $(ps -p "$TARGET_ID" -o comm=) =~ "java" ]]; then
        echo $command is running.
        exit 0
      else
        echo $pid file is present but $command not running
        exit 1
      fi
    else
      echo $command not running.
      exit 2
    fi
    ;;
  # Anything else: print the usage message
  (*)
    echo $usage
    exit 1
    ;;

esac
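A note on the ${SPARK_NO_DAEMONIZE+set} test inside execute_command: ${VAR+word} is a shell parameter expansion that yields word when VAR is declared (even if it is empty) and nothing when it is undeclared, so -z "${SPARK_NO_DAEMONIZE+set}" is true only when SPARK_NO_DAEMONIZE was never set, and in that case the command is daemonized with nohup. A minimal demo of the idiom (the echoed texts are only for illustration):

unset SPARK_NO_DAEMONIZE
[ -z "${SPARK_NO_DAEMONIZE+set}" ] && echo "unset -> daemonize with nohup"

export SPARK_NO_DAEMONIZE=    # declared but empty still counts as set
[ -z "${SPARK_NO_DAEMONIZE+set}" ] || echo "set -> run in the foreground"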

2. Debugging the spark-daemon.sh script

Because start-master.sh calls spark-daemon.sh internally and passes it a set of arguments, we can supply the same arguments on the command line and trace the execution with sh -x (the number of leading + signs reflects the nesting level: sourced scripts and command substitutions add one level):

sh -x /usr/local/spark/sbin/spark-daemon.sh start org.apache.spark.deploy.master.Master 1 --host s101 --port 7077 --webui-port 8080

+ usage='Usage: spark-daemon.sh [--config <conf-dir>] (start|stop|submit|status) <spark-command> <spark-instance-number> <args...>'

# If fewer than two arguments were given, print the usage message and exit ($# is 9 here, so execution continues)
+ '[' 9 -le 1 ']'

# Check whether SPARK_HOME is set; it already is, so it is not overridden
+ '[' -z /usr/local/spark ']'

# Source spark-config.sh in the current shell (the leading '.')
## It sets SPARK_HOME, SPARK_CONF_DIR, and some Python-related environment variables
+ . /usr/local/spark/sbin/spark-config.sh
++ '[' -z /usr/local/spark ']'
++ export SPARK_CONF_DIR=/usr/local/spark/conf
++ SPARK_CONF_DIR=/usr/local/spark/conf
++ '[' -z '' ']'
++ export PYTHONPATH=/usr/local/spark/python:
++ PYTHONPATH=/usr/local/spark/python:
++ export PYTHONPATH=/usr/local/spark/python/lib/py4j-0.10.7-src.zip:/usr/local/spark/python:
++ PYTHONPATH=/usr/local/spark/python/lib/py4j-0.10.7-src.zip:/usr/local/spark/python:
++ export PYSPARK_PYTHONPATH_SET=1
++ PYSPARK_PYTHONPATH_SET=1

# Check whether the first argument is --config (here it is start, so the block is skipped)
+ '[' start == --config ']'


# Assign option, command, and instance
+ option=start
+ shift
+ command=org.apache.spark.deploy.master.Master
+ shift
+ instance=1
+ shift

# Source load-spark-env.sh in the current shell (the leading '.')
## It sets the SPARK_HOME and SPARK_SCALA_VERSION environment variables
+ . /usr/local/spark/bin/load-spark-env.sh
++ '[' -z /usr/local/spark ']'
++ '[' -z '' ']'
++ export SPARK_ENV_LOADED=1
++ SPARK_ENV_LOADED=1
++ export SPARK_CONF_DIR=/usr/local/spark/conf
++ SPARK_CONF_DIR=/usr/local/spark/conf
++ '[' -f /usr/local/spark/conf/spark-env.sh ']'
++ set -a
++ . /usr/local/spark/conf/spark-env.sh
+++ JAVA_HOME=/usr/local/jdk
+++ HADOOP_CONF_DIR=/usr/local/spark/hadoop/etc/hadoop
+++ SPARK_LOCAL_IP=s101
++ set +a
++ '[' -z '' ']'
++ ASSEMBLY_DIR2=/usr/local/spark/assembly/target/scala-2.11
++ ASSEMBLY_DIR1=/usr/local/spark/assembly/target/scala-2.12
++ [[ -d /usr/local/spark/assembly/target/scala-2.11 ]]
++ '[' -d /usr/local/spark/assembly/target/scala-2.11 ']'
++ export SPARK_SCALA_VERSION=2.12
++ SPARK_SCALA_VERSION=2.12

+ '[' '' = '' ']'
+ export SPARK_IDENT_STRING=hadoop
+ SPARK_IDENT_STRING=hadoop

# Set the SPARK_PRINT_LAUNCH_COMMAND environment variable
+ export SPARK_PRINT_LAUNCH_COMMAND=1
+ SPARK_PRINT_LAUNCH_COMMAND=1

# Log directory setup
+ '[' '' = '' ']'
+ export SPARK_LOG_DIR=/usr/local/spark/logs
+ SPARK_LOG_DIR=/usr/local/spark/logs
+ mkdir -p /usr/local/spark/logs
+ touch /usr/local/spark/logs/.spark_test
+ TEST_LOG_DIR=0
+ '[' 0 = 0 ']'
+ rm -f /usr/local/spark/logs/.spark_test
+ '[' '' = '' ']'

# Set the Spark pid directory
+ SPARK_PID_DIR=/tmp


# Define the log and pid variables
+ log=/usr/local/spark/logs/spark-hadoop-org.apache.spark.deploy.master.Master-1-s101.out
+ pid=/tmp/spark-hadoop-org.apache.spark.deploy.master.Master-1.pid

# Set the default scheduling priority
+ '[' '' = '' ']'
+ export SPARK_NICENESS=0
+ SPARK_NICENESS=0


# Match on the option and call run_command with the corresponding mode
+ case $option in
# Call run_command with class plus the arguments passed through from start-master.sh
+ run_command class --host s101 --port 7077 --webui-port 8080


# Inside run_command
+ mode=class
+ shift
# Create the pid directory
+ mkdir -p /tmp
# If the pid file existed and the process were still running, the script would warn and exit (not the case here)
+ '[' -f /tmp/spark-hadoop-org.apache.spark.deploy.master.Master-1.pid ']'
+ '[' '' '!=' '' ']'
+ spark_rotate_log /usr/local/spark/logs/spark-hadoop-org.apache.spark.deploy.master.Master-1-s101.out
+ log=/usr/local/spark/logs/spark-hadoop-org.apache.spark.deploy.master.Master-1-s101.out
+ num=5
+ '[' -n '' ']'
+ '[' -f /usr/local/spark/logs/spark-hadoop-org.apache.spark.deploy.master.Master-1-s101.out ']'
+ '[' 5 -gt 1 ']'
++ expr 5 - 1
+ prev=4
+ '[' -f /usr/local/spark/logs/spark-hadoop-org.apache.spark.deploy.master.Master-1-s101.out.4 ']'
+ mv /usr/local/spark/logs/spark-hadoop-org.apache.spark.deploy.master.Master-1-s101.out.4 /usr/local/spark/logs/spark-hadoop-org.apache.spark.deploy.master.Master-1-s101.out.5
+ num=4
+ '[' 4 -gt 1 ']'
++ expr 4 - 1
+ prev=3
+ '[' -f /usr/local/spark/logs/spark-hadoop-org.apache.spark.deploy.master.Master-1-s101.out.3 ']'
+ mv /usr/local/spark/logs/spark-hadoop-org.apache.spark.deploy.master.Master-1-s101.out.3 /usr/local/spark/logs/spark-hadoop-org.apache.spark.deploy.master.Master-1-s101.out.4
+ num=3
+ '[' 3 -gt 1 ']'
++ expr 3 - 1
+ prev=2
+ '[' -f /usr/local/spark/logs/spark-hadoop-org.apache.spark.deploy.master.Master-1-s101.out.2 ']'
+ mv /usr/local/spark/logs/spark-hadoop-org.apache.spark.deploy.master.Master-1-s101.out.2 /usr/local/spark/logs/spark-hadoop-org.apache.spark.deploy.master.Master-1-s101.out.3
+ num=2
+ '[' 2 -gt 1 ']'
++ expr 2 - 1
+ prev=1
+ '[' -f /usr/local/spark/logs/spark-hadoop-org.apache.spark.deploy.master.Master-1-s101.out.1 ']'
+ mv /usr/local/spark/logs/spark-hadoop-org.apache.spark.deploy.master.Master-1-s101.out.1 /usr/local/spark/logs/spark-hadoop-org.apache.spark.deploy.master.Master-1-s101.out.2
+ num=1
+ '[' 1 -gt 1 ']'
+ mv /usr/local/spark/logs/spark-hadoop-org.apache.spark.deploy.master.Master-1-s101.out /usr/local/spark/logs/spark-hadoop-org.apache.spark.deploy.master.Master-1-s101.out.1
+ echo 'starting org.apache.spark.deploy.master.Master, logging to /usr/local/spark/logs/spark-hadoop-org.apache.spark.deploy.master.Master-1-s101.out'
starting org.apache.spark.deploy.master.Master, logging to /usr/local/spark/logs/spark-hadoop-org.apache.spark.deploy.master.Master-1-s101.out

# Match on mode, which is class here
+ case "$mode" in
# Call execute_command
+ execute_command nice -n 0 /usr/local/spark/bin/spark-class org.apache.spark.deploy.master.Master --host s101 --port 7077 --webui-port 8080

# Inside execute_command
+ '[' -z ']'
+ newpid=3150
+ echo 3150
+ for i in '{1..10}'
++ ps -p 3150 -o comm=

# The nohup command that launches spark-class in the background (this trace line appears after the pid has already been recorded)
+ nohup -- nice -n 0 /usr/local/spark/bin/spark-class org.apache.spark.deploy.master.Master --host s101 --port 7077 --webui-port 8080
+ [[ bash =~ java ]]
+ sleep 0.5
+ for i in '{1..10}'
++ ps -p 3150 -o comm=
+ [[ java =~ java ]]
+ break
+ sleep 2
++ ps -p 3150 -o comm=
+ [[ ! java =~ java ]]
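Based on the case statement in the script, the same daemon can later be queried or stopped with the status and stop options, and setting SPARK_NO_DAEMONIZE makes execute_command run the Master in the foreground instead of via nohup. A sketch using the same installation paths as above (these invocations are not part of the original post):

# Report whether the Master started above is still running (exit code 0, 1 or 2)
sh /usr/local/spark/sbin/spark-daemon.sh status org.apache.spark.deploy.master.Master 1

# Stop it: kill the pid recorded in /tmp/spark-hadoop-org.apache.spark.deploy.master.Master-1.pid and remove the file
sh /usr/local/spark/sbin/spark-daemon.sh stop org.apache.spark.deploy.master.Master 1

# Start it again in the foreground (execute_command then runs "$@" directly, without nohup)
SPARK_NO_DAEMONIZE=1 sh /usr/local/spark/sbin/spark-daemon.sh start org.apache.spark.deploy.master.Master 1 --host s101 --port 7077 --webui-port 8080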
