What happens behind the scenes when Hadoop runs start-all.sh

1. Reading the start-all.sh script
It contains the following key shell commands:

# Warn that start-all.sh is deprecated, superseded by
# ${HADOOP_HOME}/sbin/start-dfs.sh and start-yarn.sh
echo "This script is Deprecated. Instead use start-dfs.sh and start-yarn.sh"

# Source libexec/hadoop-config.sh
. $HADOOP_LIBEXEC_DIR/hadoop-config.sh

# Run start-dfs.sh
"${HADOOP_HDFS_HOME}"/sbin/start-dfs.sh --config $HADOOP_CONF_DIR

# Run start-yarn.sh
"${HADOOP_YARN_HOME}"/sbin/start-yarn.sh --config $HADOOP_CONF_DIR

Here, the hadoop-config.sh script initializes Hadoop's environment variables.
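As a rough sketch (the exact contents vary by release), hadoop-config.sh resolves the install layout and fills in unset variables with defaults, along these lines:

# Sketch only, not the literal script: default unset environment
# variables and append common JVM options
export HADOOP_CONF_DIR="${HADOOP_CONF_DIR:-$HADOOP_PREFIX/etc/hadoop}"
JAVA_HEAP_MAX="-Xmx${HADOOP_HEAPSIZE:-1000}m"
HADOOP_OPTS="$HADOOP_OPTS -Djava.net.preferIPv4Stack=true"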

2. Reading the start-dfs.sh script

It contains the following key shell commands:

# Start hadoop dfs daemons.
# Optionally upgrade or rollback dfs state.
# Run this on master node.

# Usage of start-dfs.sh: it accepts -upgrade (upgrade the DFS state) or
# -rollback (roll back to the pre-upgrade state), plus other options
# such as -clusterId
usage="Usage: start-dfs.sh [-upgrade|-rollback] [other options such as -clusterId]"
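# Example invocations (illustrative):
#   start-dfs.sh              # normal startup
#   start-dfs.sh -upgrade     # start NameNode with an HDFS layout upgrade
#   start-dfs.sh -rollback    # roll back to the pre-upgrade state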

# Source hdfs-config.sh for HDFS-specific environment setup
. $HADOOP_LIBEXEC_DIR/hdfs-config.sh

# Run hadoop-daemons.sh, which calls ${HADOOP_HOME}/bin/hdfs namenode to start the NameNode(s)
NAMENODES=$($HADOOP_PREFIX/bin/hdfs getconf -namenodes)
echo "Starting namenodes on [$NAMENODES]"
"$HADOOP_PREFIX/sbin/hadoop-daemons.sh" \
  --config "$HADOOP_CONF_DIR" \
  --hostnames "$NAMENODES" \
  --script "$bin/hdfs" start namenode $nameStartOpt
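# Example: the NameNode host list can be queried the same way the script
# does; the output below assumes two hypothetical hosts nn1 and nn2:
#   $HADOOP_PREFIX/bin/hdfs getconf -namenodes
#   nn1 nn2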

# Run hadoop-daemons.sh, which calls ${HADOOP_HOME}/bin/hdfs datanode to start the DataNodes
if [ -n "$HADOOP_SECURE_DN_USER" ]; then
  echo \
    "Attempting to start secure cluster, skipping datanodes. " \
    "Run start-secure-dns.sh as root to complete startup."
else
  "$HADOOP_PREFIX/sbin/hadoop-daemons.sh" \
    --config "$HADOOP_CONF_DIR" \
    --script "$bin/hdfs" start datanode $dataStartOpt
fi
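# Example (assumes a Kerberos-secured cluster): with
#   export HADOOP_SECURE_DN_USER=hdfs
# set in hadoop-env.sh, the branch above is taken and the DataNodes must
# then be started separately, as root:
#   $HADOOP_PREFIX/sbin/start-secure-dns.sh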

# Run hadoop-daemons.sh, which calls ${HADOOP_HOME}/bin/hdfs secondarynamenode to start the SecondaryNameNode
SECONDARY_NAMENODES=$($HADOOP_PREFIX/bin/hdfs getconf -secondarynamenodes 2>/dev/null)

if [ -n "$SECONDARY_NAMENODES" ]; then
  echo "Starting secondary namenodes [$SECONDARY_NAMENODES]"

  "$HADOOP_PREFIX/sbin/hadoop-daemons.sh" \
      --config "$HADOOP_CONF_DIR" \
      --hostnames "$SECONDARY_NAMENODES" \
      --script "$bin/hdfs" start secondarynamenode
fi

# Run hadoop-daemons.sh, which calls ${HADOOP_HOME}/bin/hdfs journalnode to start the JournalNodes (quorum-journal HA only)
SHARED_EDITS_DIR=$($HADOOP_PREFIX/bin/hdfs getconf -confKey dfs.namenode.shared.edits.dir 2>&-)

case "$SHARED_EDITS_DIR" in
qjournal://*)
  JOURNAL_NODES=$(echo "$SHARED_EDITS_DIR" | sed 's,qjournal://\([^/]*\)/.*,\1,g; s/;/ /g; s/:[0-9]*//g')
  echo "Starting journal nodes [$JOURNAL_NODES]"
  "$HADOOP_PREFIX/sbin/hadoop-daemons.sh" \
      --config "$HADOOP_CONF_DIR" \
      --hostnames "$JOURNAL_NODES" \
      --script "$bin/hdfs" start journalnode ;;
esac
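# Worked example of the sed extraction above (hypothetical hosts):
#   dfs.namenode.shared.edits.dir = qjournal://jn1:8485;jn2:8485;jn3:8485/mycluster
#   strip "qjournal://" and the trailing journal ID -> jn1:8485;jn2:8485;jn3:8485
#   turn ';' into spaces and drop the ports         -> JOURNAL_NODES="jn1 jn2 jn3"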

# Run hadoop-daemons.sh, which calls ${HADOOP_HOME}/bin/hdfs zkfc to start the ZooKeeper Failover Controllers
#---------------------------------------------------------
# ZK Failover controllers, if auto-HA is enabled
AUTOHA_ENABLED=$($HADOOP_PREFIX/bin/hdfs getconf -confKey dfs.ha.automatic-failover.enabled)
if [ "$(echo "$AUTOHA_ENABLED" | tr A-Z a-z)" = "true" ]; then
  echo "Starting ZK Failover Controllers on NN hosts [$NAMENODES]"
  "$HADOOP_PREFIX/sbin/hadoop-daemons.sh" \
    --config "$HADOOP_CONF_DIR" \
    --hostnames "$NAMENODES" \
    --script "$bin/hdfs" start zkfc
fi
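The same flag can be checked by hand; getconf -confKey prints the value resolved from hdfs-site.xml:

$HADOOP_PREFIX/bin/hdfs getconf -confKey dfs.ha.automatic-failover.enabled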

3. Reading the ${HADOOP_HOME}/bin/hdfs executable script:

# hdfs usage: hdfs [--config confdir] COMMAND
echo "Usage: hdfs [--config confdir] COMMAND"

# namenode: main class org.apache.hadoop.hdfs.server.namenode.NameNode

if [ "$COMMAND" = "namenode" ] ; then
  CLASS='org.apache.hadoop.hdfs.server.namenode.NameNode'
  HADOOP_OPTS="$HADOOP_OPTS $HADOOP_NAMENODE_OPTS"

# zkfc: main class org.apache.hadoop.hdfs.tools.DFSZKFailoverController

elif [ "$COMMAND" = "zkfc" ] ; then
  CLASS='org.apache.hadoop.hdfs.tools.DFSZKFailoverController'
  HADOOP_OPTS="$HADOOP_OPTS $HADOOP_ZKFC_OPTS"

# secondarynamenode: main class org.apache.hadoop.hdfs.server.namenode.SecondaryNameNode
elif [ "$COMMAND" = "secondarynamenode" ] ; then
  CLASS='org.apache.hadoop.hdfs.server.namenode.SecondaryNameNode'
  HADOOP_OPTS="$HADOOP_OPTS $HADOOP_SECONDARYNAMENODE_OPTS"

# datanode: main class org.apache.hadoop.hdfs.server.datanode.DataNode
elif [ "$COMMAND" = "datanode" ] ; then
  CLASS='org.apache.hadoop.hdfs.server.datanode.DataNode'
  if [ "$starting_secure_dn" = "true" ]; then
    HADOOP_OPTS="$HADOOP_OPTS -jvm server $HADOOP_DATANODE_OPTS"
  else
    HADOOP_OPTS="$HADOOP_OPTS -server $HADOOP_DATANODE_OPTS"
  fi

# journalnode: main class org.apache.hadoop.hdfs.qjournal.server.JournalNode
elif [ "$COMMAND" = "journalnode" ] ; then
  CLASS='org.apache.hadoop.hdfs.qjournal.server.JournalNode'
  HADOOP_OPTS="$HADOOP_OPTS $HADOOP_JOURNALNODE_OPTS"

# dfs: the FsShell file system client (hdfs dfs ... commands)
elif [ "$COMMAND" = "dfs" ] ; then
  CLASS=org.apache.hadoop.fs.FsShell
  HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"

# dfsadmin: HDFS administration tool
elif [ "$COMMAND" = "dfsadmin" ] ; then
  CLASS=org.apache.hadoop.hdfs.tools.DFSAdmin
  HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"

# haadmin: HA administration tool
elif [ "$COMMAND" = "haadmin" ] ; then
  CLASS=org.apache.hadoop.hdfs.tools.DFSHAAdmin
  CLASSPATH=${CLASSPATH}:${TOOL_PATH}
  HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"

# fsck: file system checking utility
elif [ "$COMMAND" = "fsck" ] ; then
  CLASS=org.apache.hadoop.hdfs.tools.DFSck
  HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"

# balancer: rebalance blocks across DataNodes
elif [ "$COMMAND" = "balancer" ] ; then
  CLASS=org.apache.hadoop.hdfs.server.balancer.Balancer
  HADOOP_OPTS="$HADOOP_OPTS $HADOOP_BALANCER_OPTS"

# jmxget: query JMX attributes from a running daemon
elif [ "$COMMAND" = "jmxget" ] ; then
  CLASS=org.apache.hadoop.hdfs.tools.JMXGet

# oiv: offline fsimage viewer
elif [ "$COMMAND" = "oiv" ] ; then
  CLASS=org.apache.hadoop.hdfs.tools.offlineImageViewer.OfflineImageViewerPB

# oiv_legacy: offline fsimage viewer for the legacy image format
elif [ "$COMMAND" = "oiv_legacy" ] ; then
  CLASS=org.apache.hadoop.hdfs.tools.offlineImageViewer.OfflineImageViewer

# oev: offline edits viewer
elif [ "$COMMAND" = "oev" ] ; then
  CLASS=org.apache.hadoop.hdfs.tools.offlineEditsViewer.OfflineEditsViewer

# fetchdt: fetch a delegation token
elif [ "$COMMAND" = "fetchdt" ] ; then
  CLASS=org.apache.hadoop.hdfs.tools.DelegationTokenFetcher

# getconf: print resolved configuration values
elif [ "$COMMAND" = "getconf" ] ; then
  CLASS=org.apache.hadoop.hdfs.tools.GetConf

# groups: show the groups a user belongs to
elif [ "$COMMAND" = "groups" ] ; then
  CLASS=org.apache.hadoop.hdfs.tools.GetGroups

# snapshotDiff: diff two HDFS snapshots
elif [ "$COMMAND" = "snapshotDiff" ] ; then
  CLASS=org.apache.hadoop.hdfs.tools.snapshot.SnapshotDiff

# lsSnapshottableDir: list snapshottable directories
elif [ "$COMMAND" = "lsSnapshottableDir" ] ; then
  CLASS=org.apache.hadoop.hdfs.tools.snapshot.LsSnapshottableDir

# portmap: portmap service for the NFS gateway
elif [ "$COMMAND" = "portmap" ] ; then
  CLASS=org.apache.hadoop.portmap.Portmap
  HADOOP_OPTS="$HADOOP_OPTS $HADOOP_PORTMAP_OPTS"

# nfs3: NFS3 gateway
elif [ "$COMMAND" = "nfs3" ] ; then
  CLASS=org.apache.hadoop.hdfs.nfs.nfs3.Nfs3
  HADOOP_OPTS="$HADOOP_OPTS $HADOOP_NFS3_OPTS"

# cacheadmin: centralized cache administration
elif [ "$COMMAND" = "cacheadmin" ] ; then
  CLASS=org.apache.hadoop.hdfs.tools.CacheAdmin

# anything else is treated as a fully-qualified class name
else
  CLASS="$COMMAND"
fi
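After this dispatch, bin/hdfs assembles the classpath and hands control to the JVM running the chosen class. Simplified from the script's final lines (exact options vary by release), it amounts to:

# Simplified sketch: launch the selected class in the JVM
exec "$JAVA" -Dproc_$COMMAND $JAVA_HEAP_MAX $HADOOP_OPTS $CLASS "$@"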

4. Reading the start-yarn.sh script:

# Start all yarn daemons.  Run this on the master node.

# Call ${HADOOP_HOME}/sbin/yarn-daemon.sh to start the ResourceManager
"$bin"/yarn-daemon.sh --config $YARN_CONF_DIR  start resourcemanager

# Call ${HADOOP_HOME}/sbin/yarn-daemons.sh to start the NodeManagers
"$bin"/yarn-daemons.sh --config $YARN_CONF_DIR  start nodemanager

5. Reading the ${HADOOP_HOME}/bin/yarn script:

# yarn usage: yarn [--config confdir] COMMAND
echo "Usage: yarn [--config confdir] COMMAND"

# figure out which class to run

# map each command to its main class
if [ "$COMMAND" = "classpath" ] ; then
  echo $CLASSPATH
  exit
elif [ "$COMMAND" = "rmadmin" ] ; then
  CLASS='org.apache.hadoop.yarn.client.cli.RMAdminCLI'
  YARN_OPTS="$YARN_OPTS $YARN_CLIENT_OPTS"
elif [ "$COMMAND" = "application" ] || 
     [ "$COMMAND" = "applicationattempt" ] || 
     [ "$COMMAND" = "container" ]; then
  CLASS=org.apache.hadoop.yarn.client.cli.ApplicationCLI
  YARN_OPTS="$YARN_OPTS $YARN_CLIENT_OPTS"
  set -- $COMMAND $@

# node: CLI for listing and inspecting cluster nodes
elif [ "$COMMAND" = "node" ] ; then
  CLASS=org.apache.hadoop.yarn.client.cli.NodeCLI
  YARN_OPTS="$YARN_OPTS $YARN_CLIENT_OPTS"

# resourcemanager: main class org.apache.hadoop.yarn.server.resourcemanager.ResourceManager
elif [ "$COMMAND" = "resourcemanager" ] ; then
  CLASSPATH=${CLASSPATH}:$YARN_CONF_DIR/rm-config/log4j.properties
  CLASS='org.apache.hadoop.yarn.server.resourcemanager.ResourceManager'
  YARN_OPTS="$YARN_OPTS $YARN_RESOURCEMANAGER_OPTS"
  if [ "$YARN_RESOURCEMANAGER_HEAPSIZE" != "" ]; then
    JAVA_HEAP_MAX="-Xmx""$YARN_RESOURCEMANAGER_HEAPSIZE""m"
  fi
elif [ "$COMMAND" = "historyserver" ] ; then
  echo "DEPRECATED: Use of this command to start the timeline server is deprecated." 1>&2
  echo "Instead use the timelineserver command for it." 1>&2
  CLASSPATH=${CLASSPATH}:$YARN_CONF_DIR/ahs-config/log4j.properties
  CLASS='org.apache.hadoop.yarn.server.applicationhistoryservice.ApplicationHistoryServer'
  YARN_OPTS="$YARN_OPTS $YARN_HISTORYSERVER_OPTS"
  if [ "$YARN_HISTORYSERVER_HEAPSIZE" != "" ]; then
    JAVA_HEAP_MAX="-Xmx""$YARN_HISTORYSERVER_HEAPSIZE""m"
  fi
elif [ "$COMMAND" = "timelineserver" ] ; then
  CLASSPATH=${CLASSPATH}:$YARN_CONF_DIR/timelineserver-config/log4j.properties
  CLASS='org.apache.hadoop.yarn.server.applicationhistoryservice.ApplicationHistoryServer'
  YARN_OPTS="$YARN_OPTS $YARN_TIMELINESERVER_OPTS"
  if [ "$YARN_TIMELINESERVER_HEAPSIZE" != "" ]; then
    JAVA_HEAP_MAX="-Xmx""$YARN_TIMELINESERVER_HEAPSIZE""m"
  fi
elif [ "$COMMAND" = "nodemanager" ] ; then
  CLASSPATH=${CLASSPATH}:$YARN_CONF_DIR/nm-config/log4j.properties
  CLASS='org.apache.hadoop.yarn.server.nodemanager.NodeManager'
  YARN_OPTS="$YARN_OPTS -server $YARN_NODEMANAGER_OPTS"
  if [ "$YARN_NODEMANAGER_HEAPSIZE" != "" ]; then
    JAVA_HEAP_MAX="-Xmx""$YARN_NODEMANAGER_HEAPSIZE""m"
  fi
elif [ "$COMMAND" = "proxyserver" ] ; then
  CLASS='org.apache.hadoop.yarn.server.webproxy.WebAppProxyServer'
  YARN_OPTS="$YARN_OPTS $YARN_PROXYSERVER_OPTS"
  if [ "$YARN_PROXYSERVER_HEAPSIZE" != "" ]; then
    JAVA_HEAP_MAX="-Xmx""$YARN_PROXYSERVER_HEAPSIZE""m"
  fi
# version: print version information
elif [ "$COMMAND" = "version" ] ; then
  CLASS=org.apache.hadoop.util.VersionInfo
  YARN_OPTS="$YARN_OPTS $YARN_CLIENT_OPTS"

# jar: run a jar file on YARN
elif [ "$COMMAND" = "jar" ] ; then
  CLASS=org.apache.hadoop.util.RunJar
  YARN_OPTS="$YARN_OPTS $YARN_CLIENT_OPTS"
elif [ "$COMMAND" = "logs" ] ; then
  CLASS=org.apache.hadoop.yarn.client.cli.LogsCLI
  YARN_OPTS="$YARN_OPTS $YARN_CLIENT_OPTS"
elif [ "$COMMAND" = "daemonlog" ] ; then
  CLASS=org.apache.hadoop.log.LogLevel
  YARN_OPTS="$YARN_OPTS $YARN_CLIENT_OPTS"
else
  CLASS=$COMMAND
fi
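As with bin/hdfs, the yarn script finishes by launching the JVM with the selected class; simplified (exact options vary by release):

# Simplified sketch: run the chosen class with the accumulated YARN options
exec "$JAVA" -Dproc_$COMMAND $JAVA_HEAP_MAX $YARN_OPTS -classpath "$CLASSPATH" $CLASS "$@"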

To be continued...
