1. Examining the start-all.sh script
It contains these key shell commands:
# Notes that start-all.sh is deprecated, superseded by ${HADOOP_HOME}/sbin/start-dfs.sh and start-yarn.sh
echo "This script is Deprecated. Instead use start-dfs.sh and start-yarn.sh"
# Source $HADOOP_LIBEXEC_DIR/hadoop-config.sh into the current shell
. $HADOOP_LIBEXEC_DIR/hadoop-config.sh
# Run start-dfs.sh
"${HADOOP_HDFS_HOME}"/sbin/start-dfs.sh --config $HADOOP_CONF_DIR
# Run start-yarn.sh
"${HADOOP_YARN_HOME}"/sbin/start-yarn.sh --config $HADOOP_CONF_DIR
Here, hadoop-config.sh takes care of initializing Hadoop's environment variables.
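To make that concrete, here is a minimal sketch in the spirit of hadoop-config.sh (the real script is much longer; the defaulting logic below is simplified):
# Locate the installation root from the script's own path
this="${BASH_SOURCE-$0}"
common_bin=$(cd -P -- "$(dirname -- "$this")" && pwd -P)
export HADOOP_PREFIX=$(dirname "$common_bin")
# Default the configuration directory if the caller did not set one
export HADOOP_CONF_DIR="${HADOOP_CONF_DIR:-$HADOOP_PREFIX/etc/hadoop}"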
2. Examining the start-dfs.sh script:
It contains these key shell commands:
# Start hadoop dfs daemons.
# Optionally upgrade or rollback dfs state.
# Run this on master node.
## Usage of start-dfs.sh: it accepts -upgrade (upgrade HDFS state), -rollback (roll back to the previous state), and other options such as -clusterId (specify a cluster ID)
usage="Usage: start-dfs.sh [-upgrade|-rollback] [other options such as -clusterId]"
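For example, after installing a new Hadoop version you might bring HDFS up once with the upgrade flag:
${HADOOP_HOME}/sbin/start-dfs.sh -upgrade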
# Source hdfs-config.sh
. $HADOOP_LIBEXEC_DIR/hdfs-config.sh
# Run hadoop-daemons.sh, which calls ${HADOOP_HOME}/bin/hdfs namenode to start the NameNode(s)
NAMENODES=$($HADOOP_PREFIX/bin/hdfs getconf -namenodes)
echo "Starting namenodes on [$NAMENODES]"
"$HADOOP_PREFIX/sbin/hadoop-daemons.sh" \
--config "$HADOOP_CONF_DIR" \
--hostnames "$NAMENODES" \
--script "$bin/hdfs" start namenode $nameStartOpt
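You can run the same getconf query by hand to see which hosts the script will target; on a hypothetical cluster whose NameNode runs on a host named master, it would print:
$ ${HADOOP_HOME}/bin/hdfs getconf -namenodes
master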
# Run hadoop-daemons.sh, which calls ${HADOOP_HOME}/bin/hdfs datanode to start the DataNodes
if [ -n "$HADOOP_SECURE_DN_USER" ]; then
echo \
"Attempting to start secure cluster, skipping datanodes. " \
"Run start-secure-dns.sh as root to complete startup."
else
"$HADOOP_PREFIX/sbin/hadoop-daemons.sh" \
--config "$HADOOP_CONF_DIR" \
--script "$bin/hdfs" start datanode $dataStartOpt
fi
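HADOOP_SECURE_DN_USER is typically exported in hadoop-env.sh when DataNodes run in secure mode (binding privileged ports via jsvc), for example:
# In hadoop-env.sh; the account name here is just an example
export HADOOP_SECURE_DN_USER=hdfs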
# Run hadoop-daemons.sh, which calls ${HADOOP_HOME}/bin/hdfs secondarynamenode to start the SecondaryNameNode(s)
SECONDARY_NAMENODES=$($HADOOP_PREFIX/bin/hdfs getconf -secondarynamenodes 2>/dev/null)
if [ -n "$SECONDARY_NAMENODES" ]; then
echo "Starting secondary namenodes [$SECONDARY_NAMENODES]"
"$HADOOP_PREFIX/sbin/hadoop-daemons.sh" \
--config "$HADOOP_CONF_DIR" \
--hostnames "$SECONDARY_NAMENODES" \
--script "$bin/hdfs" start secondarynamenode
fi
# Run hadoop-daemons.sh, which calls ${HADOOP_HOME}/bin/hdfs journalnode to start the JournalNodes
SHARED_EDITS_DIR=$($HADOOP_PREFIX/bin/hdfs getconf -confKey dfs.namenode.shared.edits.dir 2>&-)
case "$SHARED_EDITS_DIR" in
qjournal://*)
JOURNAL_NODES=$(echo "$SHARED_EDITS_DIR" | sed 's,qjournal://\([^/]*\)/.*,\1,g; s/;/ /g; s/:[0-9]*//g')
echo "Starting journal nodes [$JOURNAL_NODES]"
"$HADOOP_PREFIX/sbin/hadoop-daemons.sh" \
--config "$HADOOP_CONF_DIR" \
--hostnames "$JOURNAL_NODES" \
--script "$bin/hdfs" start journalnode ;;
esac
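To see what the sed pipeline does, take a made-up value of dfs.namenode.shared.edits.dir such as qjournal://jn1:8485;jn2:8485;jn3:8485/mycluster: the first expression strips the qjournal:// prefix and the /mycluster suffix, the second turns the semicolons into spaces, and the third drops the ports, leaving JOURNAL_NODES as "jn1 jn2 jn3".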
# Run hadoop-daemons.sh, which calls ${HADOOP_HOME}/bin/hdfs zkfc to start the ZK Failover Controllers
#---------------------------------------------------------
# ZK Failover controllers, if auto-HA is enabled
AUTOHA_ENABLED=$($HADOOP_PREFIX/bin/hdfs getconf -confKey dfs.ha.automatic-failover.enabled)
if [ "$(echo "$AUTOHA_ENABLED" | tr A-Z a-z)" = "true" ]; then
echo "Starting ZK Failover Controllers on NN hosts [$NAMENODES]"
"$HADOOP_PREFIX/sbin/hadoop-daemons.sh" \
--config "$HADOOP_CONF_DIR" \
--hostnames "$NAMENODES" \
--script "$bin/hdfs" start zkfc
fi
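You can check whether this branch will fire using the same getconf call the script itself uses (output shown for a cluster where automatic failover is enabled):
$ ${HADOOP_HOME}/bin/hdfs getconf -confKey dfs.ha.automatic-failover.enabled
true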
3. Examining the ${HADOOP_HOME}/bin/hdfs executable script:
# hdfs syntax: hdfs [--config confdir] COMMAND
echo "Usage: hdfs [--config confdir] COMMAND"
# namenode: starts the NameNode; main class org.apache.hadoop.hdfs.server.namenode.NameNode
if [ "$COMMAND" = "namenode" ] ; then
CLASS='org.apache.hadoop.hdfs.server.namenode.NameNode'
HADOOP_OPTS="$HADOOP_OPTS $HADOOP_NAMENODE_OPTS"
# zkfc: starts the ZK Failover Controller; main class org.apache.hadoop.hdfs.tools.DFSZKFailoverController
elif [ "$COMMAND" = "zkfc" ] ; then
CLASS='org.apache.hadoop.hdfs.tools.DFSZKFailoverController'
HADOOP_OPTS="$HADOOP_OPTS $HADOOP_ZKFC_OPTS"
# secondarynamenode: starts the SecondaryNameNode; main class org.apache.hadoop.hdfs.server.namenode.SecondaryNameNode
elif [ "$COMMAND" = "secondarynamenode" ] ; then
CLASS='org.apache.hadoop.hdfs.server.namenode.SecondaryNameNode'
HADOOP_OPTS="$HADOOP_OPTS $HADOOP_SECONDARYNAMENODE_OPTS"
# datanode: starts the DataNode; main class org.apache.hadoop.hdfs.server.datanode.DataNode
elif [ "$COMMAND" = "datanode" ] ; then
CLASS='org.apache.hadoop.hdfs.server.datanode.DataNode'
if [ "$starting_secure_dn" = "true" ]; then
HADOOP_OPTS="$HADOOP_OPTS -jvm server $HADOOP_DATANODE_OPTS"
else
HADOOP_OPTS="$HADOOP_OPTS -server $HADOOP_DATANODE_OPTS"
fi
# journalnode: starts the JournalNode; main class org.apache.hadoop.hdfs.qjournal.server.JournalNode
elif [ "$COMMAND" = "journalnode" ] ; then
CLASS='org.apache.hadoop.hdfs.qjournal.server.JournalNode'
HADOOP_OPTS="$HADOOP_OPTS $HADOOP_JOURNALNODE_OPTS"
# dfs: the filesystem shell (FsShell)
elif [ "$COMMAND" = "dfs" ] ; then
CLASS=org.apache.hadoop.fs.FsShell
HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
# dfsadmin: HDFS administration tool
elif [ "$COMMAND" = "dfsadmin" ] ; then
CLASS=org.apache.hadoop.hdfs.tools.DFSAdmin
HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
# haadmin: HA administration tool
elif [ "$COMMAND" = "haadmin" ] ; then
CLASS=org.apache.hadoop.hdfs.tools.DFSHAAdmin
CLASSPATH=${CLASSPATH}:${TOOL_PATH}
HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
# fsck: filesystem check utility
elif [ "$COMMAND" = "fsck" ] ; then
CLASS=org.apache.hadoop.hdfs.tools.DFSck
HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
# balancer: rebalances blocks across DataNodes
elif [ "$COMMAND" = "balancer" ] ; then
CLASS=org.apache.hadoop.hdfs.server.balancer.Balancer
HADOOP_OPTS="$HADOOP_OPTS $HADOOP_BALANCER_OPTS"
# jmxget: dumps JMX values from a daemon
elif [ "$COMMAND" = "jmxget" ] ; then
CLASS=org.apache.hadoop.hdfs.tools.JMXGet
# oiv: offline fsimage viewer
elif [ "$COMMAND" = "oiv" ] ; then
CLASS=org.apache.hadoop.hdfs.tools.offlineImageViewer.OfflineImageViewerPB
# oiv_legacy: offline viewer for the legacy fsimage format
elif [ "$COMMAND" = "oiv_legacy" ] ; then
CLASS=org.apache.hadoop.hdfs.tools.offlineImageViewer.OfflineImageViewer
# oev: offline edits viewer
elif [ "$COMMAND" = "oev" ] ; then
CLASS=org.apache.hadoop.hdfs.tools.offlineEditsViewer.OfflineEditsViewer
# fetchdt: fetches a delegation token
elif [ "$COMMAND" = "fetchdt" ] ; then
CLASS=org.apache.hadoop.hdfs.tools.DelegationTokenFetcher
# getconf: prints configuration values
elif [ "$COMMAND" = "getconf" ] ; then
CLASS=org.apache.hadoop.hdfs.tools.GetConf
# groups: shows the groups a user belongs to
elif [ "$COMMAND" = "groups" ] ; then
CLASS=org.apache.hadoop.hdfs.tools.GetGroups
# snapshotDiff: diffs two snapshots of a directory
elif [ "$COMMAND" = "snapshotDiff" ] ; then
CLASS=org.apache.hadoop.hdfs.tools.snapshot.SnapshotDiff
# lsSnapshottableDir: lists snapshottable directories
elif [ "$COMMAND" = "lsSnapshottableDir" ] ; then
CLASS=org.apache.hadoop.hdfs.tools.snapshot.LsSnapshottableDir
# portmap: portmap service used by the NFS gateway
elif [ "$COMMAND" = "portmap" ] ; then
CLASS=org.apache.hadoop.portmap.Portmap
HADOOP_OPTS="$HADOOP_OPTS $HADOOP_PORTMAP_OPTS"
# nfs3: NFS3 gateway
elif [ "$COMMAND" = "nfs3" ] ; then
CLASS=org.apache.hadoop.hdfs.nfs.nfs3.Nfs3
HADOOP_OPTS="$HADOOP_OPTS $HADOOP_NFS3_OPTS"
# cacheadmin: administers centralized cache pools and directives
elif [ "$COMMAND" = "cacheadmin" ] ; then
CLASS=org.apache.hadoop.hdfs.tools.CacheAdmin
# anything else is treated as a fully qualified class name to run
else
CLASS="$COMMAND"
fi
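After this if/elif chain the script hands control to the JVM; ignoring the secure-DataNode special case, the tail of bin/hdfs boils down to roughly the following (CLASSPATH has already been exported to the environment):
# Launch the selected main class with the accumulated JVM options
exec "$JAVA" -Dproc_$COMMAND $JAVA_HEAP_MAX $HADOOP_OPTS $CLASS "$@"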
4. Examining the start-yarn.sh script:
# Start all yarn daemons. Run this on master node.
# start resourceManager
# Invoke ${HADOOP_HOME}/sbin/yarn-daemon.sh to start the ResourceManager
"$bin"/yarn-daemon.sh --config $YARN_CONF_DIR start resourcemanager
# Invoke ${HADOOP_HOME}/sbin/yarn-daemons.sh to start the NodeManagers
# start nodeManager
"$bin"/yarn-daemons.sh --config $YARN_CONF_DIR start nodemanager
5. Examining the ${HADOOP_HOME}/bin/yarn script:
# Syntax: yarn [--config confdir] COMMAND
echo "Usage: yarn [--config confdir] COMMAND"
# figure out which class to run (each command maps to a main class)
if [ "$COMMAND" = "classpath" ] ; then
echo $CLASSPATH
exit
elif [ "$COMMAND" = "rmadmin" ] ; then
CLASS='org.apache.hadoop.yarn.client.cli.RMAdminCLI'
YARN_OPTS="$YARN_OPTS $YARN_CLIENT_OPTS"
elif [ "$COMMAND" = "application" ] ||
[ "$COMMAND" = "applicationattempt" ] ||
[ "$COMMAND" = "container" ]; then
CLASS=org.apache.hadoop.yarn.client.cli.ApplicationCLI
YARN_OPTS="$YARN_OPTS $YARN_CLIENT_OPTS"
set -- $COMMAND $@
# node: the node status CLI
elif [ "$COMMAND" = "node" ] ; then
CLASS=org.apache.hadoop.yarn.client.cli.NodeCLI
YARN_OPTS="$YARN_OPTS $YARN_CLIENT_OPTS"
# resourcemanager: starts the ResourceManager
elif [ "$COMMAND" = "resourcemanager" ] ; then
CLASSPATH=${CLASSPATH}:$YARN_CONF_DIR/rm-config/log4j.properties
CLASS='org.apache.hadoop.yarn.server.resourcemanager.ResourceManager'
YARN_OPTS="$YARN_OPTS $YARN_RESOURCEMANAGER_OPTS"
if [ "$YARN_RESOURCEMANAGER_HEAPSIZE" != "" ]; then
JAVA_HEAP_MAX="-Xmx""$YARN_RESOURCEMANAGER_HEAPSIZE""m"
fi
elif [ "$COMMAND" = "historyserver" ] ; then
echo "DEPRECATED: Use of this command to start the timeline server is deprecated." 1>&2
echo "Instead use the timelineserver command for it." 1>&2
CLASSPATH=${CLASSPATH}:$YARN_CONF_DIR/ahs-config/log4j.properties
CLASS='org.apache.hadoop.yarn.server.applicationhistoryservice.ApplicationHistoryServer'
YARN_OPTS="$YARN_OPTS $YARN_HISTORYSERVER_OPTS"
if [ "$YARN_HISTORYSERVER_HEAPSIZE" != "" ]; then
JAVA_HEAP_MAX="-Xmx""$YARN_HISTORYSERVER_HEAPSIZE""m"
fi
elif [ "$COMMAND" = "timelineserver" ] ; then
CLASSPATH=${CLASSPATH}:$YARN_CONF_DIR/timelineserver-config/log4j.properties
CLASS='org.apache.hadoop.yarn.server.applicationhistoryservice.ApplicationHistoryServer'
YARN_OPTS="$YARN_OPTS $YARN_TIMELINESERVER_OPTS"
if [ "$YARN_TIMELINESERVER_HEAPSIZE" != "" ]; then
JAVA_HEAP_MAX="-Xmx""$YARN_TIMELINESERVER_HEAPSIZE""m"
fi
elif [ "$COMMAND" = "nodemanager" ] ; then
CLASSPATH=${CLASSPATH}:$YARN_CONF_DIR/nm-config/log4j.properties
CLASS='org.apache.hadoop.yarn.server.nodemanager.NodeManager'
YARN_OPTS="$YARN_OPTS -server $YARN_NODEMANAGER_OPTS"
if [ "$YARN_NODEMANAGER_HEAPSIZE" != "" ]; then
JAVA_HEAP_MAX="-Xmx""$YARN_NODEMANAGER_HEAPSIZE""m"
fi
elif [ "$COMMAND" = "proxyserver" ] ; then
CLASS='org.apache.hadoop.yarn.server.webproxy.WebAppProxyServer'
YARN_OPTS="$YARN_OPTS $YARN_PROXYSERVER_OPTS"
if [ "$YARN_PROXYSERVER_HEAPSIZE" != "" ]; then
JAVA_HEAP_MAX="-Xmx""$YARN_PROXYSERVER_HEAPSIZE""m"
fi
# version: prints version information
elif [ "$COMMAND" = "version" ] ; then
CLASS=org.apache.hadoop.util.VersionInfo
YARN_OPTS="$YARN_OPTS $YARN_CLIENT_OPTS"
# jar: runs a jar on YARN
elif [ "$COMMAND" = "jar" ] ; then
CLASS=org.apache.hadoop.util.RunJar
YARN_OPTS="$YARN_OPTS $YARN_CLIENT_OPTS"
elif [ "$COMMAND" = "logs" ] ; then
CLASS=org.apache.hadoop.yarn.client.cli.LogsCLI
YARN_OPTS="$YARN_OPTS $YARN_CLIENT_OPTS"
elif [ "$COMMAND" = "daemonlog" ] ; then
CLASS=org.apache.hadoop.log.LogLevel
YARN_OPTS="$YARN_OPTS $YARN_CLIENT_OPTS"
else
CLASS=$COMMAND
fi
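As in bin/hdfs, once CLASS is chosen the script ends by exec'ing the JVM, so a setting such as YARN_NODEMANAGER_HEAPSIZE=2048 in yarn-env.sh surfaces on the command line as -Xmx2048m. The launch line is roughly:
exec "$JAVA" -Dproc_$COMMAND $JAVA_HEAP_MAX $YARN_OPTS -classpath "$CLASSPATH" $CLASS "$@"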
To be continued...