| Node | bigdata-001 | bigdata-002 | bigdata-003 | bigdata-004 | bigdata-005 |
|---|---|---|---|---|---|
| IP | xxx | xxx | xxx | xxx | xxx |
| Hostname | hadoop1 | hadoop2 | hadoop3 | hadoop4 | hadoop5 |
| Memory | 64G | 64G | 128G | 128G | 128G |
| CPU cores | 16 | 16 | 32 | 32 | 32 |
| Hadoop-3.3.4 | NameNode | NameNode | DataNode | DataNode | DataNode |
| | ResourceManager | ResourceManager | NodeManager | NodeManager | NodeManager |
| | DFSZKFailoverController | DFSZKFailoverController | JournalNode | JournalNode | JournalNode |
| | HistoryServer | | | | |
| Zookeeper-3.5.7 | | | zk | zk | zk |
useradd hadoop
passwd hadoop
visudo
# Add the following line below "root ALL=(ALL) ALL"
hadoop ALL=(ALL) NOPASSWD: ALL
sudo vim /etc/hosts
xxxx hadoop1
xxxx hadoop2
mkdir ~/.ssh
cd ~/.ssh
ssh-keygen -t rsa -m PEM
touch authorized_keys
# Once authorized_keys is set up on this node, append every other node's id_rsa.pub to it and copy the resulting authorized_keys to the other machines; a sketch follows below
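A minimal sketch of distributing the public key with ssh-copy-id (assuming the hostnames from the table above and the default SSH port; add -p if the cluster uses a custom port such as the 12898 configured later in this guide):

# Run as the hadoop user on each node
for host in hadoop1 hadoop2 hadoop3 hadoop4 hadoop5; do
  ssh-copy-id -i ~/.ssh/id_rsa.pub hadoop@$host
done
# Verify passwordless login
ssh hadoop@hadoop2 hostname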
tar -zxvf jdk-8u212-linux-x64.tar.gz -C /data/module/
# The extracted directory is already named jdk1.8.0_212, so no rename is needed
# Configure the JDK environment variables
sudo vim /etc/profile.d/my_env.sh
# Add JAVA_HOME
export JAVA_HOME=/data/module/jdk1.8.0_212
export PATH=$PATH:$JAVA_HOME/bin
# Apply the environment variables
source /etc/profile.d/my_env.sh
# Verify that the JDK is installed correctly
java -version
# Every node needs this JDK configuration (see the check below)
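A quick way to confirm the JDK on all nodes once each one has been configured (a sketch assuming passwordless SSH between nodes):

for host in hadoop1 hadoop2 hadoop3 hadoop4 hadoop5; do
  echo "== $host =="
  ssh hadoop@$host "source /etc/profile.d/my_env.sh && java -version"
done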
# Extract and install
# (1) Extract the ZooKeeper tarball into /data/module/
[hadoop@master1 software]$ tar -zxvf apache-zookeeper-3.5.7-bin.tar.gz -C /data/module/
# (2) Rename /data/module/apache-zookeeper-3.5.7-bin to zk-3.5.7
[hadoop@master1 module]$ mv apache-zookeeper-3.5.7-bin/ zk-3.5.7
# 2) Configure the server id
# (1) Create a zkData directory under /data/module/zk-3.5.7/
[hadoop@master1 zk-3.5.7]$ mkdir zkData
# (2) Create a file named myid in /data/module/zk-3.5.7/zkData
[hadoop@master1 zkData]$ vim myid
# Create the myid file directly on Linux; editing it in Notepad++ can easily corrupt the encoding
# Put the server id that matches this node into the file (every node must use a different id):
2
# 3) Configure zoo.cfg
# (1) Rename zoo_sample.cfg in /data/module/zk-3.5.7/conf to zoo.cfg
[hadoop@master1 conf]$ mv zoo_sample.cfg zoo.cfg
# (2) Open zoo.cfg
[hadoop@master1 conf]$ vim zoo.cfg
# Change the data storage path
dataDir=/data/module/zk-3.5.7/zkData
# Add the following configuration
#######################cluster##########################
server.1=hadoop3:2888:3888
server.2=hadoop4:2888:3888
server.3=hadoop5:2888:3888
# (3) Sync the /data/module/zk-3.5.7 directory to the other ZooKeeper nodes; a sketch for setting each node's myid and starting ZooKeeper follows below
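Each node's myid must match its server.N entry in zoo.cfg. A minimal sketch, assuming hadoop3/4/5 take ids 1/2/3 as configured above:

# On hadoop3
echo 1 > /data/module/zk-3.5.7/zkData/myid
# On hadoop4
echo 2 > /data/module/zk-3.5.7/zkData/myid
# On hadoop5
echo 3 > /data/module/zk-3.5.7/zkData/myid
# Start and check ZooKeeper on each of the three nodes
/data/module/zk-3.5.7/bin/zkServer.sh start
/data/module/zk-3.5.7/bin/zkServer.sh status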
# 1) Upload hadoop-3.3.4.tar.gz with SecureCRT (or scp) into the software folder under /data
# 2) Change into the directory that holds the Hadoop tarball
[hadoop@master1 ~]$ cd /data/software/
# 3) Extract the archive into /data/module
[hadoop@master1 software]$ tar -zxvf hadoop-3.3.4.tar.gz -C /data/module/
# 4) Check that the extraction succeeded
[hadoop@master1 software]$ ls /data/module/hadoop-3.3.4
# 5) The extracted directory is already named hadoop-3.3.4, so no rename is needed
# 6) Add Hadoop to the environment variables
# (1) Get the Hadoop installation path
[hadoop@master1 hadoop]$ pwd
/data/module/hadoop-3.3.4
# (2) Open /etc/profile.d/my_env.sh
[hadoop@master1 hadoop]$ sudo vim /etc/profile.d/my_env.sh
# Append the Hadoop entries at the end of the file (Shift+G jumps to the end):
#HADOOP_HOME
export HADOOP_HOME=/data/module/hadoop-3.3.4
export PATH=$PATH:$HADOOP_HOME/bin
export PATH=$PATH:$HADOOP_HOME/sbin
export HADOOP_CONF_DIR=${HADOOP_HOME}/etc/hadoop
export HADOOP_CLASSPATH=`hadoop classpath`
#USER_HOME
export USER_HOME=/home/hadoop
export PATH=$PATH:$USER_HOME/bin
# (3) Save and exit
:wq
# (4) Distribute the environment file to the other nodes (a sketch follows below)
# (5) Source it so it takes effect (on all 5 nodes)
[hadoop@master1 module]$ source /etc/profile.d/my_env.sh
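For step (4), a minimal sketch of copying my_env.sh to the other nodes (assuming the passwordless sudo configured earlier for the hadoop user):

for host in hadoop2 hadoop3 hadoop4 hadoop5; do
  scp /etc/profile.d/my_env.sh hadoop@$host:/tmp/my_env.sh
  ssh hadoop@$host "sudo mv /tmp/my_env.sh /etc/profile.d/my_env.sh"
done
# Verify on each node (in a new login shell, or after sourcing the file)
hadoop version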
Configure core-site.xml
cd $HADOOP_HOME/etc/hadoop
vim core-site.xml
The file content is as follows:
<configuration>
    <property>
        <name>fs.trash.interval</name>
        <value>4320</value>
    </property>
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://hadoopcluster</value>
    </property>
    <property>
        <name>hadoop.tmp.dir</name>
        <value>/data/module/hadoop-3.3.4/data</value>
    </property>
    <property>
        <name>hadoop.http.staticuser.user</name>
        <value>hadoop</value>
    </property>
    <property>
        <name>ha.zookeeper.quorum</name>
        <value>xxxx</value>
    </property>
    <property>
        <name>ipc.client.connect.max.retries</name>
        <value>30</value>
    </property>
    <property>
        <name>ipc.client.connect.retry.interval</name>
        <value>1000</value>
    </property>
    <property>
        <name>hadoop.proxyuser.hadoop.hosts</name>
        <value>*</value>
    </property>
    <property>
        <name>hadoop.proxyuser.hadoop.groups</name>
        <value>*</value>
    </property>
    <property>
        <name>fs.cosn.bucket.region</name>
        <value>ap-beijing</value>
        <description>The region where the bucket is located</description>
    </property>
    <property>
        <name>fs.cosn.credentials.provider</name>
        <value>org.apache.hadoop.fs.auth.SimpleCredentialProvider</value>
    </property>
    <property>
        <name>fs.cosn.userinfo.secretId</name>
        <value>xxxx</value>
        <description>Tencent Cloud Secret Id</description>
    </property>
    <property>
        <name>fs.cosn.userinfo.secretKey</name>
        <value>xxxx</value>
        <description>Tencent Cloud Secret Key</description>
    </property>
    <property>
        <name>fs.cosn.impl</name>
        <value>org.apache.hadoop.fs.CosFileSystem</value>
        <description>The implementation class of the CosN Filesystem</description>
    </property>
    <property>
        <name>fs.AbstractFileSystem.cosn.impl</name>
        <value>org.apache.hadoop.fs.CosN</value>
        <description>The implementation class of the CosN AbstractFileSystem.</description>
    </property>
    <property>
        <name>fs.cosn.tmp.dir</name>
        <value>/tmp/hadoop_cos</value>
        <description>Temporary files would be placed here.</description>
    </property>
    <property>
        <name>fs.cosn.buffer.size</name>
        <value>33554432</value>
        <description>The total size of the buffer pool.</description>
    </property>
    <property>
        <name>fs.cosn.block.size</name>
        <value>8388608</value>
        <description>
            Block size to use cosn filesystem, which is the part size for MultipartUpload. Considering the COS supports up to 10000 blocks, user should estimate the maximum size of a single file. For example, 8MB part size can allow writing a 78GB single file.
        </description>
    </property>
    <property>
        <name>fs.cosn.maxRetries</name>
        <value>3</value>
        <description>
            The maximum number of retries for reading or writing files to COS, before throwing a failure to the application.
        </description>
    </property>
    <property>
        <name>fs.cosn.retry.interval.seconds</name>
        <value>3</value>
        <description>The number of seconds to sleep between each COS retry.</description>
    </property>
</configuration>
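The fs.cosn.* properties integrate Tencent COS as an additional filesystem; this also requires the hadoop-cos plugin jar (and its cos_api dependency) on the Hadoop classpath, as covered by the object-storage post referenced at the end. Once the secretId/secretKey and a real bucket are filled in, a quick smoke test (the bucket name below is a hypothetical placeholder):

# examplebucket-1250000000 is a placeholder; replace it with your own <bucket>-<appid>
hadoop fs -ls cosn://examplebucket-1250000000/
hadoop fs -put /etc/hosts cosn://examplebucket-1250000000/tmp_test/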
Configure mapred-site.xml
[hadoop@master1 hadoop]$ vim mapred-site.xml
The file content is as follows:
<configuration>
    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
    </property>
    <property>
        <name>mapreduce.jobhistory.address</name>
        <value>xxx:10020</value>
    </property>
    <property>
        <name>mapreduce.jobhistory.webapp.address</name>
        <value>xxx:19888</value>
    </property>
</configuration>
Configure the workers file
[hadoop@master1 hadoop]$ vim /data/module/hadoop-3.3.4/etc/hadoop/workers
# Add the following content to the file:
hadoop3
hadoop4
hadoop5
Configure hdfs-site.xml
[hadoop@master1 ~]$ cd $HADOOP_HOME/etc/hadoop
[hadoop@master1 hadoop]$ vim hdfs-site.xml
The file content is as follows:
<configuration>
    <property>
        <name>dfs.namenode.name.dir</name>
        <value>file://${hadoop.tmp.dir}/name</value>
    </property>
    <property>
        <name>dfs.datanode.data.dir</name>
        <value>xxx</value>
    </property>
    <property>
        <name>dfs.journalnode.edits.dir</name>
        <value>xxx</value>
    </property>
    <property>
        <name>dfs.nameservices</name>
        <value>hadoopcluster</value>
    </property>
    <property>
        <name>dfs.ha.namenodes.hadoopcluster</name>
        <value>nn1,nn2</value>
    </property>
    <property>
        <name>dfs.namenode.rpc-address.hadoopcluster.nn1</name>
        <value>xxx:8020</value>
    </property>
    <property>
        <name>dfs.namenode.rpc-address.hadoopcluster.nn2</name>
        <value>xxx:8020</value>
    </property>
    <property>
        <name>dfs.blocksize</name>
        <value>268435456</value>
    </property>
    <property>
        <name>dfs.namenode.http-address.hadoopcluster.nn1</name>
        <value>xxx:9870</value>
    </property>
    <property>
        <name>dfs.namenode.http-address.hadoopcluster.nn2</name>
        <value>xxx:9870</value>
    </property>
    <property>
        <name>dfs.namenode.shared.edits.dir</name>
        <value>qjournal://xxx:8485;xxx:8485;xxx:8485/hadoopcluster</value>
    </property>
    <property>
        <name>dfs.client.failover.proxy.provider.hadoopcluster</name>
        <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
    </property>
    <property>
        <name>dfs.ha.fencing.methods</name>
        <value>sshfence(hadoop:12898)</value>
    </property>
    <property>
        <name>dfs.ha.fencing.ssh.connect-timeout</name>
        <value>30000</value>
    </property>
    <property>
        <name>dfs.ha.fencing.ssh.private-key-files</name>
        <value>/home/hadoop/.ssh/id_rsa</value>
    </property>
    <property>
        <name>dfs.ha.automatic-failover.enabled</name>
        <value>true</value>
    </property>
    <property>
        <name>dfs.webhdfs.enabled</name>
        <value>true</value>
    </property>
    <property>
        <name>dfs.namenode.handler.count</name>
        <value>21</value>
    </property>
    <property>
        <name>dfs.datanode.handler.count</name>
        <value>7</value>
    </property>
    <property>
        <name>dfs.datanode.max.xcievers</name>
        <value>4096</value>
    </property>
    <property>
        <name>dfs.hosts</name>
        <value>/data/module/hadoop-3.3.4/etc/hadoop/whitelist</value>
    </property>
    <property>
        <name>dfs.hosts.exclude</name>
        <value>/data/module/hadoop-3.3.4/etc/hadoop/blacklist</value>
    </property>
</configuration>
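dfs.hosts and dfs.hosts.exclude point at a whitelist and a blacklist file under etc/hadoop; these files should exist before HDFS is started. A minimal sketch, using the DataNode hostnames from the table above:

cat > /data/module/hadoop-3.3.4/etc/hadoop/whitelist <<EOF
hadoop3
hadoop4
hadoop5
EOF
touch /data/module/hadoop-3.3.4/etc/hadoop/blacklist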
Configure yarn-site.xml
[hadoop@master1 hadoop]$ vim yarn-site.xml
<configuration>
    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle,spark_shuffle</value>
    </property>
    <property>
        <name>yarn.nodemanager.aux-services.spark_shuffle.class</name>
        <value>org.apache.spark.network.yarn.YarnShuffleService</value>
    </property>
    <property>
        <name>yarn.nodemanager.env-whitelist</name>
        <value>JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_MAPRED_HOME</value>
    </property>
    <property>
        <name>yarn.log-aggregation-enable</name>
        <value>true</value>
    </property>
    <property>
        <name>yarn.log.server.url</name>
        <value>http://xxx:19888/jobhistory/logs</value>
    </property>
    <property>
        <name>yarn.log-aggregation.retain-seconds</name>
        <value>604800</value>
    </property>
    <property>
        <name>yarn.nodemanager.vmem-check-enabled</name>
        <value>false</value>
    </property>
    <property>
        <name>yarn.nodemanager.pmem-check-enabled</name>
        <value>true</value>
    </property>
    <property>
        <name>yarn.nodemanager.resource.memory-mb</name>
        <value>114688</value>
    </property>
    <property>
        <name>yarn.nodemanager.resource.cpu-vcores</name>
        <value>28</value>
    </property>
    <property>
        <description>The minimum allocation for every container request at the RM in MBs. Memory requests lower than this will be set to the value of this property. Additionally, a node manager that is configured to have less memory than this value will be shut down by the resource manager.</description>
        <name>yarn.scheduler.minimum-allocation-mb</name>
        <value>128</value>
    </property>
    <property>
        <description>The maximum allocation for every container request at the RM in MBs. Memory requests higher than this will throw an InvalidResourceRequestException.</description>
        <name>yarn.scheduler.maximum-allocation-mb</name>
        <value>114688</value>
    </property>
    <property>
        <description>The minimum allocation for every container request at the RM in terms of virtual CPU cores. Requests lower than this will be set to the value of this property. Additionally, a node manager that is configured to have fewer virtual cores than this value will be shut down by the resource manager.</description>
        <name>yarn.scheduler.minimum-allocation-vcores</name>
        <value>1</value>
    </property>
    <property>
        <description>The maximum allocation for every container request at the RM in terms of virtual CPU cores. Requests higher than this will throw an InvalidResourceRequestException.</description>
        <name>yarn.scheduler.maximum-allocation-vcores</name>
        <value>28</value>
    </property>
    <property>
        <name>yarn.resourcemanager.ha.enabled</name>
        <value>true</value>
    </property>
    <property>
        <name>yarn.resourcemanager.cluster-id</name>
        <value>cluster-yarn1</value>
    </property>
    <property>
        <name>yarn.resourcemanager.ha.rm-ids</name>
        <value>rm1,rm2</value>
    </property>
    <property>
        <name>yarn.resourcemanager.hostname.rm1</name>
        <value>xxx</value>
    </property>
    <property>
        <name>yarn.resourcemanager.webapp.address.rm1</name>
        <value>xxx:8088</value>
    </property>
    <property>
        <name>yarn.resourcemanager.address.rm1</name>
        <value>xxx:8032</value>
    </property>
    <property>
        <name>yarn.resourcemanager.scheduler.address.rm1</name>
        <value>xxx:8030</value>
    </property>
    <property>
        <name>yarn.resourcemanager.resource-tracker.address.rm1</name>
        <value>xxxx:8031</value>
    </property>
    <property>
        <name>yarn.resourcemanager.hostname.rm2</name>
        <value>xxx</value>
    </property>
    <property>
        <name>yarn.resourcemanager.webapp.address.rm2</name>
        <value>xxx:8088</value>
    </property>
    <property>
        <name>yarn.resourcemanager.address.rm2</name>
        <value>xxx:8032</value>
    </property>
    <property>
        <name>yarn.resourcemanager.scheduler.address.rm2</name>
        <value>xxxx:8030</value>
    </property>
    <property>
        <name>yarn.resourcemanager.resource-tracker.address.rm2</name>
        <value>xxxx:8031</value>
    </property>
    <property>
        <name>yarn.resourcemanager.zk-address</name>
        <value>xxxx</value>
    </property>
    <property>
        <name>yarn.resourcemanager.recovery.enabled</name>
        <value>true</value>
    </property>
    <property>
        <name>yarn.resourcemanager.store.class</name>
        <value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
    </property>
    <!-- List of local filesystem directories where the NodeManager stores intermediate data;
         configure on NodeManager nodes only. Left commented out here.
    <property>
        <name>yarn.nodemanager.local-dirs</name>
        <value>xxxx</value>
    </property>
    <property>
        <name>yarn.nodemanager.log-dirs</name>
        <value>/data2/logs</value>
    </property>
    -->
    <property>
        <name>yarn.resourcemanager.scheduler.class</name>
        <value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler</value>
        <description>Use the Capacity Scheduler</description>
    </property>
    <property>
        <description>Number of threads to handle scheduler interface.</description>
        <name>yarn.resourcemanager.scheduler.client.thread-count</name>
        <value>100</value>
    </property>
    <property>
        <name>yarn.resourcemanager.am.max-attempts</name>
        <value>4</value>
        <description>The maximum number of application master execution attempts (default is 2).</description>
    </property>
    <property>
        <name>yarn.timeline-service.enabled</name>
        <value>true</value>
    </property>
    <property>
        <name>hadoop.http.cross-origin.allowed-origins</name>
        <value>*</value>
    </property>
    <property>
        <name>yarn.nodemanager.webapp.cross-origin.enabled</name>
        <value>true</value>
    </property>
    <property>
        <name>yarn.resourcemanager.webapp.cross-origin.enabled</name>
        <value>true</value>
    </property>
    <property>
        <name>yarn.timeline-service.http-cross-origin.enabled</name>
        <value>true</value>
    </property>
    <property>
        <description>Publish YARN information to Timeline Server</description>
        <name>yarn.resourcemanager.system-metrics-publisher.enabled</name>
        <value>true</value>
    </property>
    <property>
        <description>The hostname of the Timeline service web application.</description>
        <name>yarn.timeline-service.hostname</name>
        <value>xxx</value>
    </property>
    <property>
        <name>yarn.timeline-service.generic-application-history.enabled</name>
        <value>true</value>
    </property>
    <property>
        <description>Address for the Timeline server to start the RPC server.</description>
        <name>yarn.timeline-service.address</name>
        <value>xxx:10201</value>
    </property>
    <property>
        <description>The http address of the Timeline service web application.</description>
        <name>yarn.timeline-service.webapp.address</name>
        <value>xxx:8188</value>
    </property>
    <property>
        <description>The https address of the Timeline service web application.</description>
        <name>yarn.timeline-service.webapp.https.address</name>
        <value>xxx:2191</value>
    </property>
    <property>
        <name>yarn.timeline-service.handler-thread-count</name>
        <value>10</value>
    </property>
    <property>
        <name>yarn.resourcemanager.scheduler.monitor.enable</name>
        <value>true</value>
    </property>
</configuration>
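Because spark_shuffle is declared as a NodeManager auxiliary service above, every NodeManager needs the Spark YARN shuffle service jar on its classpath, or the NodeManager will fail to start. A sketch, assuming a Spark distribution installed at $SPARK_HOME (the jar path and version depend on your Spark build):

# Copy the Spark external shuffle service jar onto the NodeManager classpath (repeat on every NodeManager)
cp $SPARK_HOME/yarn/spark-*-yarn-shuffle.jar $HADOOP_HOME/share/hadoop/yarn/lib/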
Capacity Scheduler configuration (capacity-scheduler.xml)
<configuration>
    <property>
        <name>yarn.scheduler.capacity.maximum-applications</name>
        <value>10000</value>
        <description>
            Maximum number of applications that can be pending and running.
        </description>
    </property>
    <property>
        <name>yarn.scheduler.capacity.maximum-am-resource-percent</name>
        <value>0.8</value>
        <description>
            Maximum percent of resources in the cluster which can be used to run
            application masters i.e. controls number of concurrent running
            applications.
        </description>
    </property>
    <property>
        <name>yarn.scheduler.capacity.resource-calculator</name>
        <value>org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator</value>
        <description>
            The ResourceCalculator implementation to be used to compare
            Resources in the scheduler.
            The default i.e. DefaultResourceCalculator only uses Memory while
            DominantResourceCalculator uses dominant-resource to compare
            multi-dimensional resources such as Memory, CPU etc.
        </description>
    </property>
    <property>
        <name>yarn.scheduler.capacity.root.queues</name>
        <value>xx2,xx1</value>
        <description>
            The queues at the this level (root is the root queue).
        </description>
    </property>
    <property>
        <name>yarn.scheduler.capacity.root.xx2.capacity</name>
        <value>65</value>
        <description>Default queue target capacity.</description>
    </property>
    <property>
        <name>yarn.scheduler.capacity.root.xx2.user-limit-factor</name>
        <value>2</value>
        <description>
            Default queue user limit a percentage from 0.0 to 1.0.
        </description>
    </property>
    <property>
        <name>yarn.scheduler.capacity.root.xx2.maximum-capacity</name>
        <value>80</value>
        <description>
            The maximum capacity of the default queue.
        </description>
    </property>
    <property>
        <name>yarn.scheduler.capacity.root.xx2.state</name>
        <value>RUNNING</value>
        <description>
            The state of the default queue. State can be one of RUNNING or STOPPED.
        </description>
    </property>
    <property>
        <name>yarn.scheduler.capacity.root.xx2.acl_submit_applications</name>
        <value>*</value>
        <description>
            The ACL of who can submit jobs to the default queue.
        </description>
    </property>
    <property>
        <name>yarn.scheduler.capacity.root.xx2.acl_administer_queue</name>
        <value>*</value>
        <description>
            The ACL of who can administer jobs on the default queue.
        </description>
    </property>
    <property>
        <name>yarn.scheduler.capacity.root.xx2.acl_application_max_priority</name>
        <value>*</value>
        <description>
            The ACL of who can submit applications with configured priority.
            For e.g, [user={name} group={name} max_priority={priority} default_priority={priority}]
        </description>
    </property>
    <property>
        <name>yarn.scheduler.capacity.root.xx2.maximum-application-lifetime</name>
        <value>-1</value>
        <description>
            Maximum lifetime of an application which is submitted to a queue
            in seconds. Any value less than or equal to zero will be considered as
            disabled.
            This will be a hard time limit for all applications in this
            queue. If positive value is configured then any application submitted
            to this queue will be killed after exceeds the configured lifetime.
            User can also specify lifetime per application basis in
            application submission context. But user lifetime will be
            overridden if it exceeds queue maximum lifetime. It is point-in-time
            configuration.
            Note : Configuring too low value will result in killing application
            sooner. This feature is applicable only for leaf queue.
        </description>
    </property>
    <property>
        <name>yarn.scheduler.capacity.root.xx2.default-application-lifetime</name>
        <value>-1</value>
        <description>
            Default lifetime of an application which is submitted to a queue
            in seconds. Any value less than or equal to zero will be considered as
            disabled.
            If the user has not submitted application with lifetime value then this
            value will be taken. It is point-in-time configuration.
            Note : Default lifetime can't exceed maximum lifetime. This feature is
            applicable only for leaf queue.
        </description>
    </property>
    <property>
        <name>yarn.scheduler.capacity.node-locality-delay</name>
        <value>40</value>
        <description>
            Number of missed scheduling opportunities after which the CapacityScheduler
            attempts to schedule rack-local containers.
            When setting this parameter, the size of the cluster should be taken into account.
            We use 40 as the default value, which is approximately the number of nodes in one rack.
            Note, if this value is -1, the locality constraint in the container request
            will be ignored, which disables the delay scheduling.
        </description>
    </property>
    <property>
        <name>yarn.scheduler.capacity.rack-locality-additional-delay</name>
        <value>-1</value>
        <description>
            Number of additional missed scheduling opportunities over the node-locality-delay
            ones, after which the CapacityScheduler attempts to schedule off-switch containers,
            instead of rack-local ones.
            Example: with node-locality-delay=40 and rack-locality-delay=20, the scheduler will
            attempt rack-local assignments after 40 missed opportunities, and off-switch assignments
            after 40+20=60 missed opportunities.
            When setting this parameter, the size of the cluster should be taken into account.
            We use -1 as the default value, which disables this feature. In this case, the number
            of missed opportunities for assigning off-switch containers is calculated based on
            the number of containers and unique locations specified in the resource request,
            as well as the size of the cluster.
        </description>
    </property>
    <property>
        <name>yarn.scheduler.capacity.queue-mappings</name>
        <value></value>
        <description>
            A list of mappings that will be used to assign jobs to queues
            The syntax for this list is [u|g]:[name]:[queue_name][,next mapping]*
            Typically this list will be used to map users to queues,
            for example, u:%user:%user maps all users to queues with the same name
            as the user.
        </description>
    </property>
    <property>
        <name>yarn.scheduler.capacity.queue-mappings-override.enable</name>
        <value>false</value>
        <description>
            If a queue mapping is present, will it override the value specified
            by the user? This can be used by administrators to place jobs in queues
            that are different than the one specified by the user.
            The default is false.
        </description>
    </property>
    <property>
        <name>yarn.scheduler.capacity.per-node-heartbeat.maximum-offswitch-assignments</name>
        <value>1</value>
        <description>
            Controls the number of OFF_SWITCH assignments allowed
            during a node's heartbeat. Increasing this value can improve
            scheduling rate for OFF_SWITCH containers. Lower values reduce
            "clumping" of applications on particular nodes. The default is 1.
            Legal values are 1-MAX_INT. This config is refreshable.
        </description>
    </property>
    <property>
        <name>yarn.scheduler.capacity.application.fail-fast</name>
        <value>false</value>
        <description>
            Whether RM should fail during recovery if previous applications'
            queue is no longer valid.
        </description>
    </property>
    <property>
        <name>yarn.scheduler.capacity.root.xx1.capacity</name>
        <value>35</value>
    </property>
    <property>
        <name>yarn.scheduler.capacity.root.xx1.user-limit-factor</name>
        <value>2</value>
    </property>
    <property>
        <name>yarn.scheduler.capacity.root.xx1.maximum-capacity</name>
        <value>50</value>
    </property>
    <property>
        <name>yarn.scheduler.capacity.root.xx1.maximum-am-resource-percent</name>
        <value>0.85</value>
    </property>
    <property>
        <name>yarn.scheduler.capacity.root.xx1.state</name>
        <value>RUNNING</value>
    </property>
    <property>
        <name>yarn.scheduler.capacity.root.xx1.acl_submit_applications</name>
        <value>*</value>
    </property>
    <property>
        <name>yarn.scheduler.capacity.root.xx1.acl_administer_queue</name>
        <value>*</value>
    </property>
    <property>
        <name>yarn.scheduler.capacity.root.xx1.acl_application_max_priority</name>
        <value>*</value>
    </property>
    <property>
        <name>yarn.scheduler.capacity.root.xx1.maximum-application-lifetime</name>
        <value>-1</value>
    </property>
    <property>
        <name>yarn.scheduler.capacity.root.xx1.default-application-lifetime</name>
        <value>-1</value>
    </property>
</configuration>
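Queue changes in capacity-scheduler.xml can later be reloaded without restarting the ResourceManager:

yarn rmadmin -refreshQueues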
# The following are set in $HADOOP_HOME/etc/hadoop/hadoop-env.sh
# export HADOOP_SSH_OPTS="-o BatchMode=yes -o StrictHostKeyChecking=no -o ConnectTimeout=10s"
export HADOOP_SSH_OPTS="-p 12898"
# Where pid files are stored, /tmp by default. Do not leave them in /tmp: they eventually get cleaned up and Hadoop can no longer be stopped cleanly.
export HADOOP_PID_DIR=/data/module/hadoop-3.3.4/pids
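Before starting any daemon, make sure the whole configuration directory is identical on every node; a minimal rsync sketch (assuming passwordless SSH and the same install path everywhere; add -e "ssh -p 12898" if the cluster uses the non-default SSH port configured above):

for host in hadoop2 hadoop3 hadoop4 hadoop5; do
  rsync -av /data/module/hadoop-3.3.4/etc/hadoop/ hadoop@$host:/data/module/hadoop-3.3.4/etc/hadoop/
done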
# Start a JournalNode on each JournalNode host (hadoop3/4/5)
hdfs --daemon start journalnode
# (stop command, not run here: hdfs --daemon stop journalnode)
# Run on the first NameNode (hadoop1)
hdfs namenode -format
# Start the NameNode
hdfs --daemon start namenode
# (stop command, not run here: hdfs --daemon stop namenode)
# Run on the second NameNode (hadoop2) to sync the metadata
hdfs namenode -bootstrapStandby
# Then start the NameNode on the second NameNode (hadoop2)
hdfs --daemon start namenode
# (stop command, not run here: hdfs --daemon stop namenode)
# Make sure ZooKeeper is running on hadoop3/4/5
zkServer.sh start
# Run once on a NameNode host to initialize the HA state in ZooKeeper
hdfs zkfc -formatZK
# Run on all three DataNode hosts
hdfs --daemon start datanode
# (stop command: hdfs --daemon stop datanode)
# From here on, the whole cluster can be started and stopped with:
start-dfs.sh / stop-dfs.sh
start-yarn.sh / stop-yarn.sh
# Start the JobHistory server
mapred --daemon start historyserver
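A few checks to confirm that HA is healthy once everything is up:

# Daemons running on each node
jps
# NameNode HA state: one of nn1/nn2 should be active, the other standby
hdfs haadmin -getServiceState nn1
hdfs haadmin -getServiceState nn2
# ResourceManager HA state
yarn rmadmin -getServiceState rm1
yarn rmadmin -getServiceState rm2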
For the surrounding components, see the other posts in this blog:
Hive and Spark production cluster setup (Spark on Doris)
Integrating Hadoop with object storage and HDFS disk storage
Complete HSQL command reference (Hive commands)