A Complete Guide to Building a Highly Available Hadoop Production Cluster

1. Environment Preparation

1.1 Cluster Planning

Node / Hostname         IP    Memory  CPU cores  Components
bigdata-001 (hadoop1)   xxx   64G     16         NameNode, ResourceManager, DFSZKFailoverController, HistoryServer
bigdata-002 (hadoop2)   xxx   64G     16         NameNode, ResourceManager, DFSZKFailoverController
bigdata-003 (hadoop3)   xxx   128G    32         DataNode, NodeManager, JournalNode, Zookeeper
bigdata-004 (hadoop4)   xxx   128G    32         DataNode, NodeManager, JournalNode, Zookeeper
bigdata-005 (hadoop5)   xxx   128G    32         DataNode, NodeManager, JournalNode, Zookeeper

Versions: Hadoop 3.3.4, Zookeeper 3.5.7

1.2 Add a New User and Grant Sudo Privileges

useradd hadoop
passwd hadoop

visudo
# Add the following line below: root   ALL=(ALL)       ALL
hadoop    ALL=(ALL)       NOPASSWD: ALL

1.3 Configure hosts

sudo vim /etc/hosts
xxxx hadoop1
xxxx hadoop2

1.4 Passwordless SSH Login

mkdir ~/.ssh
cd ~/.ssh
ssh-keygen -t rsa -m PEM
touch authorized_keys
# After authorized_keys is set up on one machine, copy each machine's public key (its id_rsa.pub)
# into the authorized_keys file of every other machine, so all nodes can SSH to each other without a password
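
As a concrete example, one way to distribute the keys is with ssh-copy-id. This is a sketch that assumes the hadoop1..hadoop5 hostnames from the cluster plan and that the hadoop user already exists on every node; if sshd listens on a non-default port (such as the 12898 used later in hadoop-env.sh), add -p <port>.

# Run on every node as the hadoop user, after generating its key pair
for host in hadoop1 hadoop2 hadoop3 hadoop4 hadoop5; do
  ssh-copy-id -i ~/.ssh/id_rsa.pub hadoop@$host
done
# Verify: this should log in without prompting for a password
ssh hadoop@hadoop2 hostname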

2. JDK Installation

tar -zxvf jdk-8u212-linux-x64.tar.gz -C /data/module/
# Set up the JDK environment variables
sudo vim /etc/profile.d/my_env.sh

# Add JAVA_HOME
export JAVA_HOME=/data/module/jdk1.8.0_212
export PATH=$PATH:$JAVA_HOME/bin
# Make the environment variables take effect
source /etc/profile.d/my_env.sh
# Verify that the JDK installed correctly
java -version
# This must be done on every node
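
A minimal sketch for copying the JDK to the remaining nodes, assuming the passwordless SSH from section 1.4 and that /data/module already exists and is writable by the hadoop user on every node:

for host in hadoop2 hadoop3 hadoop4 hadoop5; do
  rsync -av /data/module/jdk1.8.0_212/ hadoop@$host:/data/module/jdk1.8.0_212/
done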

3. Zookeeper Installation and Configuration

# 1) Extract and install

# (1) Extract the Zookeeper tarball into /data/module/
[hadoop@master1 software]$ tar -zxvf apache-zookeeper-3.5.7-bin.tar.gz -C /data/module/
# (2) Rename /data/module/apache-zookeeper-3.5.7-bin to zk-3.5.7
[hadoop@master1 module]$ mv apache-zookeeper-3.5.7-bin/ zk-3.5.7
# 2) Configure the server ID
# (1) Create a zkData directory under /data/module/zk-3.5.7/
[hadoop@master1 zk-3.5.7]$ mkdir zkData
# (2) Create a file named myid in /data/module/zk-3.5.7/zkData
[hadoop@master1 zkData]$ vim myid
# Create the myid file on Linux itself; files edited in Notepad++ often end up with encoding problems
# Put this node's server ID in the file (every node must use a different ID), e.g.:
2
# 3) Configure zoo.cfg
# (1) Rename zoo_sample.cfg in /data/module/zk-3.5.7/conf to zoo.cfg
[hadoop@master1 conf]$ mv zoo_sample.cfg zoo.cfg
# (2) Open zoo.cfg
[hadoop@master1 conf]$ vim zoo.cfg
# Change the data directory
dataDir=/data/module/zk-3.5.7/zkData
# Add the following cluster configuration
#######################cluster##########################
server.1=hadoop3:2888:3888
server.2=hadoop4:2888:3888
server.3=hadoop5:2888:3888
# (3) Sync the /data/module/zk-3.5.7 directory to the other two Zookeeper nodes
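
A sketch of that sync step plus the per-node myid values, assuming the three Zookeeper hosts are hadoop3, hadoop4 and hadoop5 as listed in zoo.cfg and that the commands run from hadoop3 (adjust if your source host differs):

# Copy the Zookeeper installation to the other two nodes
for host in hadoop4 hadoop5; do
  rsync -av /data/module/zk-3.5.7/ hadoop@$host:/data/module/zk-3.5.7/
done
# myid must match the server.N entries in zoo.cfg
echo 1 > /data/module/zk-3.5.7/zkData/myid                          # hadoop3 -> server.1
ssh hadoop@hadoop4 'echo 2 > /data/module/zk-3.5.7/zkData/myid'     # hadoop4 -> server.2
ssh hadoop@hadoop5 'echo 3 > /data/module/zk-3.5.7/zkData/myid'     # hadoop5 -> server.3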


4. Hadoop Installation

4.1 Component Installation

# 1) Upload hadoop-3.3.4.tar.gz to the /data/software directory (e.g. with SecureCRT or scp)
# 2) Go to the directory containing the Hadoop tarball
[hadoop@master1 ~]$ cd /data/software/
# 3) Extract it into /data/module
[hadoop@master1 software]$ tar -zxvf hadoop-3.3.4.tar.gz -C /data/module/
# 4) Check that the extraction succeeded
[hadoop@master1 software]$ ls /data/module/hadoop-3.3.4
# 5) Add Hadoop to the environment variables
# (1) Get the Hadoop installation path
[hadoop@master1 hadoop]$ pwd
/data/module/hadoop-3.3.4
# (2) Open /etc/profile.d/my_env.sh
[hadoop@master1 hadoop]$ sudo vim /etc/profile.d/my_env.sh
# Append the Hadoop variables at the end of the file (Shift+G jumps to the end in vim):
#HADOOP_HOME
export HADOOP_HOME=/data/module/hadoop-3.3.4
export PATH=$PATH:$HADOOP_HOME/bin
export PATH=$PATH:$HADOOP_HOME/sbin
export HADOOP_CONF_DIR=${HADOOP_HOME}/etc/hadoop
export HADOOP_CLASSPATH=`hadoop classpath`

#USER_HOME
export USER_HOME=/home/hadoop
export PATH=$PATH:$USER_HOME/bin


# (3) Save and exit
:wq
# (4) Distribute the environment variable file to the other nodes
# (5) Source it on all 5 nodes so that it takes effect
[hadoop@master1 module]$ source /etc/profile.d/my_env.sh
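
A minimal sketch of step (4), assuming the passwordless SSH from section 1.4 and the passwordless sudo configured in section 1.2:

for host in hadoop2 hadoop3 hadoop4 hadoop5; do
  rsync -av /etc/profile.d/my_env.sh hadoop@$host:/tmp/my_env.sh
  ssh hadoop@$host 'sudo mv /tmp/my_env.sh /etc/profile.d/my_env.sh'
done
# Then log in to each node and run: source /etc/profile.d/my_env.sh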

4.2 Hadoop Cluster Configuration

4.2.1 Core configuration file

cd $HADOOP_HOME/etc/hadoop
vim core-site.xml

The file contents are as follows:




<configuration>

  <property>
    <name>fs.trash.interval</name>
    <value>4320</value>
  </property>

  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://hadoopcluster</value>
  </property>

  <property>
    <name>hadoop.tmp.dir</name>
    <value>/data/module/hadoop-3.3.4/data</value>
  </property>

  <property>
    <name>hadoop.http.staticuser.user</name>
    <value>hadoop</value>
  </property>

  <property>
    <name>ha.zookeeper.quorum</name>
    <value>xxxx</value>
  </property>

  <property>
    <name>ipc.client.connect.max.retries</name>
    <value>30</value>
  </property>

  <property>
    <name>ipc.client.connect.retry.interval</name>
    <value>1000</value>
  </property>

  <property>
    <name>hadoop.proxyuser.hadoop.hosts</name>
    <value>*</value>
  </property>

  <property>
    <name>hadoop.proxyuser.hadoop.groups</name>
    <value>*</value>
  </property>

  <property>
    <name>fs.cosn.bucket.region</name>
    <value>ap-beijing</value>
    <description>The region where the bucket is located</description>
  </property>

  <property>
    <name>fs.cosn.credentials.provider</name>
    <value>org.apache.hadoop.fs.auth.SimpleCredentialProvider</value>
  </property>

  <property>
    <name>fs.cosn.userinfo.secretId</name>
    <value>xxxx</value>
    <description>Tencent Cloud Secret Id</description>
  </property>

  <property>
    <name>fs.cosn.userinfo.secretKey</name>
    <value>xxxx</value>
    <description>Tencent Cloud Secret Key</description>
  </property>

  <property>
    <name>fs.cosn.impl</name>
    <value>org.apache.hadoop.fs.CosFileSystem</value>
    <description>The implementation class of the CosN Filesystem</description>
  </property>

  <property>
    <name>fs.AbstractFileSystem.cosn.impl</name>
    <value>org.apache.hadoop.fs.CosN</value>
    <description>The implementation class of the CosN AbstractFileSystem.</description>
  </property>

  <property>
    <name>fs.cosn.tmp.dir</name>
    <value>/tmp/hadoop_cos</value>
    <description>Temporary files would be placed here.</description>
  </property>

  <property>
    <name>fs.cosn.buffer.size</name>
    <value>33554432</value>
    <description>The total size of the buffer pool.</description>
  </property>

  <property>
    <name>fs.cosn.block.size</name>
    <value>8388608</value>
    <description>
      Block size to use for the cosn filesystem, which is the part size for MultipartUpload. Since COS supports at most 10000 blocks per file, estimate the maximum size of a single file accordingly. For example, an 8MB part size allows writing a single file of roughly 78GB.
    </description>
  </property>

  <property>
    <name>fs.cosn.maxRetries</name>
    <value>3</value>
    <description>
      The maximum number of retries for reading or writing files to COS, before throwing a failure to the application.
    </description>
  </property>

  <property>
    <name>fs.cosn.retry.interval.seconds</name>
    <value>3</value>
    <description>The number of seconds to sleep between each COS retry.</description>
  </property>

</configuration>

4.2.2 MapReduce configuration file

Configure mapred-site.xml:

[hadoop@master1 hadoop]$ vim mapred-site.xml

The file contents are as follows:



<configuration>

  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>

  <property>
    <name>mapreduce.jobhistory.address</name>
    <value>xxx:10020</value>
  </property>

  <property>
    <name>mapreduce.jobhistory.webapp.address</name>
    <value>xxx:19888</value>
  </property>

</configuration>

4.2.3 Configure workers

[hadoop@master1 hadoop]$ vim /data/module/hadoop-3.3.4/etc/hadoop/workers
# Add the following to the file:
hadoop3
hadoop4
hadoop5

4.2.4 HDFS configuration file

Configure hdfs-site.xml:

[hadoop@master1 ~]$ cd $HADOOP_HOME/etc/hadoop
[hadoop@master1 hadoop]$ vim hdfs-site.xml

The file contents are as follows:






<configuration>

  <property>
    <name>dfs.namenode.name.dir</name>
    <value>file://${hadoop.tmp.dir}/name</value>
  </property>

  <property>
    <name>dfs.datanode.data.dir</name>
    <value>xxx</value>
  </property>

  <property>
    <name>dfs.journalnode.edits.dir</name>
    <value>xxx</value>
  </property>

  <property>
    <name>dfs.nameservices</name>
    <value>hadoopcluster</value>
  </property>

  <property>
    <name>dfs.ha.namenodes.hadoopcluster</name>
    <value>nn1,nn2</value>
  </property>

  <property>
    <name>dfs.namenode.rpc-address.hadoopcluster.nn1</name>
    <value>xxx:8020</value>
  </property>
  <property>
    <name>dfs.namenode.rpc-address.hadoopcluster.nn2</name>
    <value>xxx:8020</value>
  </property>

  <property>
    <name>dfs.blocksize</name>
    <value>268435456</value>
  </property>

  <property>
    <name>dfs.namenode.http-address.hadoopcluster.nn1</name>
    <value>xxx:9870</value>
  </property>
  <property>
    <name>dfs.namenode.http-address.hadoopcluster.nn2</name>
    <value>xxx:9870</value>
  </property>

  <property>
    <name>dfs.namenode.shared.edits.dir</name>
    <value>qjournal://xxx:8485;xxx:8485;xxx:8485/hadoopcluster</value>
  </property>

  <property>
    <name>dfs.client.failover.proxy.provider.hadoopcluster</name>
    <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
  </property>

  <property>
    <name>dfs.ha.fencing.methods</name>
    <value>sshfence(hadoop:12898)</value>
  </property>
  <property>
    <name>dfs.ha.fencing.ssh.connect-timeout</name>
    <value>30000</value>
  </property>

  <property>
    <name>dfs.ha.fencing.ssh.private-key-files</name>
    <value>/home/hadoop/.ssh/id_rsa</value>
  </property>

  <property>
    <name>dfs.ha.automatic-failover.enabled</name>
    <value>true</value>
  </property>

  <property>
    <name>dfs.webhdfs.enabled</name>
    <value>true</value>
  </property>

  <property>
    <name>dfs.namenode.handler.count</name>
    <value>21</value>
  </property>

  <property>
    <name>dfs.datanode.handler.count</name>
    <value>7</value>
  </property>

  <property>
    <name>dfs.datanode.max.xcievers</name>
    <value>4096</value>
  </property>

  <property>
    <name>dfs.hosts</name>
    <value>/data/module/hadoop-3.3.4/etc/hadoop/whitelist</value>
  </property>

  <property>
    <name>dfs.hosts.exclude</name>
    <value>/data/module/hadoop-3.3.4/etc/hadoop/blacklist</value>
  </property>

</configuration>

4.2.5 YARN configuration file

Configure yarn-site.xml:

[hadoop@master1 hadoop]$ vim yarn-site.xml


<configuration>

  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle,spark_shuffle</value>
  </property>
  <property>
    <name>yarn.nodemanager.aux-services.spark_shuffle.class</name>
    <value>org.apache.spark.network.yarn.YarnShuffleService</value>
  </property>

  <property>
    <name>yarn.nodemanager.env-whitelist</name>
    <value>JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_MAPRED_HOME</value>
  </property>

  <property>
    <name>yarn.log-aggregation-enable</name>
    <value>true</value>
  </property>

  <property>
    <name>yarn.log.server.url</name>
    <value>http://xxx:19888/jobhistory/logs</value>
  </property>

  <property>
    <name>yarn.log-aggregation.retain-seconds</name>
    <value>604800</value>
  </property>

  <property>
    <name>yarn.nodemanager.vmem-check-enabled</name>
    <value>false</value>
  </property>
  <property>
    <name>yarn.nodemanager.pmem-check-enabled</name>
    <value>true</value>
  </property>

  <property>
    <name>yarn.nodemanager.resource.memory-mb</name>
    <value>114688</value>
  </property>

  <property>
    <name>yarn.nodemanager.resource.cpu-vcores</name>
    <value>28</value>
  </property>

  <property>
    <description>The minimum allocation for every container request at the RM in MBs. Memory requests lower than this will be set to the value of this property. Additionally, a node manager that is configured to have less memory than this value will be shut down by the resource manager.
    </description>
    <name>yarn.scheduler.minimum-allocation-mb</name>
    <value>128</value>
  </property>

  <property>
    <description>The maximum allocation for every container request at the RM in MBs. Memory requests higher than this will throw an InvalidResourceRequestException.
    </description>
    <name>yarn.scheduler.maximum-allocation-mb</name>
    <value>114688</value>
  </property>

  <property>
    <description>The minimum allocation for every container request at the RM in terms of virtual CPU cores. Requests lower than this will be set to the value of this property. Additionally, a node manager that is configured to have fewer virtual cores than this value will be shut down by the resource manager.
    </description>
    <name>yarn.scheduler.minimum-allocation-vcores</name>
    <value>1</value>
  </property>

  <property>
    <description>The maximum allocation for every container request at the RM in terms of virtual CPU cores. Requests higher than this will throw an
    InvalidResourceRequestException.</description>
    <name>yarn.scheduler.maximum-allocation-vcores</name>
    <value>28</value>
  </property>

  <property>
    <name>yarn.resourcemanager.ha.enabled</name>
    <value>true</value>
  </property>

  <property>
    <name>yarn.resourcemanager.cluster-id</name>
    <value>cluster-yarn1</value>
  </property>

  <property>
    <name>yarn.resourcemanager.ha.rm-ids</name>
    <value>rm1,rm2</value>
  </property>

  <property>
    <name>yarn.resourcemanager.hostname.rm1</name>
    <value>xxx</value>
  </property>

  <property>
    <name>yarn.resourcemanager.webapp.address.rm1</name>
    <value>xxx:8088</value>
  </property>

  <property>
    <name>yarn.resourcemanager.address.rm1</name>
    <value>xxx:8032</value>
  </property>

  <property>
    <name>yarn.resourcemanager.scheduler.address.rm1</name>
    <value>xxx:8030</value>
  </property>

  <property>
    <name>yarn.resourcemanager.resource-tracker.address.rm1</name>
    <value>xxxx:8031</value>
  </property>

  <property>
    <name>yarn.resourcemanager.hostname.rm2</name>
    <value>xxx</value>
  </property>
  <property>
    <name>yarn.resourcemanager.webapp.address.rm2</name>
    <value>xxx:8088</value>
  </property>
  <property>
    <name>yarn.resourcemanager.address.rm2</name>
    <value>xxx:8032</value>
  </property>
  <property>
    <name>yarn.resourcemanager.scheduler.address.rm2</name>
    <value>xxxx:8030</value>
  </property>

  <property>
    <name>yarn.resourcemanager.resource-tracker.address.rm2</name>
    <value>xxxx:8031</value>
  </property>

  <property>
    <name>yarn.resourcemanager.zk-address</name>
    <value>xxxx</value>
  </property>

  <property>
    <name>yarn.resourcemanager.recovery.enabled</name>
    <value>true</value>
  </property>

  <property>
    <name>yarn.resourcemanager.store.class</name>
    <value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
  </property>

  <!-- List of local filesystem directories where the NodeManager stores intermediate data; configure on NodeManager nodes only -->
  <!--
  <property>
    <name>yarn.nodemanager.local-dirs</name>
    <value>xxxx</value>
  </property>

  <property>
    <name>yarn.nodemanager.log-dirs</name>
    <value>/data2/logs</value>
  </property>
  -->

  <property>
    <name>yarn.resourcemanager.scheduler.class</name>
    <value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler</value>
    <description>Use the Capacity Scheduler</description>
  </property>

  <property>
    <description>Number of threads to handle scheduler interface.</description>
    <name>yarn.resourcemanager.scheduler.client.thread-count</name>
    <value>100</value>
  </property>

  <property>
    <name>yarn.resourcemanager.am.max-attempts</name>
    <value>4</value>
    <description>
      The maximum number of application master execution attempts (the default is 2).
    </description>
  </property>

  <property>
    <name>yarn.timeline-service.enabled</name>
    <value>true</value>
  </property>

  <property>
    <name>hadoop.http.cross-origin.allowed-origins</name>
    <value>*</value>
  </property>

  <property>
    <name>yarn.nodemanager.webapp.cross-origin.enabled</name>
    <value>true</value>
  </property>

  <property>
    <name>yarn.resourcemanager.webapp.cross-origin.enabled</name>
    <value>true</value>
  </property>

  <property>
    <name>yarn.timeline-service.http-cross-origin.enabled</name>
    <value>true</value>
  </property>

  <property>
    <description>Publish YARN information to Timeline Server</description>
    <name>yarn.resourcemanager.system-metrics-publisher.enabled</name>
    <value>true</value>
  </property>

  <property>
    <description>The hostname of the Timeline service web application.</description>
    <name>yarn.timeline-service.hostname</name>
    <value>xxx</value>
  </property>

  <property>
    <name>yarn.timeline-service.generic-application-history.enabled</name>
    <value>true</value>
  </property>
  <property>
    <description>Address for the Timeline server to start the RPC server.</description>
    <name>yarn.timeline-service.address</name>
    <value>xxx:10201</value>
  </property>
  <property>
    <description>The http address of the Timeline service web application.</description>
    <name>yarn.timeline-service.webapp.address</name>
    <value>xxx:8188</value>
  </property>
  <property>
    <description>The https address of the Timeline service web application.</description>
    <name>yarn.timeline-service.webapp.https.address</name>
    <value>xxx:2191</value>
  </property>
  <property>
    <name>yarn.timeline-service.handler-thread-count</name>
    <value>10</value>
  </property>

  <property>
    <name>yarn.resourcemanager.scheduler.monitor.enable</name>
    <value>true</value>
  </property>

</configuration>

4.2.6 Scheduler configuration

Capacity Scheduler configuration (capacity-scheduler.xml):

<configuration>

  <property>
    <name>yarn.scheduler.capacity.maximum-applications</name>
    <value>10000</value>
    <description>
      Maximum number of applications that can be pending and running.
    </description>
  </property>

  <property>
    <name>yarn.scheduler.capacity.maximum-am-resource-percent</name>
    <value>0.8</value>
    <description>
      Maximum percent of resources in the cluster which can be used to run
      application masters i.e. controls number of concurrent running
      applications.
    </description>
  </property>

  <property>
    <name>yarn.scheduler.capacity.resource-calculator</name>
    <value>org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator</value>
    <description>
      The ResourceCalculator implementation to be used to compare
      Resources in the scheduler.
      The default i.e. DefaultResourceCalculator only uses Memory while
      DominantResourceCalculator uses dominant-resource to compare
      multi-dimensional resources such as Memory, CPU etc.
    </description>
  </property>

  <property>
    <name>yarn.scheduler.capacity.root.queues</name>
    <value>xx2,xx1</value>
    <description>
      The queues at the this level (root is the root queue).
    </description>
  </property>

  <property>
    <name>yarn.scheduler.capacity.root.xx2.capacity</name>
    <value>65</value>
    <description>Default queue target capacity.</description>
  </property>

  <property>
    <name>yarn.scheduler.capacity.root.xx2.user-limit-factor</name>
    <value>2</value>
    <description>
      Default queue user limit a percentage from 0.0 to 1.0.
    </description>
  </property>

  <property>
    <name>yarn.scheduler.capacity.root.xx2.maximum-capacity</name>
    <value>80</value>
    <description>
      The maximum capacity of the default queue.
    </description>
  </property>

  <property>
    <name>yarn.scheduler.capacity.root.xx2.state</name>
    <value>RUNNING</value>
    <description>
      The state of the default queue. State can be one of RUNNING or STOPPED.
    </description>
  </property>

  <property>
    <name>yarn.scheduler.capacity.root.xx2.acl_submit_applications</name>
    <value>*</value>
    <description>
      The ACL of who can submit jobs to the default queue.
    </description>
  </property>

  <property>
    <name>yarn.scheduler.capacity.root.xx2.acl_administer_queue</name>
    <value>*</value>
    <description>
      The ACL of who can administer jobs on the default queue.
    </description>
  </property>

  <property>
    <name>yarn.scheduler.capacity.root.xx2.acl_application_max_priority</name>
    <value>*</value>
    <description>
      The ACL of who can submit applications with configured priority.
      For e.g, [user={name} group={name} max_priority={priority} default_priority={priority}]
    </description>
  </property>

  <property>
    <name>yarn.scheduler.capacity.root.xx2.maximum-application-lifetime</name>
    <value>-1</value>
    <description>
      Maximum lifetime of an application which is submitted to a queue
      in seconds. Any value less than or equal to zero will be considered as
      disabled.
      This will be a hard time limit for all applications in this
      queue. If positive value is configured then any application submitted
      to this queue will be killed after exceeds the configured lifetime.
      User can also specify lifetime per application basis in
      application submission context. But user lifetime will be
      overridden if it exceeds queue maximum lifetime. It is point-in-time
      configuration.
      Note : Configuring too low value will result in killing application
      sooner. This feature is applicable only for leaf queue.
    </description>
  </property>

  <property>
    <name>yarn.scheduler.capacity.root.xx2.default-application-lifetime</name>
    <value>-1</value>
    <description>
      Default lifetime of an application which is submitted to a queue
      in seconds. Any value less than or equal to zero will be considered as
      disabled.
      If the user has not submitted application with lifetime value then this
      value will be taken. It is point-in-time configuration.
      Note : Default lifetime can't exceed maximum lifetime. This feature is
      applicable only for leaf queue.
    </description>
  </property>

  <property>
    <name>yarn.scheduler.capacity.node-locality-delay</name>
    <value>40</value>
    <description>
      Number of missed scheduling opportunities after which the CapacityScheduler
      attempts to schedule rack-local containers.
      When setting this parameter, the size of the cluster should be taken into account.
      We use 40 as the default value, which is approximately the number of nodes in one rack.
      Note, if this value is -1, the locality constraint in the container request
      will be ignored, which disables the delay scheduling.
    </description>
  </property>

  <property>
    <name>yarn.scheduler.capacity.rack-locality-additional-delay</name>
    <value>-1</value>
    <description>
      Number of additional missed scheduling opportunities over the node-locality-delay
      ones, after which the CapacityScheduler attempts to schedule off-switch containers,
      instead of rack-local ones.
      Example: with node-locality-delay=40 and rack-locality-delay=20, the scheduler will
      attempt rack-local assignments after 40 missed opportunities, and off-switch assignments
      after 40+20=60 missed opportunities.
      When setting this parameter, the size of the cluster should be taken into account.
      We use -1 as the default value, which disables this feature. In this case, the number
      of missed opportunities for assigning off-switch containers is calculated based on
      the number of containers and unique locations specified in the resource request,
      as well as the size of the cluster.
    </description>
  </property>

  <property>
    <name>yarn.scheduler.capacity.queue-mappings</name>
    <value></value>
    <description>
      A list of mappings that will be used to assign jobs to queues
      The syntax for this list is [u|g]:[name]:[queue_name][,next mapping]*
      Typically this list will be used to map users to queues,
      for example, u:%user:%user maps all users to queues with the same name
      as the user.
    </description>
  </property>

  <property>
    <name>yarn.scheduler.capacity.queue-mappings-override.enable</name>
    <value>false</value>
    <description>
      If a queue mapping is present, will it override the value specified
      by the user? This can be used by administrators to place jobs in queues
      that are different than the one specified by the user.
      The default is false.
    </description>
  </property>

  <property>
    <name>yarn.scheduler.capacity.per-node-heartbeat.maximum-offswitch-assignments</name>
    <value>1</value>
    <description>
      Controls the number of OFF_SWITCH assignments allowed
      during a node's heartbeat. Increasing this value can improve
      scheduling rate for OFF_SWITCH containers. Lower values reduce
      "clumping" of applications on particular nodes. The default is 1.
      Legal values are 1-MAX_INT. This config is refreshable.
    </description>
  </property>

  <property>
    <name>yarn.scheduler.capacity.application.fail-fast</name>
    <value>false</value>
    <description>
      Whether RM should fail during recovery if previous applications'
      queue is no longer valid.
    </description>
  </property>

  <property>
    <name>yarn.scheduler.capacity.root.xx1.capacity</name>
    <value>35</value>
  </property>

  <property>
    <name>yarn.scheduler.capacity.root.xx1.user-limit-factor</name>
    <value>2</value>
  </property>

  <property>
    <name>yarn.scheduler.capacity.root.xx1.maximum-capacity</name>
    <value>50</value>
  </property>

  <property>
    <name>yarn.scheduler.capacity.root.xx1.maximum-am-resource-percent</name>
    <value>0.85</value>
  </property>

  <property>
    <name>yarn.scheduler.capacity.root.xx1.state</name>
    <value>RUNNING</value>
  </property>

  <property>
    <name>yarn.scheduler.capacity.root.xx1.acl_submit_applications</name>
    <value>*</value>
  </property>

  <property>
    <name>yarn.scheduler.capacity.root.xx1.acl_administer_queue</name>
    <value>*</value>
  </property>

  <property>
    <name>yarn.scheduler.capacity.root.xx1.acl_application_max_priority</name>
    <value>*</value>
  </property>

  <property>
    <name>yarn.scheduler.capacity.root.xx1.maximum-application-lifetime</name>
    <value>-1</value>
  </property>

  <property>
    <name>yarn.scheduler.capacity.root.xx1.default-application-lifetime</name>
    <value>-1</value>
  </property>

</configuration>

4.2.7 Edit hadoop-env.sh

# export HADOOP_SSH_OPTS="-o BatchMode=yes -o StrictHostKeyChecking=no -o ConnectTimeout=10s"
# sshd listens on port 12898 in this cluster, so the start/stop scripts must connect on that port
export HADOOP_SSH_OPTS="-p 12898"
# Where pid files are stored. /tmp by default; do not leave them there, or the pid files
# eventually get cleaned up and Hadoop can no longer be stopped cleanly.
export HADOOP_PID_DIR=/data/module/hadoop-3.3.4/pids
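
Since HADOOP_PID_DIR now points at a non-default location, the directory has to exist on every node before the daemons start. A sketch, assuming the hadoop1..hadoop5 hostnames and sshd on port 12898 as configured above:

for host in hadoop1 hadoop2 hadoop3 hadoop4 hadoop5; do
  ssh -p 12898 hadoop@$host 'mkdir -p /data/module/hadoop-3.3.4/pids'
done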

4.2.8 Create the whitelist and blacklist files (they can be left empty; to stop using a worker node, just add it to the blacklist)
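
The hdfs-site.xml above points dfs.hosts and dfs.hosts.exclude at these paths, so the files must exist even if they are empty:

touch /data/module/hadoop-3.3.4/etc/hadoop/whitelist
touch /data/module/hadoop-3.3.4/etc/hadoop/blacklist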

4.2.9 Distribute the configuration files
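
A sketch of the distribution, assuming the whole installation directory is pushed from the node where it was configured (keep the -e 'ssh -p 12898' option only if sshd actually listens on that port):

for host in hadoop2 hadoop3 hadoop4 hadoop5; do
  rsync -av -e 'ssh -p 12898' /data/module/hadoop-3.3.4/ hadoop@$host:/data/module/hadoop-3.3.4/
done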

4.2.10 Start the QJM cluster (the three JournalNodes)

hdfs --daemon start journalnode
# (stop command, do not run it here: hdfs --daemon stop journalnode)
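
To confirm the three JournalNodes are up, a quick check from any node (hostnames per the cluster plan):

for host in hadoop3 hadoop4 hadoop5; do
  echo "== $host =="
  ssh hadoop@$host 'jps | grep JournalNode'
done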

4.2.11 Format the NameNode

# Run on the first NameNode (hadoop1)
hdfs namenode -format
# Start the NameNode
hdfs --daemon start namenode
# (stop command, do not run it here: hdfs --daemon stop namenode)

4.2.12 Sync the NameNode metadata to the second NameNode (hadoop2)

# Run on the second NameNode (hadoop2)
hdfs namenode -bootstrapStandby
# Then start that NameNode
hdfs --daemon start namenode
# (stop command, do not run it here: hdfs --daemon stop namenode)

4.2.13 Start the Zookeeper cluster

# Run on each of the three Zookeeper nodes
zkServer.sh start

4.2.14 Initialize the ZKFC parent znode in Zookeeper

# Run on a NameNode host
hdfs zkfc -formatZK
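
If you want automatic failover running before the full start-dfs.sh in section 4.2.16, the ZKFC daemon can also be started by hand on both NameNode hosts (start-dfs.sh will otherwise start it for you):

# Run on each NameNode host (hadoop1 and hadoop2)
hdfs --daemon start zkfc
# (stop command: hdfs --daemon stop zkfc)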

4.2.15 Start the DataNodes on the three DataNode hosts

# Run on all three DataNode hosts
hdfs --daemon start datanode
# (stop command: hdfs --daemon stop datanode)

4.2.16 Starting and stopping the cluster from now on

start-dfs.sh/stop-dfs.sh
start-yarn.sh/stop-yarn.sh
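
After a full start, the HA state can be verified with the nn1/nn2 and rm1/rm2 IDs defined in hdfs-site.xml and yarn-site.xml:

# One NameNode should report active, the other standby
hdfs haadmin -getServiceState nn1
hdfs haadmin -getServiceState nn2
# Same for the ResourceManagers
yarn rmadmin -getServiceState rm1
yarn rmadmin -getServiceState rm2
# Overall HDFS report (live DataNodes, capacity, etc.)
hdfs dfsadmin -report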

4.2.17 Start the history server

mapred --daemon start historyserver
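
As a final check against the cluster plan in section 1.1, jps on every node should show the expected daemons (a sketch assuming the hadoop1..hadoop5 hostnames):

for host in hadoop1 hadoop2 hadoop3 hadoop4 hadoop5; do
  echo "== $host =="
  ssh hadoop@$host 'jps'
done
# Expected: NameNode, ResourceManager and DFSZKFailoverController on hadoop1/hadoop2
# (plus JobHistoryServer on the node where the history server was started);
# DataNode, NodeManager, JournalNode and QuorumPeerMain on hadoop3-5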

5. Installing Other Hadoop Ecosystem Components

See the other posts on this blog:
Hive and Spark production cluster setup (Spark on Doris)
Integrating Hadoop with object storage and HDFS disk storage
A complete HSQL (Hive) command reference
