Building a Big Data Platform (3)

Note: the dollar sign has a special meaning on this blog platform, so dollar signs have been replaced with &.
i. Configure hdfs-site.xml

<configuration>
    <property>
        <name>dfs.namenode.name.dir</name>
        <value>file:/usr/local/hadoop-2.7.3/dfs/name</value>
    </property>
    <property>
        <name>dfs.datanode.data.dir</name>
        <value>file:/usr/local/hadoop-2.7.3/dfs/data</value>
    </property>
    <property>
        <name>dfs.replication</name>
        <value>3</value>
    </property>
    <property>
        <name>dfs.webhdfs.enabled</name>
        <value>true</value>
    </property>
    <property>
        <name>dfs.permissions</name>
        <value>false</value>
    </property>
    <property>
        <name>dfs.permissions.enabled</name>
        <value>false</value>
    </property>
    <!-- dfs.nameservices: the logical name of the namespace. With HDFS Federation you can
         configure multiple namespace names, separated by commas. -->
    <property>
        <name>dfs.nameservices</name>
        <value>mycluster</value>
    </property>
    <!-- dfs.ha.namenodes.[nameservice ID]: unique identifiers for all NameNodes in the
         nameservice, separated by commas. They let DataNodes know every NameNode in the
         cluster. Currently at most two NameNodes can be configured per nameservice. -->
    <property>
        <name>dfs.ha.namenodes.mycluster</name>
        <value>nn1,nn2</value>
    </property>
    <!-- dfs.namenode.rpc-address.[nameservice ID].[name node ID]: the RPC address each
         NameNode listens on. -->
    <property>
        <name>dfs.namenode.rpc-address.mycluster.nn1</name>
        <value>namenode01:9000</value>
    </property>
    <property>
        <name>dfs.namenode.rpc-address.mycluster.nn2</name>
        <value>namenode02:9000</value>
    </property>
    <property>
        <name>dfs.namenode.servicerpc-address.mycluster.nn1</name>
        <value>namenode01:53310</value>
    </property>
    <property>
        <name>dfs.namenode.servicerpc-address.mycluster.nn2</name>
        <value>namenode02:53310</value>
    </property>
    <!-- dfs.namenode.http-address.[nameservice ID].[name node ID]: the HTTP address each
         NameNode listens on. -->
    <property>
        <name>dfs.namenode.http-address.mycluster.nn1</name>
        <value>namenode01:50070</value>
    </property>
    <property>
        <name>dfs.namenode.http-address.mycluster.nn2</name>
        <value>namenode02:50070</value>
    </property>
    <!-- dfs.namenode.shared.edits.dir: the URI of the JournalNode group through which the
         NameNodes read and write edit log contents. The format is
         "qjournal://host1:port1;host2:port2;host3:port3/journalId", where host1, host2 and
         host3 are JournalNode addresses; there must be an odd number of them, at least 3.
         journalId is a unique identifier for the cluster; multiple federated namespaces
         share the same journalId. -->
    <property>
        <name>dfs.namenode.shared.edits.dir</name>
        <value>qjournal://192.168.0.25:8485;192.168.0.26:8485;192.168.0.27:8485/mycluster</value>
    </property>
    <!-- dfs.client.failover.proxy.provider.[nameservice ID]: the class HDFS clients use to
         determine which NameNode is Active. -->
    <property>
        <name>dfs.client.failover.proxy.provider.mycluster</name>
        <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
    </property>
    <!-- dfs.ha.fencing.methods: the fencing method used when the active NameNode fails. -->
    <property>
        <name>dfs.ha.fencing.methods</name>
        <value>sshfence</value>
    </property>
    <property>
        <name>dfs.ha.fencing.ssh.private-key-files</name>
        <value>/root/.ssh/id_rsa</value>
    </property>
    <property>
        <name>dfs.ha.fencing.ssh.connect-timeout</name>
        <value>30000</value>
    </property>
    <!-- dfs.journalnode.edits.dir: the path where the JournalNode daemon keeps its local
         state. -->
    <property>
        <name>dfs.journalnode.edits.dir</name>
        <value>/usr/local/hadoop-2.7.3/tmp/journal</value>
    </property>
    <property>
        <name>dfs.ha.automatic-failover.enabled</name>
        <value>true</value>
    </property>
    <property>
        <name>ha.failover-controller.cli-check.rpc-timeout.ms</name>
        <value>60000</value>
    </property>
    <property>
        <name>ipc.client.connect.timeout</name>
        <value>60000</value>
    </property>
    <property>
        <name>dfs.image.transfer.bandwidthPerSec</name>
        <value>4194304</value>
    </property>
</configuration>
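
Once hdfs-site.xml is in place on every node, it is worth sanity-checking that Hadoop resolves the HA settings the way you expect. A minimal sketch, runnable on any node with this configuration (the nameservice mycluster and hosts namenode01/namenode02 come from the file above):

    hdfs getconf -confKey dfs.nameservices              # expect: mycluster
    hdfs getconf -namenodes                              # expect: namenode01 namenode02
    hdfs getconf -confKey dfs.namenode.shared.edits.dir  # the qjournal:// URI above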
   


j. Configure mapred-site.xml

<configuration>
    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
    </property>
</configuration>
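
A stock Hadoop 2.7.3 distribution ships only mapred-site.xml.template in etc/hadoop, so the file usually has to be created first. A small sketch, assuming Hadoop is installed at /usr/local/hadoop-2.7.3 as above:

    cd /usr/local/hadoop-2.7.3/etc/hadoop
    cp mapred-site.xml.template mapred-site.xml   # then add the property shown above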
  

k. Configure yarn-site.xml

<configuration>
    <property>
        <name>yarn.resourcemanager.connect.retry-interval.ms</name>
        <value>2000</value>
    </property>
    <property>
        <name>yarn.resourcemanager.ha.enabled</name>
        <value>true</value>
    </property>
    <property>
        <name>yarn.resourcemanager.ha.automatic-failover.enabled</name>
        <value>true</value>
    </property>
    <property>
        <name>yarn.resourcemanager.ha.rm-ids</name>
        <value>rm1,rm2</value>
    </property>
    <property>
        <name>yarn.resourcemanager.ha.id</name>
        <value>rm1</value>
    </property>
    <property>
        <name>yarn.resourcemanager.store.class</name>
        <value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
    </property>
    <property>
        <name>yarn.resourcemanager.zk.state-store.address</name>
        <value>namenode01:2181</value>
    </property>
    <property>
        <name>ha.zookeeper.quorum</name>
        <value>namenode01:2181</value>
    </property>
    <property>
        <name>yarn.resourcemanager.recovery.enabled</name>
        <value>true</value>
    </property>
    <property>
        <name>yarn.app.mapreduce.am.scheduler.connection.wait.interval-ms</name>
        <value>5000</value>
    </property>

    <!-- Addresses for ResourceManager rm1 -->
    <property>
        <name>yarn.resourcemanager.address.rm1</name>
        <value>namenode01:23140</value>
    </property>
    <property>
        <name>yarn.resourcemanager.scheduler.address.rm1</name>
        <value>namenode01:23130</value>
    </property>
    <property>
        <name>yarn.resourcemanager.webapp.address.rm1</name>
        <value>namenode01:23188</value>
    </property>
    <property>
        <name>yarn.resourcemanager.resource-tracker.address.rm1</name>
        <value>namenode01:23125</value>
    </property>
    <property>
        <name>yarn.resourcemanager.admin.address.rm1</name>
        <value>namenode01:23141</value>
    </property>
    <property>
        <name>yarn.resourcemanager.ha.admin.address.rm1</name>
        <value>namenode01:23142</value>
    </property>

    <!-- Addresses for ResourceManager rm2 -->
    <property>
        <name>yarn.resourcemanager.address.rm2</name>
        <value>namenode02:23140</value>
    </property>
    <property>
        <name>yarn.resourcemanager.scheduler.address.rm2</name>
        <value>namenode02:23130</value>
    </property>
    <property>
        <name>yarn.resourcemanager.webapp.address.rm2</name>
        <value>namenode02:23188</value>
    </property>
    <property>
        <name>yarn.resourcemanager.resource-tracker.address.rm2</name>
        <value>namenode02:23125</value>
    </property>
    <property>
        <name>yarn.resourcemanager.admin.address.rm2</name>
        <value>namenode02:23141</value>
    </property>
    <property>
        <name>yarn.resourcemanager.ha.admin.address.rm2</name>
        <value>namenode02:23142</value>
    </property>

    <!-- NodeManager settings -->
    <property>
        <description>Address where the localizer IPC is.</description>
        <name>yarn.nodemanager.localizer.address</name>
        <value>0.0.0.0:23344</value>
    </property>
    <property>
        <description>NM Webapp address.</description>
        <name>yarn.nodemanager.webapp.address</name>
        <value>0.0.0.0:23999</value>
    </property>
    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>
    <property>
        <name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
        <value>org.apache.hadoop.mapred.ShuffleHandler</value>
    </property>
    <property>
        <name>yarn.nodemanager.local-dirs</name>
        <value>/usr/local/hadoop-2.7.3/tmp/yarn/local</value>
    </property>
    <property>
        <name>yarn.nodemanager.log-dirs</name>
        <value>/usr/local/hadoop-2.7.3/tmp/yarn/log</value>
    </property>
    <property>
        <name>mapreduce.shuffle.port</name>
        <value>23080</value>
    </property>
</configuration>
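
Note that yarn.resourcemanager.ha.id identifies the local ResourceManager, so the copy of yarn-site.xml on namenode02 should set it to rm2 while the rest of the file stays the same. Once both ResourceManagers are running, their HA state can be checked with rmadmin (a quick sketch, using the rm-ids configured above):

    yarn rmadmin -getServiceState rm1   # expect: active (or standby)
    yarn rmadmin -getServiceState rm2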
    

11. Start the cluster
a. On namenode01, run the following command to create the failover namespace in ZooKeeper: hdfs zkfc -formatZK
b. On each node, start the JournalNode daemon: hadoop-daemon.sh start journalnode
c. On the primary node namenode01, format the NameNode and JournalNode directories with hadoop namenode -format: hadoop namenode -format mycluster
d. On the primary NameNode node, start the NameNode process: hadoop-daemon.sh start namenode
e. On the standby node, run the first command below; it formats the standby NameNode's directory and copies the metadata over from the primary NameNode, without re-formatting the JournalNode directories. Then start the standby NameNode process with the second command:
hdfs namenode -bootstrapStandby
hadoop-daemon.sh start namenode
f. On both NameNode nodes, start the ZKFC process: hadoop-daemon.sh start zkfc
g. On all DataNode nodes, start the DataNode: hadoop-daemon.sh start datanode
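
At this point the HDFS side of the cluster should be up. A rough sketch of how to confirm it (host names and NameNode IDs match the configuration above):

    jps                                   # each node shows NameNode / DataNode / JournalNode / DFSZKFailoverController as appropriate
    hdfs haadmin -getServiceState nn1     # one NameNode should report "active"
    hdfs haadmin -getServiceState nn2     # the other should report "standby"
    hdfs dfsadmin -report                 # DataNodes registered with the active NameNode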
