hadoop的四个核心配置文件详解

参考:

https://blog.csdn.net/cuitaixiong/article/details/51591410

https://blog.csdn.net/Gavin_chun/article/details/77936475

hadoop常用端口配置

1.HDFS 端口

参数 描述 默认 配置文件 例子值
fs.default.name namenode RPC交互端口 8020 core-site.xml hdfs://master:8020/
dfs.http.address NameNode web管理端口 50070 hdfs-site.xml 0.0.0.0:50070
dfs.datanode.address datanode 控制端口 50010 hdfs-site.xml 0.0.0.0:50010
dfs.datanode.ipc.address datanode的RPC服务器地址和端口 50020 hdfs-site.xml 0.0.0.0:50020
dfs.datanode.http.address datanode的HTTP服务器和端口 50075 hdfs-site.xml 0.0.0.0:50075

2.MR端口

参数 描述 默认 配置文件 例子值
mapred.job.tracker job-tracker交互端口 8021 mapred-site.xml hdfs://master:8021/
mapred.job.tracker.http.address job-tracker的web管理端口 50030 mapred-site.xml 0.0.0.0:50030
mapred.task.tracker.http.address task-tracker的HTTP端口 50060 mapred-site.xml 0.0.0.0:50060

3.其他端口

参数 描述 默认 配置文件 例子值
dfs.secondary.http.address secondary NameNode web管理端口 50090 hdfs-site.xml 0.0.0.0:50090
1. mapred-site.xml配置:

https://www.cnblogs.com/ainubis/p/3990101.html

常用配置:

1. core-site.xml

hadoop.tmp.dir : 如果hdfs-site.xml中不配置namenode和datanode的存放位置,默认就放在这个路径中
fs.checkpoint.dir : SecondaryNameNode用来存储checkpoint image文件
fs.defaultFS : 默认使用的文件系统类型
fs.trash.interval : 垃圾箱文件保留多久(单位:分钟),默认值是0,不打开垃圾收回机制
hadoop.security.authentication :Hadoop使用的认证方法(simple或kerberos)
io.file.buffer.size : 读写序列文件缓冲区大小,默认设置为4096


<configuration>

        <!-- Default filesystem URI used by clients (NameNode RPC endpoint). -->
        <property>
                <name>fs.defaultFS</name>
                <value>hdfs://master:9000</value>
        </property>

        <!-- Minutes between trash checkpoints; 0 means use fs.trash.interval. -->
        <property>
                <name>fs.trash.checkpoint.interval</name>
                <value>0</value>
        </property>

        <!-- Minutes a deleted file stays in trash before being purged (1440 = 1 day). -->
        <property>
                <name>fs.trash.interval</name>
                <value>1440</value>
        </property>

        <!-- Base temporary directory; NameNode/DataNode storage defaults under here
             when dfs.namenode.name.dir / dfs.datanode.data.dir are not set. -->
        <property>
                <name>hadoop.tmp.dir</name>
                <value>/root/hadoop-2.7.4/dfs/data/tmp</value>
        </property>

        <!-- ZooKeeper quorum used for HA failover coordination. -->
        <property>
                <name>ha.zookeeper.quorum</name>
                <value>master:2181,hadoop01:2181,hadoop02:2181</value>
        </property>

        <!-- ZooKeeper session timeout in milliseconds. -->
        <property>
                <name>ha.zookeeper.session-timeout.ms</name>
                <value>2000</value>
        </property>

        <!-- Allow the "root" proxy user to impersonate users from any host / group. -->
        <property>
                <name>hadoop.proxyuser.root.hosts</name>
                <value>*</value>
        </property>
        <property>
                <name>hadoop.proxyuser.root.groups</name>
                <value>*</value>
        </property>

        <!-- Compression codecs made available to HDFS / MapReduce. -->
        <property>
                <name>io.compression.codecs</name>
                <value>org.apache.hadoop.io.compress.GzipCodec,
                        org.apache.hadoop.io.compress.DefaultCodec,
                        org.apache.hadoop.io.compress.BZip2Codec,
                        org.apache.hadoop.io.compress.SnappyCodec
                </value>
        </property>
</configuration>

2. hdfs-site.xml

dfs.namenode.name.dir : namenode存放fsimage的目录
dfs.datanode.data.dir : datanode存放数据块文件的目录
dfs.namenode.checkpoint.dir : Secondarynamenode启动时使用,放置sn做合并的fsimage及 editlog文件
dfs.replication : 数据副本数
dfs.blocksize : 文件Block大小
dfs.permissions : 对HDFS是否启用认证。默认为true
dfs.datanode.handler.count : Datanode IPC 请求处理线程数


<configuration>

    <!-- Group whose members are treated as HDFS superusers. -->
    <property>
        <name>dfs.permissions.superusergroup</name>
        <value>root</value>
    </property>

    <!-- Enable the WebHDFS REST API on NameNode and DataNodes. -->
    <property>
        <name>dfs.webhdfs.enabled</name>
        <value>true</value>
    </property>

    <property>
        <name>dfs.namenode.name.dir</name>
        <value>/root/hadoop-2.7.4/dfs/name</value>
        <description>namenode 存放name table(fsimage)本地目录(需要修改)</description>
    </property>
    <property>
        <name>dfs.namenode.edits.dir</name>
        <value>${dfs.namenode.name.dir}</value>
        <description>namenode存放 transaction file(edits)本地目录(需要修改)</description>
    </property>
    <property>
        <name>dfs.datanode.data.dir</name>
        <value>/root/hadoop-2.7.4/dfs/data</value>
        <description>datanode存放block本地目录(需要修改)</description>
    </property>

    <!-- Number of block replicas. -->
    <property>
        <name>dfs.replication</name>
        <value>3</value>
    </property>

    <!-- HDFS block size in bytes (268435456 = 256 MB). -->
    <property>
        <name>dfs.blocksize</name>
        <value>268435456</value>
    </property>

    <!-- ======================= HDFS HA ======================= -->

    <!-- Logical name of the HA nameservice. -->
    <property>
        <name>dfs.nameservices</name>
        <value>mycluster</value>
    </property>
    <!-- Logical IDs of the two NameNodes in the nameservice. -->
    <property>
        <name>dfs.ha.namenodes.mycluster</name>
        <value>nn1,nn2</value>
    </property>

    <!-- NameNode RPC endpoints. -->
    <property>
        <name>dfs.namenode.rpc-address.mycluster.nn1</name>
        <value>master:8020</value>
    </property>
    <property>
        <name>dfs.namenode.rpc-address.mycluster.nn2</name>
        <value>hadoop01:8020</value>
    </property>

    <!-- NameNode web UI endpoints. -->
    <property>
        <name>dfs.namenode.http-address.mycluster.nn1</name>
        <value>master:50070</value>
    </property>
    <property>
        <name>dfs.namenode.http-address.mycluster.nn2</name>
        <value>hadoop01:50070</value>
    </property>

    <!-- JournalNode endpoints. -->
    <property>
        <name>dfs.journalnode.http-address</name>
        <value>0.0.0.0:8480</value>
    </property>
    <property>
        <name>dfs.journalnode.rpc-address</name>
        <value>0.0.0.0:8485</value>
    </property>
    <!-- Shared edits directory on the JournalNode quorum (QJM). -->
    <property>
        <name>dfs.namenode.shared.edits.dir</name>
        <value>qjournal://master:8485;hadoop01:8485;hadoop02:8485/mycluster</value>
    </property>

    <!-- Local directory where JournalNodes store edits.
         NOTE(review): path says hadoop-2.7.7 while core-site.xml uses
         hadoop-2.7.4 - confirm the actual install directory. -->
    <property>
        <name>dfs.journalnode.edits.dir</name>
        <value>/root/hadoop-2.7.7/dfs/journal</value>
    </property>

    <!-- Proxy provider clients use to find the active NameNode. -->
    <property>
        <name>dfs.client.failover.proxy.provider.mycluster</name>
        <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
    </property>

    <!-- Fence the old active NameNode over SSH during failover. -->
    <property>
        <name>dfs.ha.fencing.methods</name>
        <value>sshfence</value>
    </property>
    <property>
        <name>dfs.ha.fencing.ssh.private-key-files</name>
        <value>/root/.ssh/id_rsa</value>
    </property>
    <!-- SSH connect timeout for fencing, in milliseconds. -->
    <property>
        <name>dfs.ha.fencing.ssh.connect-timeout</name>
        <value>30000</value>
    </property>

    <!-- Automatic failover via ZKFC. -->
    <property>
        <name>dfs.ha.automatic-failover.enabled</name>
        <value>true</value>
    </property>

    <!-- File listing hosts allowed to register as DataNodes. -->
    <property>
        <name>dfs.hosts</name>
        <value>/root/hadoop-2.7.7/etc/hadoop/slaves</value>
    </property>
</configuration>

3. yarn-site.xml

yarn.scheduler.minimum-allocation-mb : Yarn分配内存的最小单位
yarn.scheduler.increment-allocation-mb : 内存分配递增最小单位
yarn.scheduler.maximum-allocation-mb : 每个container最多申请的内存上限




<configuration>

    <!-- NodeManager auxiliary service required by MapReduce shuffle. -->
    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>
    <property>
        <name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
        <value>org.apache.hadoop.mapred.ShuffleHandler</value>
    </property>
    <property>
        <name>yarn.nodemanager.localizer.address</name>
        <value>0.0.0.0:23344</value>
        <description>Address where the localizer IPC is.</description>
    </property>
    <property>
        <name>yarn.nodemanager.webapp.address</name>
        <value>0.0.0.0:23999</value>
        <description>NM Webapp address.</description>
    </property>

    <!-- =================== ResourceManager HA =================== -->

    <property>
        <name>yarn.resourcemanager.connect.retry-interval.ms</name>
        <value>2000</value>
    </property>
    <property>
        <name>yarn.resourcemanager.ha.enabled</name>
        <value>true</value>
    </property>
    <property>
        <name>yarn.resourcemanager.ha.automatic-failover.enabled</name>
        <value>true</value>
    </property>
    <!-- Use the embedded leader elector for automatic failover. -->
    <property>
        <name>yarn.resourcemanager.ha.automatic-failover.embedded</name>
        <value>true</value>
    </property>
    <!-- Cluster ID used to distinguish this RM HA pair in ZooKeeper. -->
    <property>
        <name>yarn.resourcemanager.cluster-id</name>
        <value>yarn-cluster</value>
    </property>
    <property>
        <name>yarn.resourcemanager.ha.rm-ids</name>
        <value>rm1,rm2</value>
    </property>

    <!-- Scheduler implementation. -->
    <property>
        <name>yarn.resourcemanager.scheduler.class</name>
        <value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler</value>
    </property>
    <!-- Recover running applications after an RM restart/failover. -->
    <property>
        <name>yarn.resourcemanager.recovery.enabled</name>
        <value>true</value>
    </property>
    <property>
        <name>yarn.app.mapreduce.am.scheduler.connection.wait.interval-ms</name>
        <value>5000</value>
    </property>

    <!-- Persist RM state in ZooKeeper for recovery/failover. -->
    <property>
        <name>yarn.resourcemanager.store.class</name>
        <value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
    </property>
    <property>
        <name>yarn.resourcemanager.zk-address</name>
        <value>master:2181,hadoop01:2181,hadoop02:2181</value>
    </property>
    <property>
        <name>yarn.resourcemanager.zk.state-store.address</name>
        <value>master:2181,hadoop01:2181,hadoop02:2181</value>
    </property>

    <!-- Per-RM client service addresses. -->
    <property>
        <name>yarn.resourcemanager.address.rm1</name>
        <value>master:23140</value>
    </property>
    <property>
        <name>yarn.resourcemanager.address.rm2</name>
        <value>hadoop01:23140</value>
    </property>

    <!-- Per-RM scheduler addresses. -->
    <property>
        <name>yarn.resourcemanager.scheduler.address.rm1</name>
        <value>master:23130</value>
    </property>
    <property>
        <name>yarn.resourcemanager.scheduler.address.rm2</name>
        <value>hadoop01:23130</value>
    </property>

    <!-- Per-RM admin addresses. -->
    <property>
        <name>yarn.resourcemanager.admin.address.rm1</name>
        <value>master:23141</value>
    </property>
    <property>
        <name>yarn.resourcemanager.admin.address.rm2</name>
        <value>hadoop01:23141</value>
    </property>

    <!-- Per-RM resource-tracker (NodeManager heartbeat) addresses. -->
    <property>
        <name>yarn.resourcemanager.resource-tracker.address.rm1</name>
        <value>master:23125</value>
    </property>
    <property>
        <name>yarn.resourcemanager.resource-tracker.address.rm2</name>
        <value>hadoop01:23125</value>
    </property>

    <!-- Per-RM web UI addresses (HTTP and HTTPS). -->
    <property>
        <name>yarn.resourcemanager.webapp.address.rm1</name>
        <value>master:8088</value>
    </property>
    <property>
        <name>yarn.resourcemanager.webapp.address.rm2</name>
        <value>hadoop01:8088</value>
    </property>
    <property>
        <name>yarn.resourcemanager.webapp.https.address.rm1</name>
        <value>master:23189</value>
    </property>
    <property>
        <name>yarn.resourcemanager.webapp.https.address.rm2</name>
        <value>hadoop01:23189</value>
    </property>

    <!-- Aggregate container logs to HDFS; serve them via the history server. -->
    <property>
        <name>yarn.log-aggregation-enable</name>
        <value>true</value>
    </property>
    <property>
        <name>yarn.log.server.url</name>
        <value>http://master:19888/jobhistory/logs</value>
    </property>

    <!-- Memory (MB) this NodeManager offers to containers. -->
    <property>
        <name>yarn.nodemanager.resource.memory-mb</name>
        <value>2048</value>
    </property>
    <property>
        <name>yarn.scheduler.minimum-allocation-mb</name>
        <value>1024</value>
        <description>单个任务可申请最少内存,默认1024MB</description>
    </property>
    <property>
        <name>yarn.scheduler.maximum-allocation-mb</name>
        <value>2048</value>
        <description>单个任务可申请最大内存,默认8192MB</description>
    </property>

    <!-- Virtual CPU cores this NodeManager offers to containers. -->
    <property>
        <name>yarn.nodemanager.resource.cpu-vcores</name>
        <value>2</value>
    </property>

</configuration>

4. mapred-site.xml

mapred.job.tracker : JobTracker的地址,格式为 hostname:port



<configuration>

    <!-- Run MapReduce jobs on YARN (instead of classic/local). -->
    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
    </property>

    <!-- JobHistory server RPC endpoint. -->
    <property>
        <name>mapreduce.jobhistory.address</name>
        <value>master:10020</value>
    </property>
    <!-- JobHistory server web UI.
         NOTE(review): RPC is on "master" but the web UI is on "hadoop01" -
         both normally run on the same host; confirm which one is intended. -->
    <property>
        <name>mapreduce.jobhistory.webapp.address</name>
        <value>hadoop01:19888</value>
    </property>

    <!-- Compress intermediate map output with Snappy to cut shuffle I/O. -->
    <property>
        <name>mapreduce.map.output.compress</name>
        <value>true</value>
    </property>
    <property>
        <name>mapreduce.map.output.compress.codec</name>
        <value>org.apache.hadoop.io.compress.SnappyCodec</value>
    </property>

</configuration>

你可能感兴趣的:(Hadoop)