<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- HDFS settings for an HA nameservice "myhadoop" with NameNodes nn1 and nn2
     (presumably hdfs-site.xml; confirm against the deployment). -->
<configuration>
  <property>
    <name>dfs.nameservices</name>
    <value>myhadoop</value>
    <description>
      Comma-separated list of nameservices.
      Use the same nameservice name as in fs.defaultFS in core-site.xml.
    </description>
  </property>
  <property>
    <name>dfs.ha.namenodes.myhadoop</name>
    <value>nn1,nn2</value>
    <description>
      The prefix for a given nameservice, contains a comma-separated
      list of namenodes for a given nameservice (eg EXAMPLENAMESERVICE).
    </description>
  </property>
  <property>
    <name>dfs.namenode.rpc-address.myhadoop.nn1</name>
    <value>master:8020</value>
    <description>
      RPC address for namenode nn1 of the myhadoop nameservice.
    </description>
  </property>
  <property>
    <name>dfs.namenode.rpc-address.myhadoop.nn2</name>
    <value>slave1:8020</value>
    <description>
      RPC address for namenode nn2 of the myhadoop nameservice.
    </description>
  </property>
  <property>
    <name>dfs.namenode.http-address.myhadoop.nn1</name>
    <value>master:50070</value>
    <description>
      The address and the base port where the dfs namenode nn1 web ui will
      listen on.
    </description>
  </property>
  <property>
    <name>dfs.namenode.http-address.myhadoop.nn2</name>
    <value>slave1:50070</value>
    <description>
      The address and the base port where the dfs namenode nn2 web ui will
      listen on.
    </description>
  </property>
  <!-- The key suffix must be a NameNode ID listed in dfs.ha.namenodes.myhadoop
       (nn1, nn2); any other suffix is never looked up. -->
  <property>
    <name>dfs.namenode.servicerpc-address.myhadoop.nn1</name>
    <value>master:53310</value>
    <description>
      Service RPC address for namenode nn1 of the myhadoop nameservice.
    </description>
  </property>
  <property>
    <name>dfs.namenode.servicerpc-address.myhadoop.nn2</name>
    <value>slave1:53310</value>
    <description>
      Service RPC address for namenode nn2 of the myhadoop nameservice.
    </description>
  </property>
  <property>
    <name>dfs.ha.automatic-failover.enabled</name>
    <value>true</value>
    <description>
      Whether automatic failover is enabled. See the HDFS High
      Availability documentation for details on automatic HA
      configuration.
    </description>
  </property>
  <property>
    <name>dfs.client.failover.proxy.provider.myhadoop</name>
    <!-- Keep the class name on one line: whitespace inside <value> is data. -->
    <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
    <description>
      Configure the name of the Java class which will be used
      by the DFS Client to determine which NameNode is the current Active,
      and therefore which NameNode is currently serving client requests.
      This class is the client's access proxy and is the key to making the
      HA feature transparent to clients.
    </description>
  </property>
  <property>
    <name>dfs.ha.fencing.methods</name>
    <value>sshfence</value>
    <description>
      Fencing method used to cut off the previously active NameNode during
      a failover.
    </description>
  </property>
  <property>
    <name>dfs.ha.fencing.ssh.private-key-files</name>
    <value>/home/yarn/.ssh/id_rsa</value>
    <description>Location of the SSH private key used by sshfence.</description>
  </property>
  <property>
    <name>dfs.ha.fencing.ssh.connect-timeout</name>
    <value>1000</value>
    <description>
      SSH connection timeout, in milliseconds, used by the sshfence fencer.
    </description>
  </property>
  <property>
    <name>dfs.journalnode.edits.dir</name>
    <value>/home/yarn/Hadoop/hadoop-2.4.1/hdfs_dir/journal/</value>
  </property>
  <property>
    <name>dfs.namenode.shared.edits.dir</name>
    <!-- Keep the URI on one line: whitespace inside <value> is data. -->
    <value>qjournal://master:8485;slave1:8485;slave2:8485/hadoop-journal</value>
    <description>
      A directory on shared storage between the multiple namenodes
      in an HA cluster. This directory will be written by the active and read
      by the standby in order to keep the namespaces synchronized. This
      directory does not need to be listed in dfs.namenode.edits.dir. It
      should be left empty in a non-HA cluster.
    </description>
  </property>
  <property>
    <name>dfs.namenode.name.dir</name>
    <value>file:///home/yarn/Hadoop/hadoop-2.4.1/hdfs_dir/name</value>
    <description>
      Path on the local filesystem where the NameNode stores
      the namespace and transactions logs persistently. If this is a
      comma-delimited list of directories then the name table is replicated
      in all of the directories, for redundancy.
    </description>
  </property>
  <property>
    <name>dfs.blocksize</name>
    <value>1048576</value>
    <description>
      HDFS block size in bytes. Set here to 1048576 (1 MB), which is the
      minimum block size; 128 MB is the usual choice for large file-systems.
    </description>
  </property>
  <property>
    <name>dfs.namenode.handler.count</name>
    <value>10</value>
    <description>
      Number of NameNode server threads that handle RPCs from DataNodes;
      raise it for clusters with a large number of DataNodes.
    </description>
  </property>
  <property>
    <name>dfs.datanode.data.dir</name>
    <value>file:///home/yarn/Hadoop/hadoop-2.4.1/hdfs_dir/data</value>
    <description>
      Comma separated list of paths on the local filesystem of
      a DataNode where it should store its blocks. If this is a
      comma-delimited list of directories, then data will be stored in all
      named directories, typically on different devices.
    </description>
  </property>
</configuration>
5. yarn-site.xml
<?xml version="1.0"?>
<!-- YARN ResourceManager / NodeManager settings (yarn-site.xml). -->
<configuration>
  <property>
    <name>yarn.acl.enable</name>
    <value>false</value>
    <description>Enable ACLs? Defaults to false.</description>
  </property>
  <property>
    <name>yarn.admin.acl</name>
    <value>*</value>
    <description>
      ACL to set admins on the cluster. ACLs are of the form:
      comma-separated users, a space, then comma-separated groups.
      Defaults to special value of * which means anyone. Special value of
      just space means no one has access.
    </description>
  </property>
  <property>
    <name>yarn.log-aggregation-enable</name>
    <value>false</value>
    <description>Configuration to enable or disable log aggregation.</description>
  </property>
  <property>
    <name>yarn.resourcemanager.hostname</name>
    <value>master</value>
    <description>The hostname of the RM.</description>
  </property>
  <property>
    <name>yarn.resourcemanager.webapp.https.address</name>
    <value>${yarn.resourcemanager.hostname}:8090</value>
    <description>The https address of the RM web application.</description>
  </property>
  <property>
    <name>yarn.resourcemanager.address</name>
    <value>${yarn.resourcemanager.hostname}:8032</value>
    <description>ResourceManager host:port for clients to submit jobs.</description>
  </property>
  <property>
    <name>yarn.resourcemanager.scheduler.address</name>
    <value>${yarn.resourcemanager.hostname}:8030</value>
    <description>
      ResourceManager host:port for ApplicationMasters to talk to Scheduler
      to obtain resources.
    </description>
  </property>
  <property>
    <name>yarn.resourcemanager.resource-tracker.address</name>
    <value>${yarn.resourcemanager.hostname}:8031</value>
    <description>ResourceManager host:port for NodeManagers.</description>
  </property>
  <property>
    <name>yarn.resourcemanager.admin.address</name>
    <value>${yarn.resourcemanager.hostname}:8033</value>
    <description>ResourceManager host:port for administrative commands.</description>
  </property>
  <property>
    <name>yarn.resourcemanager.webapp.address</name>
    <value>${yarn.resourcemanager.hostname}:8088</value>
    <description>ResourceManager web-ui host:port.</description>
  </property>
  <property>
    <name>yarn.resourcemanager.scheduler.class</name>
    <value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler</value>
    <description>
      ResourceManager Scheduler class.
      CapacityScheduler (recommended), FairScheduler (also recommended), or
      FifoScheduler.
    </description>
  </property>
  <property>
    <name>yarn.scheduler.minimum-allocation-mb</name>
    <value>512</value>
    <description>
      Minimum limit of memory to allocate to each container request at the
      Resource Manager. In MBs.
    </description>
  </property>
  <property>
    <name>yarn.scheduler.maximum-allocation-mb</name>
    <value>2048</value>
    <description>
      Maximum limit of memory to allocate to each container request at the
      Resource Manager. In MBs.
      Note: in this configuration yarn.scheduler.maximum-allocation-mb (2048)
      is greater than yarn.nodemanager.resource.memory-mb (1024).
    </description>
  </property>
  <property>
    <name>yarn.nodemanager.resource.memory-mb</name>
    <value>1024</value>
    <description>
      Resource i.e. available physical memory, in MB, for given NodeManager.
      Defines total available resources on the NodeManager to be made
      available to running containers.
    </description>
  </property>
  <property>
    <name>yarn.nodemanager.vmem-pmem-ratio</name>
    <value>2.1</value>
    <description>
      Ratio between virtual memory to physical memory when setting memory
      limits for containers. Container allocations are expressed in terms of
      physical memory, and virtual memory usage is allowed to exceed this
      allocation by this ratio.
    </description>
  </property>
  <property>
    <name>yarn.nodemanager.local-dirs</name>
    <value>/home/yarn/Hadoop/hadoop-2.4.1/yarn_dir/local</value>
    <description>
      Comma-separated list of paths on the local filesystem where
      intermediate data is written. Multiple paths help spread disk i/o.
    </description>
  </property>
  <property>
    <name>yarn.nodemanager.log-dirs</name>
    <value>/home/yarn/Hadoop/hadoop-2.4.1/yarn_dir/log</value>
    <description>
      Comma-separated list of paths on the local filesystem where logs are
      written. Multiple paths help spread disk i/o.
    </description>
  </property>
  <property>
    <name>yarn.nodemanager.log.retain-seconds</name>
    <value>10800</value>
    <description>
      Default time (in seconds) to retain log files on the NodeManager.
      Only applicable if log-aggregation is disabled.
    </description>
  </property>
  <property>
    <name>yarn.nodemanager.remote-app-log-dir</name>
    <value>/yarn/log-aggregation</value>
    <description>
      HDFS directory where the application logs are moved on application
      completion. Need to set appropriate permissions.
      Only applicable if log-aggregation is enabled.
    </description>
  </property>
  <property>
    <name>yarn.nodemanager.remote-app-log-dir-suffix</name>
    <value>logs</value>
    <description>
      Suffix appended to the remote log dir. Logs will be aggregated to
      ${yarn.nodemanager.remote-app-log-dir}/${user}/${thisParam}.
      Only applicable if log-aggregation is enabled.
    </description>
  </property>
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
    <description>
      Shuffle service that needs to be set for Map Reduce applications.
    </description>
  </property>
  <property>
    <name>yarn.nodemanager.resource.cpu-vcores</name>
    <value>1</value>
    <description>Number of CPU cores that can be allocated for containers.</description>
  </property>
  <property>
    <name>yarn.log-aggregation.retain-seconds</name>
    <value>-1</value>
    <description>
      How long to keep aggregation logs before deleting them.
      -1 disables.
      Be careful, set this too small and you will spam the name node.
    </description>
  </property>
  <property>
    <name>yarn.log-aggregation.retain-check-interval-seconds</name>
    <value>-1</value>
    <description>
      Time between checks for aggregated log retention.
      If set to 0 or a negative value then the value is computed as one-tenth
      of the aggregated log retention time.
      Be careful, set this too small and you will spam the name node.
    </description>
  </property>
  <property>
    <name>yarn.scheduler.fair.allocation.file</name>
    <value>${yarn.home.dir}/etc/hadoop/fairscheduler.xml</value>
    <description>Path of the FairScheduler allocation file.</description>
  </property>
</configuration>
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- MapReduce-on-YARN settings (presumably mapred-site.xml; confirm against
     the deployment). -->
<configuration>
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
    <description>Execution framework set to Hadoop YARN.</description>
  </property>
  <property>
    <name>mapreduce.map.memory.mb</name>
    <value>1024</value>
    <description>Container memory limit, in MB, for each map task.</description>
  </property>
  <property>
    <name>mapreduce.map.java.opts</name>
    <!-- Heap is kept below mapreduce.map.memory.mb so that heap plus non-heap
         JVM memory stays inside the container limit. -->
    <value>-Xmx768M</value>
    <description>Maximum heap size for child jvms of maps.</description>
  </property>
  <property>
    <name>mapreduce.reduce.memory.mb</name>
    <value>1024</value>
    <description>Container memory limit, in MB, for each reduce task.</description>
  </property>
  <property>
    <name>mapreduce.reduce.java.opts</name>
    <!-- Heap is kept below mapreduce.reduce.memory.mb so that heap plus
         non-heap JVM memory stays inside the container limit. -->
    <value>-Xmx768M</value>
    <description>Maximum heap size for child jvms of reduces.</description>
  </property>
  <property>
    <name>mapreduce.task.io.sort.mb</name>
    <!-- The sort buffer is allocated inside the task heap, so it must be well
         below the -Xmx value in mapreduce.map.java.opts. -->
    <value>256</value>
    <description>
      Memory, in MB, used for the in-memory buffer while sorting map output.
    </description>
  </property>
  <property>
    <name>mapreduce.task.io.sort.factor</name>
    <value>10</value>
    <description>Number of streams merged at once while sorting files.</description>
  </property>
  <property>
    <name>mapreduce.reduce.shuffle.parallelcopies</name>
    <value>20</value>
    <description>
      Higher number of parallel copies run by reduces to fetch outputs from
      very large number of maps.
    </description>
  </property>
  <property>
    <name>mapreduce.jobhistory.address</name>
    <value>slave1:10020</value>
    <description>MapReduce JobHistory Server host:port. Default port is 10020.</description>
  </property>
  <property>
    <name>mapreduce.jobhistory.webapp.address</name>
    <value>slave1:19888</value>
    <description>
      MapReduce JobHistory Server Web UI host:port. Default port is 19888.
    </description>
  </property>
  <property>
    <name>mapreduce.jobhistory.intermediate-done-dir</name>
    <value>/home/yarn/Hadoop/hadoop-2.4.1/mr_history/tmp</value>
    <description>Directory where history files are written by MapReduce jobs.</description>
  </property>
  <property>
    <name>mapreduce.jobhistory.done-dir</name>
    <value>/home/yarn/Hadoop/hadoop-2.4.1/mr_history/done</value>
    <description>
      Directory where history files are managed by the MR JobHistory Server.
    </description>
  </property>
</configuration>