Hadoop 2.2.0 Environment Setup Guide (multi-node configuration at the end)
1. Disable the firewall: # chkconfig iptables off
2. Check the status: # chkconfig --list | grep iptables (every runlevel should show off)
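(Note: chkconfig off only takes effect at the next boot. On CentOS/RHEL 6, which this guide assumes, the running firewall can be stopped immediately:)
# service iptables stop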
3. Copy hadoop-2.2.0.tar.gz to /usr/local
4. Extract: # tar -zxvf hadoop-2.2.0.tar.gz
5. Rename: # mv hadoop-2.2.0 hadoop2.2
6. Edit the environment variables: # vim /etc/profile
7. Add:
export HADOOP_HOME=/usr/local/hadoop2.2
export HADOOP_MAPRED_HOME=$HADOOP_HOME
export HADOOP_COMMON_HOME=$HADOOP_HOME
export HADOOP_HDFS_HOME=$HADOOP_HOME
export YARN_HOME=$HADOOP_HOME
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
Append to PATH: export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
8. Reload: # source /etc/profile
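(To confirm the variables took effect, a quick check; hadoop version should print the 2.2.0 banner:)
# echo $HADOOP_HOME
# hadoop version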
9. Change to the configuration directory: # cd /usr/local/hadoop2.2/etc/hadoop
10. Add the corresponding entries to the files below.
11. In hadoop-env.sh, change line 27 to:
export JAVA_HOME=/usr/local/jdk1.6
12. In yarn-env.sh, change line 23 to:
export JAVA_HOME=/usr/local/jdk1.6
13. Copy mapred-site.xml.template to mapred-site.xml:
# cp mapred-site.xml.template mapred-site.xml
14. In mapred-site.xml, add at line 20 (inside the configuration tags):
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<!--
<property>
<name>mapreduce.cluster.temp.dir</name>
<value></value>
<description>No description</description>
<final>true</final>
</property>
<property>
<name>mapreduce.cluster.local.dir</name>
<value></value>
<description>No description</description>
<final>true</final>
</property>
-->
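(A stray bracket in any of these XML files will make the daemons fail at startup with a parse error. If xmllint from libxml2 is installed, a quick well-formedness check you can run after each edit, or once at the end:)
# xmllint --noout /usr/local/hadoop2.2/etc/hadoop/*-site.xml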
15. In yarn-site.xml, add at line 18 (inside the configuration tags):
<property>
<name>yarn.resourcemanager.hostname</name>
<value>localhost</value>
<description>hostname of RM</description>
</property>
<property>
<name>yarn.resourcemanager.resource-tracker.address</name>
<value>localhost:5274</value>
<description>host is the hostname of the resource manager and
port is the port on which the NodeManagers contact the Resource Manager.
</description>
</property>
<property>
<name>yarn.resourcemanager.scheduler.address</name>
<value>localhost:5273</value>
<description>host is the hostname of the resourcemanager and port is the port
on which the Applications in the cluster talk to the Resource Manager.
</description>
</property>
<property>
<name>yarn.resourcemanager.scheduler.class</name>
<value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler</value>
<description>In case you do not want to use the default scheduler</description>
</property>
<property>
<name>yarn.resourcemanager.address</name>
<value>localhost:5271</value>
<description>the host is the hostname of the ResourceManager and the port is the port on which the clients can talk to the Resource Manager. </description>
</property>
<property>
<name>yarn.nodemanager.local-dirs</name>
<value></value>
<description>the local directories used by the nodemanager</description>
</property>
<property>
<name>yarn.nodemanager.address</name>
<value>localhost:5272</value>
<description>the nodemanagers bind to this port</description>
</property>
<property>
<name>yarn.nodemanager.resource.memory-mb</name>
<value>10240</value>
<description>the amount of memory on the NodeManager in MB</description>
</property>
<property>
<name>yarn.nodemanager.remote-app-log-dir</name>
<value>/app-logs</value>
<description>directory on hdfs where the application logs are moved to </description>
</property>
<property>
<name>yarn.nodemanager.log-dirs</name>
<value>/usr/log</value>
<description>the directories used by Nodemanagers as log directories</description>
</property>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
<description>shuffle service that needs to be set for Map Reduce to run </description>
</property>
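(The four ports above, 5271-5274, are arbitrary overrides of the YARN defaults, 8032/8030/8031 for the ResourceManager addresses and an ephemeral port for the NodeManager; any free ports work. After step 20 you can confirm the daemons bound them:)
# netstat -tlnp | grep java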
16. In core-site.xml, add at line 20 (inside the configuration tags):
<property>
<name>hadoop.tmp.dir</name>
<value>/usr/local/hadoop2.2/tmp</value>
</property>
<property>
<name>fs.defaultFS</name>
<value>hdfs://localhost:9000</value>
<final>true</final>
</property>
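(Once the environment from step 7 is loaded, you can verify the value Hadoop actually reads; hdfs getconf is part of the 2.x CLI:)
# hdfs getconf -confKey fs.defaultFS
(should print hdfs://localhost:9000)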
17. In hdfs-site.xml, add at line 20 (inside the configuration tags):
<property>
<name>dfs.namenode.name.dir</name>
<value>file:///dfs/name</value>
<final>true</final>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>file:///dfs/data</value>
<final>true</final>
</property>
<property>
<name>dfs.replication</name>
<value>1</value>
</property>
<property>
<name>dfs.permissions.enabled</name>
<value>false</value>
</property>
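(The paths configured above must exist and be writable. The format and startup steps can usually create them on their own, but pre-creating them surfaces permission problems early. A minimal sketch:)
# mkdir -p /dfs/name /dfs/data /usr/local/hadoop2.2/tmp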
(Reminder: JAVA_HOME in hadoop-env.sh was already set in step 11; make sure it points at your actual JDK path.)
18. Set up SSH
# ssh-keygen -t rsa (press Enter to accept every default)
Check the result (there should be a key pair, id_rsa and id_rsa.pub):
# cd ~/.ssh
# ls
Install the public key:
# cp id_rsa.pub authorized_keys
Check again: # ls (there should now be three files)
Verify passwordless login:
# ssh localhost (type yes at the prompt)
# exit
# ssh localhost
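(Equivalent non-interactive sketch, handy when scripting; the chmod matters because sshd ignores an authorized_keys file with loose permissions:)
# ssh-keygen -t rsa -N '' -f ~/.ssh/id_rsa
# cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
# chmod 600 ~/.ssh/authorized_keys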
19. Format the NameNode:
# hadoop namenode -format
(This form is deprecated in Hadoop 2.x; # hdfs namenode -format is the current equivalent, and either works.)
20. Start the daemons:
# start-dfs.sh
# start-yarn.sh
21. Check with # jps; there should be 6 processes: NameNode, DataNode, SecondaryNameNode, ResourceManager, NodeManager, and Jps itself.
22. In a browser, confirm that http://localhost:50070/ (HDFS web UI) and http://localhost:8088/cluster (YARN web UI) both open.
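(Optional end-to-end smoke test using the examples jar bundled with Hadoop 2.2.0; the path below is the standard distribution layout:)
# hadoop jar $HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.2.0.jar pi 2 10
(The job should run on YARN and finish by printing an estimate of Pi.)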
(Multi-node configuration)
23. DNS setup (rebooting the virtual machine after these changes is recommended)
24. Change the hostname to hadoop2:
# vim /etc/sysconfig/network
On line 2, change localhost.localdomain to hadoop2
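(On CentOS/RHEL 6 the new name can also be applied immediately, without waiting for the reboot:)
# hostname hadoop2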
25. Update the address mapping:
# vim /etc/hosts
On line 3, add: 192.168.100.11 hadoop2 (the address varies with your network)
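After the edit, /etc/hosts might look like this sketch (the IP is whatever your VM actually uses; check it with # ifconfig):
127.0.0.1 localhost localhost.localdomain
192.168.100.11 hadoop2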