1、准备4台服务器(将以下IP与主机名的映射写入每台服务器的/etc/hosts)
一台作为namenode
192.168.137.101 hd1
三台作为datanode
192.168.137.102 hd2
192.168.137.103 hd3
192.168.137.104 hd4
2、同步所有服务器的时间
使用ntpdate(例如:ntpdate pool.ntp.org)
详细步骤略
3、配置多机互信(SSH免密码登录)
hd1需要能够免密码ssh登录到所有节点(包括自身),start-dfs.sh依赖此配置;详细步骤略
4、解压hadoop
tar zxvf hadoop-2.2.0.tar.gz
5、移动hadoop到相应位置
mv hadoop-2.2.0 /home/hadoop/hadoop
6、在namenode建立相应目录
以下目录的权限应为755
mkdir /home/hadoop/hdfs
mkdir /home/hadoop/namenode
mkdir /home/hadoop/tmp
7、在datanode建立相应的目录
mkdir /home/hadoop/hdfs
mkdir /home/hadoop/tmp
8、配置hadoop
配置文件有7个:
/home/hadoop/hadoop/etc/hadoop/hadoop-env.sh
/home/hadoop/hadoop/etc/hadoop/yarn-env.sh
/home/hadoop/hadoop/etc/hadoop/slaves
/home/hadoop/hadoop/etc/hadoop/core-site.xml
/home/hadoop/hadoop/etc/hadoop/hdfs-site.xml
/home/hadoop/hadoop/etc/hadoop/mapred-site.xml
/home/hadoop/hadoop/etc/hadoop/yarn-site.xml
# /home/hadoop/hadoop/etc/hadoop/hadoop-env.sh
export JAVA_HOME=/java
export HADOOP_HOME=/home/hadoop/hadoop
# /home/hadoop/hadoop/etc/hadoop/yarn-env.sh
export JAVA_HOME=/java
# /home/hadoop/hadoop/etc/hadoop/slaves
hd2
hd3
hd4
# /home/hadoop/hadoop/etc/hadoop/core-site.xml(以下<property>需放在<configuration>与</configuration>之间)
<property>
<name>fs.defaultFS</name>
<value>hdfs://hd1:9000</value>
</property>
<property>
<name>io.file.buffer.size</name>
<value>131072</value>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>file:/home/hadoop/tmp</value>
</property>
<property>
<name>hadoop.proxyuser.hduser.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.hduser.groups</name>
<value>*</value>
</property>
# /home/hadoop/hadoop/etc/hadoop/hdfs-site.xml
<configuration>
<property>
<name>dfs.namenode.http-address</name>
<value>hd1:50070</value>
</property>
<property>
<name>dfs.namenode.secondary.http-address</name>
<value>hd1:9001</value>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>file:/home/hadoop/namenode</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>file:/home/hadoop/hdfs</value>
</property>
<property>
<name>dfs.replication</name>
<value>3</value>
</property>
<property>
<name>dfs.webhdfs.enabled</name>
<value>true</value>
</property>
<property>
<name>dfs.support.append</name>
<value>true</value>
</property>
<property>
<name>dfs.support.broken.append</name>
<value>true</value>
</property>
</configuration>
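# /home/hadoop/hadoop/etc/hadoop/mapred-site.xml(2.2.0默认只有mapred-site.xml.template,需先复制为mapred-site.xml;以下<property>需放在<configuration>与</configuration>之间)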
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<property>
<name>mapreduce.jobhistory.address</name>
<value>hd1:10020</value>
</property>
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>hd1:19888</value>
</property>
# /home/hadoop/hadoop/etc/hadoop/yarn-site.xml(以下<property>需放在<configuration>与</configuration>之间)
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
<value>org.apache.hadoop.mapred.ShuffleHandler</value>
</property>
<property>
<name>yarn.resourcemanager.address</name>
<value>hd1:8032</value>
</property>
<property>
<name>yarn.resourcemanager.scheduler.address</name>
<value>hd1:8030</value>
</property>
<property>
<name>yarn.resourcemanager.resource-tracker.address</name>
<value>hd1:8031</value>
</property>
<property>
<name>yarn.resourcemanager.admin.address</name>
<value>hd1:8033</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.address</name>
<value>hd1:8088</value>
</property>
9、拷贝hadoop到所有datanode
scp -r /home/hadoop/hadoop/ hd2:/home/hadoop/
scp -r /home/hadoop/hadoop/ hd3:/home/hadoop/
scp -r /home/hadoop/hadoop/ hd4:/home/hadoop/
10、格式化namenode(在hd1上执行;hadoop namenode -format在2.x中已标记为废弃,改用hdfs命令)
/home/hadoop/hadoop/bin/hdfs namenode -format
11、启动dfs(在hd1上执行)
/home/hadoop/hadoop/sbin/start-dfs.sh
12、查看状态
http://hd1:50070/dfsnodelist.jsp