Deploying hadoop-1.0.4.tar.gz and jdk-7u17-linux-x64.tar.gz on CentOS 6.2 (64-bit)
 
1. Environment overview
 
Three servers:
 
(namenode)master=192.168.1.198
 
Hostname: master
IPADDR=192.168.1.198
NETMASK=255.255.255.0
GATEWAY=192.168.1.1
 
[root@namenode1 ~]# uname -a
Linux namenode1 2.6.32-220.el6.x86_64 #1 SMP Tue Dec 6 19:48:22 GMT 2011 x86_64 x86_64 x86_64 GNU/Linux
 
[root@namenode1 ~]# more /etc/redhat-release 
CentOS release 6.2 (Final)
 
(datanode1)slave1=192.168.1.199
Hostname: slave1
IPADDR=192.168.1.199
NETMASK=255.255.255.0
GATEWAY=192.168.1.1
 
[root@datanode1 ~]# uname -a
Linux datanode1 2.6.32-220.el6.x86_64 #1 SMP Tue Dec 6 19:48:22 GMT 2011 x86_64 x86_64 x86_64 GNU/Linux
 
[root@datanode1 ~]# more /etc/redhat-release
CentOS release 6.2 (Final)
[root@datanode1 ~]# 
 
(datanode2)slave2=192.168.1.200
Hostname: slave2

IPADDR=192.168.1.200
NETMASK=255.255.255.0
GATEWAY=192.168.1.1
 
[root@datanode2 ~]# uname -a
Linux datanode2 2.6.32-220.el6.x86_64 #1 SMP Tue Dec 6 19:48:22 GMT 2011 x86_64 x86_64 x86_64 GNU/Linux
 
[root@datanode2 ~]# more /etc/redhat-release
CentOS release 6.2 (Final)
[root@datanode2 ~]# 
 
 
2. Preparation. Run the following as root on every server.
 
#Install required packages
yum install -y rsync openssh-clients 
 
#Stop the firewall and disable it at boot
/etc/init.d/iptables stop
chkconfig --del iptables 
chkconfig iptables off
 
#Set the system locale to en_US.UTF-8
sed -i 's@LANG=.*$@LANG="en_US.UTF-8"@g' /etc/sysconfig/i18n
 
#Disable SELinux
sed -i 's/SELINUX=enforcing/SELINUX=disabled/g' /etc/selinux/config
setenforce 0
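
A quick sanity check (optional, not in the original steps) confirms these changes before the reboot; the firewall should report that it is stopped and getenforce should now show Permissive:

#Optional check: firewall and SELinux state
service iptables status
getenforce
grep '^SELINUX=' /etc/selinux/config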
 
#Disable IPv6
echo "alias net-pf-10 off" >> /etc/modprobe.conf
 
echo "alias ipv6 off" >> /etc/modprobe.conf
 
/sbin/chkconfig --level 35 ip6tables off
 
#Add /etc/hosts entries for hostname resolution
echo '192.168.1.198 master' >>/etc/hosts
echo '192.168.1.199 slave1' >>/etc/hosts
echo '192.168.1.200 slave2' >>/etc/hosts
 
 
#Edit the sshd config to enable public key (certificate) authentication
sed -i 's/#RSAAuthentication yes/RSAAuthentication yes/g' /etc/ssh/sshd_config 
sed -i 's/#PubkeyAuthentication yes/PubkeyAuthentication yes/g' /etc/ssh/sshd_config 
sed -i 's/#AuthorizedKeysFile/AuthorizedKeysFile/g' /etc/ssh/sshd_config 
 
 
#Configure the Java and Hadoop environment variables
#Just append the following to the end of /etc/profile
 
echo 'export JAVA_HOME=/usr/local/jdk1.7.0_17' >>/etc/profile
echo 'export HADOOP_HOME=/home/hadoop/hadoop' >>/etc/profile
echo 'export PATH=$PATH:$JAVA_HOME/bin:$HADOOP_HOME/bin'>>/etc/profile
echo 'export CLASSPATH=.:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar'>>/etc/profile
 
source /etc/profile
 
#Create the hadoop user and set its initial password to 1
useradd hadoop
echo 1 | passwd --stdin hadoop
 
 
#After the above steps, rebooting the server is recommended
init 6
 
 
3. Verify the configuration
 
#Create a working directory for the installation packages
#(created as root, since / is not writable by an unprivileged user, then handed over to hadoop)
mkdir /workspace
chown hadoop:hadoop /workspace

#Switch to the hadoop user
su - hadoop
 
 
#Create an IP address list for testing
echo '192.168.1.198' >>/workspace/list.txt
echo '192.168.1.199' >>/workspace/list.txt
echo '192.168.1.200' >>/workspace/list.txt
 
#Run the following tests on every server to verify that the configuration above is correct
 
#Hostname resolution test: pinging each hostname should reach its corresponding IP address
ping -c3 master &&  ping -c3 slave1 && ping -c3 slave2
 
#Verify that the sshd configuration changes took effect
sed -n '47p;48p;49p'   /etc/ssh/sshd_config 
RSAAuthentication yes
PubkeyAuthentication yes
AuthorizedKeysFile      .ssh/authorized_keys
 
#Check the Java environment variables
[hadoop@master ~]$ env | egrep 'JAVA|CLASSPATH'
JAVA_HOME=/usr/local/jdk1.7.0_17
CLASSPATH=.:/usr/local/jdk1.7.0_17/lib/dt.jar:/usr/local/jdk1.7.0_17/lib/tools.jar
 
#Check the Hadoop environment variable
[hadoop@master ~]$ env |grep hadoop
HADOOP_HOME=/home/hadoop/hadoop
 
4. Configure passwordless (key-based) SSH login
 
#Perform the following on the master server
 
#Generate the key pair; press Enter at every prompt
ssh-keygen
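
If a fully non-interactive run is preferred, the same key pair can be generated in one line (an equivalent alternative with an empty passphrase and the default key file):

#Non-interactive alternative to pressing Enter through the prompts
ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa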
 
#Allow this machine to SSH into itself
cat ~/.ssh/id_rsa.pub  >>~/.ssh/authorized_keys 
 
#Append the local public key to ~/.ssh/authorized_keys on the two slave hosts
ssh-copy-id slave1                                     
ssh-copy-id slave2
 
#Test passwordless login to both slave hosts; answer yes at the first connection prompt
 
for ip in $(cat /workspace/list.txt);do echo -------$ip------ ;ssh $ip hostname ;done
-------192.168.1.198------
master
-------192.168.1.199------
slave1
-------192.168.1.200------
slave2
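
If any host still prompts for a password, a common cause is overly open permissions on the hadoop user's SSH files; sshd requires them to be restricted (a troubleshooting hint, not part of the original walkthrough):

#Tighten permissions on every node if key login is refused
chmod 700 ~/.ssh
chmod 600 ~/.ssh/authorized_keys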
 
 
 
 
5. Download and install the JDK and Hadoop
 
JDK download:
 
http://www.oracle.com/technetwork/java/javase/downloads/jdk7-downloads-1880260.html
jdk-7u17-linux-x64.tar.gz
 
Hadoop download:
wget http://labs.renren.com/apache-mirror/hadoop/core/hadoop-1.0.4/hadoop-1.0.4.tar.gz
hadoop-1.0.4.tar.gz
 
#Install the JDK and Hadoop on the master server
 
cd /workspace/
tar -xvf hadoop-1.0.4.tar.gz 
cp -r hadoop-1.0.4 /home/hadoop/hadoop
 
tar -xvf jdk-7u17-linux-x64.tar.gz 
cp -rp jdk1.7.0_17 /usr/local/
source /etc/profile
 
 
#Verify the JDK installation
 
[root@master workspace]# java -version
java version "1.7.0_17"
Java(TM) SE Runtime Environment (build 1.7.0_17-b02)
Java HotSpot(TM) 64-Bit Server VM (build 23.7-b01, mixed mode)
 
 
6. Edit the Hadoop configuration files
 
[hadoop@master conf]$ pwd
/home/hadoop/hadoop/conf
 
#Set JAVA_HOME in Hadoop's environment file

vi hadoop-env.sh  //add the JAVA_HOME path
 
export JAVA_HOME=/usr/local/jdk1.7.0_17
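
For a fully scripted setup, the same setting can be appended without opening an editor; hadoop-env.sh is a plain shell script, so a line added at the end takes effect (convenience sketch, not from the original text):

#Non-interactive alternative to editing hadoop-env.sh
echo 'export JAVA_HOME=/usr/local/jdk1.7.0_17' >> /home/hadoop/hadoop/conf/hadoop-env.sh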
 
#Edit the core site configuration file
$ vi core-site.xml

<?xml version="1.0"?>
<configuration>

<property>
<name>fs.default.name</name>
<value>hdfs://master:9000</value>
</property>

</configuration>

#Edit the HDFS configuration
 
$ vi hdfs-site.xml

<?xml version="1.0"?>
<configuration>

<property>
<name>dfs.replication</name>
<value>2</value>
</property>

</configuration>

 
 
 
#Edit the MapReduce configuration
 
$ vim mapred-site.xml

<?xml version="1.0"?>
<configuration>

<property>
<name>mapred.job.tracker</name>
<value>master:9001</value>
</property>

</configuration>

#Edit the masters file (in Hadoop 1.x this lists the host that runs the SecondaryNameNode)
vi masters
 
master
 
 
#Edit the slaves file to list the datanode hosts
 
vi slaves
 
slave1
slave2
 
 
7. Configure the datanodes
 
All datanodes (slave1 & slave2) use exactly the same configuration as the namenode (master); simply copy it over.
 
cd /home/hadoop
 
#Transfer the configuration from master to slave1
 
scp -r hadoop slave1:/home/hadoop/
rsync -avrpP /etc/profile  root@slave1:/etc/
rsync -avrpP /usr/local/jdk1.7.0_17  root@slave1:/usr/local/
 
#Check that the environment variables took effect
[hadoop@slave1 ~]$ source /etc/profile
[hadoop@slave1 ~]$ env | egrep 'JAVA|CLASSPATH'
JAVA_HOME=/usr/local/jdk1.7.0_17
CLASSPATH=.:/usr/local/jdk1.7.0_17/lib/dt.jar:/usr/local/jdk1.7.0_17/lib/tools.jar
 
#Check that the JDK runs correctly
[hadoop@slave1 ~]$ java -version
java version "1.7.0_17"
Java(TM) SE Runtime Environment (build 1.7.0_17-b02)
Java HotSpot(TM) 64-Bit Server VM (build 23.7-b01, mixed mode)
 
 
#Transfer the configuration from master to slave2
 
scp -r hadoop slave2:/home/hadoop/
rsync -avrpP /etc/profile  root@slave2:/etc/
rsync -avrpP /usr/local/jdk1.7.0_17  root@slave2:/usr/local/
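
Since both slaves receive exactly the same files, the per-host commands above can also be written as one loop (a convenience sketch; the root rsync calls will still prompt for each slave's root password):

#Distribute Hadoop, /etc/profile and the JDK to all slaves in one pass
for host in slave1 slave2; do
  scp -r /home/hadoop/hadoop $host:/home/hadoop/
  rsync -avrpP /etc/profile root@$host:/etc/
  rsync -avrpP /usr/local/jdk1.7.0_17 root@$host:/usr/local/
done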
 
 
#Check that the environment variables took effect
source /etc/profile
[hadoop@slave2 ~]$ env | egrep 'JAVA|CLASSPATH'
JAVA_HOME=/usr/local/jdk1.7.0_17
CLASSPATH=.:/usr/local/jdk1.7.0_17/lib/dt.jar:/usr/local/jdk1.7.0_17/lib/tools.jar
 
#Check that the JDK runs correctly
[hadoop@slave2 ~]$ java -version
java version "1.7.0_17"
Java(TM) SE Runtime Environment (build 1.7.0_17-b02)
Java HotSpot(TM) 64-Bit Server VM (build 23.7-b01, mixed mode)
 
8. Format the HDFS filesystem
pwd
/home/hadoop/hadoop
bin/hadoop namenode -format
 
#Start all services
bin/start-all.sh
 
#Stop all services
bin/stop-all.sh
 
9. Access the web UI
 
#Open in a browser
http://192.168.1.198:50070/
 
NameNode 'master:9000'
Started:  Wed Mar 20 20:57:14 CST 2013  
Version:  1.0.4, r1393290  
Compiled:  Wed Oct 3 05:13:58 UTC 2012 by hortonfo  
Upgrades:  There are no upgrades in progress.  
 
 
Browse the filesystem
Namenode Logs 
--------------------------------------------------------------------------------
 
Cluster Summary
6 files and directories, 1 blocks = 7 total. Heap Size is 33.64 MB / 966.69 MB (3%) 
 
Configured Capacity : 34.53 GB 
DFS Used : 68 KB 
Non DFS Used : 5.19 GB 
DFS Remaining : 29.34 GB 
DFS Used% : 0 % 
DFS Remaining% : 84.96 % 
Live Nodes  : 2 
Dead Nodes  : 0 
Decommissioning Nodes  : 0 
Number of Under-Replicated Blocks : 0 
 
 
 
----------------------------------------------------
 
NameNode Storage: 
Storage Directory Type State 
/tmp/hadoop-hadoop/dfs/name IMAGE_AND_EDITS Active 
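
The storage directory above sits under /tmp because hadoop.tmp.dir was left at its default (/tmp/hadoop-${user.name}), so the HDFS metadata would be lost if /tmp is cleaned. If persistent storage is wanted, a property along these lines could be added inside <configuration> in core-site.xml before formatting (the /home/hadoop/tmp path is only an example, not from the original setup):

<property>
<name>hadoop.tmp.dir</name>
<value>/home/hadoop/tmp</value>
</property>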
 
 
#Check from the command line
#Use jps to verify the daemons are running
[hadoop@master tmp]$ jps
4198 JobTracker
4745 Jps
4107 SecondaryNameNode
3955 NameNode
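
The slave daemons can be checked the same way over SSH; on slave1 and slave2 a DataNode and a TaskTracker process should be listed (the full jps path is used because the JDK bin directory may not be on the non-interactive SSH PATH):

#Check the daemons on both slaves from the master
for ip in 192.168.1.199 192.168.1.200; do echo -------$ip------ ; ssh $ip /usr/local/jdk1.7.0_17/bin/jps ; done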
 
#Report basic HDFS statistics
 
hadoop dfsadmin -report
 
Configured Capacity: 37073182720 (34.53 GB)
Present Capacity: 31498399744 (29.34 GB)
DFS Remaining: 31498326016 (29.34 GB)
DFS Used: 73728 (72 KB)
DFS Used%: 0%
Under replicated blocks: 0
Blocks with corrupt replicas: 0
Missing blocks: 0
 
-------------------------------------------------
Datanodes available: 2 (2 total, 0 dead)
 
Name: 192.168.1.199:50010
Decommission Status : Normal
Configured Capacity: 18536591360 (17.26 GB)
DFS Used: 32768 (32 KB)
Non DFS Used: 2788442112 (2.6 GB)
DFS Remaining: 15748116480(14.67 GB)
DFS Used%: 0%
DFS Remaining%: 84.96%
Last contact: Thu Mar 21 01:33:31 CST 2013
 
 
Name: 192.168.1.200:50010
Decommission Status : Normal
Configured Capacity: 18536591360 (17.26 GB)
DFS Used: 40960 (40 KB)
Non DFS Used: 2786340864 (2.59 GB)
DFS Remaining: 15750209536(14.67 GB)
DFS Used%: 0%
DFS Remaining%: 84.97%
Last contact: Thu Mar 21 01:33:33 CST 2013
 
 
#File upload demo
 
#Create a local test directory
mkdir input
cd input/
echo 'this is test1 file!' >test1
echo 'this is test2 file!' >test2
cd ..
#Upload the local input directory to HDFS as 'in'
hadoop dfs -put input in
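
The upload can be verified by listing the target directory and printing one of the files back (a quick check using the same paths as above):

#Verify the upload
hadoop dfs -ls in
hadoop dfs -cat in/test1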
 
 
#Create a health directory on HDFS
 hadoop fs -mkdir hdfs://192.168.1.198:9000/health
 
#Upload a local file to the health directory on HDFS
hadoop fs -copyFromLocal hadoop-1.0.4.tar.gz hdfs://192.168.1.198:9000/health/
 
#List the uploaded file
hadoop fs -ls hdfs://192.168.1.198:9000/health/
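
To round-trip the file, it can be copied back out of HDFS and compared against the original (a sketch; /tmp as the local destination is only an example):

#Copy the file back from HDFS and compare checksums
hadoop fs -get hdfs://192.168.1.198:9000/health/hadoop-1.0.4.tar.gz /tmp/
md5sum /workspace/hadoop-1.0.4.tar.gz /tmp/hadoop-1.0.4.tar.gz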
 
 
 
#Administration and maintenance commands:
 
Report basic HDFS statistics
 
$ bin/hadoop dfsadmin -report
 
 
 
Enter safe mode (the filesystem cannot be modified while in safe mode)
 
$ bin/hadoop dfsadmin -safemode enter
 
Safe mode is ON
 
Leave safe mode (it may take a few seconds to take effect)
 
$ bin/hadoop dfsadmin -safemode leave
 
Safe mode is OFF
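
The current safe-mode state can also be queried directly:

$ bin/hadoop dfsadmin -safemode get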
 
 
 
Start the balancer
 
$ bin/start-balancer.sh
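
The balancer runs until the cluster is balanced or it is stopped; an optional imbalance threshold (in percent) can be passed, and a matching stop script is provided (supplementary note):

$ bin/start-balancer.sh -threshold 10

$ bin/stop-balancer.sh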