centos6.2-64位部署hadoop-1.0.4.tar.gz 和 jdk-7u17-linux-x64.tar.gz
1.搭建环境介绍
服务器3台,
(namenode)master=192.168.1.198
主机名:master
IPADDR=192.168.1.198
NETMASK=255.255.255.0
GATEWAY=192.168.1.1
[root@namenode1 ~]# uname -a
Linux namenode1 2.6.32-220.el6.x86_64 #1 SMP Tue Dec 6 19:48:22 GMT 2011 x86_64 x86_64 x86_64
GNU/Linux
[root@namenode1 ~]# more /etc/redhat-release
CentOS release 6.2 (Final)
(datanode1)slave1=192.168.1.199
主机名:slave1
IPADDR=192.168.1.199
NETMASK=255.255.255.0
GATEWAY=192.168.1.1
[root@datanode1 ~]# uname -a
Linux datanode1 2.6.32-220.el6.x86_64 #1 SMP Tue Dec 6 19:48:22 GMT 2011
x86_64 x86_64 x86_64 GNU/Linux
[root@datanode1 ~]# more /etc/redhat-release
CentOS release 6.2 (Final)
[root@datanode1 ~]#
(datanode2)slave2=192.168.1.200
主机名:slave2
IPADDR=192.168.1.200
NETMASK=255.255.255.0
GATEWAY=192.168.1.1
[root@datanode2 ~]# uname -a
Linux datanode2 2.6.32-220.el6.x86_64 #1 SMP Tue Dec 6 19:48:22 GMT 2011 x86_64 x86_64 x86_64
GNU/Linux
[root@datanode2 ~]# more /etc/redhat-release
CentOS release 6.2 (Final)
[root@datanode2 ~]#
2.搭建准备工作,以下操作用root用户在每台服务器上执行
#安装使用软件
yum install -y rsync openssh-clients
#关闭防火墙,停止开机启动
/etc/init.d/iptables stop
chkconfig --del iptables
chkconfig iptables off
#修改系统语言环境(字符编码)为UTF-8
sed -i 's@LANG=.*$@LANG=\"en_US.UTF-8\"@g' /etc/sysconfig/i18n
#关闭selinux
sed -i 's/SELINUX=enforcing/SELINUX=disabled/g' /etc/selinux/config
setenforce 0
#关闭ipv6
echo "alias net-pf-10 off" >> /etc/modprobe.conf
echo "alias ipv6 off" >> /etc/modprobe.conf
/sbin/chkconfig --level 35 ip6tables off
#添加hosts文件,实现主机名解析
echo '192.168.1.198 master' >>/etc/hosts
echo '192.168.1.199 slave1' >>/etc/hosts
echo '192.168.1.200 slave2' >>/etc/hosts
#修改sshd 配置文件,支持证书验证登陆
sed -i 's/#RSAAuthentication yes/RSAAuthentication yes/g' /etc/ssh/sshd_config
sed -i 's/#PubkeyAuthentication yes/PubkeyAuthentication yes/g' /etc/ssh/sshd_config
sed -i 's/#AuthorizedKeysFile/AuthorizedKeysFile/g' /etc/ssh/sshd_config
#配置java、hadoop环境变量
#在/etc/profile的最后加上这一段就好了
echo 'export JAVA_HOME=/usr/local/jdk1.7.0_17' >>/etc/profile
echo 'export HADOOP_HOME=/home/hadoop/hadoop' >>/etc/profile
echo 'export PATH=$PATH:$JAVA_HOME/bin:$HADOOP_HOME/bin'>>/etc/profile
echo 'export CLASSPATH=.:JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar'>>/etc/profile
source /etc/profile
#建立hadoop用户,设置初始密码为1
useradd hadoop
echo 1|passwd hadoop --stdin
#以上操作完成后,推荐重启服务器
init 6
3.配置测试工作
#切换到hadoop用户
su - hadoop
#建立工作文件夹,存放安装软件
mkdir /workspace
#建立一个ip地址清单,测试使用
echo '192.168.1.198' >>/workspace/list.txt
echo '192.168.1.199' >>/workspace/list.txt
echo '192.168.1.200' >>/workspace/list.txt
#开始测试,在每台服务器上都要执行以下测试动作,测试上面配置是否正确
#ping 主机名解析测试,通过ping 主机名能够解析到对应的IP地址
ping -c3 master && ping -c3 slave1 && ping -c3 slave2
#验证sshd配置文件修改是否成功
sed -n '47p;48p;49p' /etc/ssh/sshd_config
RSAAuthentication yes
PubkeyAuthentication yes
AuthorizedKeysFile .ssh/authorized_keys
#查看java环境变量
[root@master ~]# env |grep JAVA
JAVA_HOME=/usr/local/jdk1.7.0_17
CLASSPATH=.:JAVA_HOME/lib/dt.jar:/usr/local/jdk1.7.0_17/lib/tools.jar
#查看hadoop环境变量
[root@master ~]# env |grep hadoop
HADOOP_HOME=/home/hadoop/hadoop
4.配置ssh无密码证书登录
#以下操作在master 服务器上配置
#制作证书,一路回车到结束
ssh-keygen
#使本机程序可以通过ssh登录本机
cat ~/.ssh/id_rsa.pub >>~/.ssh/authorized_keys
#将本地证书写入到两台slave主机 ~/.ssh/authorized_keys
ssh-copy-id slave1
ssh-copy-id slave2
#测试无密码能够顺利登陆两台slave主机,第一次登陆要输入yes;
for ip in $(cat /workspace/list.txt);do echo -------$ip------ ;ssh $ip hostname ;done
-------192.168.1.198------
master
-------192.168.1.199------
slave1
-------192.168.1.200------
slave2
5.下载、安装JDK、hadoop
jdk 下载地址
http://www.oracle.com/technetwork/java/javase/downloads/jdk7-downloads-1880260.html
jdk-7u17-linux-x64.tar.gz
hadoop 下载地址
wget http://labs.renren.com/apache-mirror/hadoop/core/hadoop-1.0.4/hadoop-1.0.4.tar.gz
hadoop-1.0.4.tar.gz
#master服务器安装jdk、hadoop
cd /workspace/
tar -xvf hadoop-1.0.4.tar.gz
cp -r hadoop-1.0.4 /home/hadoop/hadoop
tar -xvf jdk-7u17-linux-x64.tar.gz
cp -rp jdk1.7.0_17 /usr/local/
source /etc/profile
#测试jdk安装是否成功
[root@master workspace]# java -version
java version "1.7.0_17"
Java(TM) SE Runtime Environment (build 1.7.0_17-b02)
Java HotSpot(TM) 64-Bit Server VM (build 23.7-b01, mixed mode)
6.修改hadoop配置文件
[hadoop@master conf]$ pwd
/home/hadoop/hadoop/conf
#修改hadoop的jdk环境变量文件
vi hadoop-env.sh   #添加JAVA_HOME路径
export JAVA_HOME=/usr/local/jdk1.7.0_17
#修改节点核心配置文件
$ vi core-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- Put site-specific property overrides in this file. -->
<configuration>
<property>
<name>fs.default.name</name>
<value>hdfs://master:9000</value>
</property>
</configuration>
#修改hdfs配置
$ vi hdfs-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- Put site-specific property overrides in this file. -->
<configuration>
<property>
<name>dfs.replication</name>
<value>2</value>
</property>
</configuration>
#修改mapred相关配置
$ vim mapred-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- Put site-specific property overrides in this file. -->
<configuration>
<property>
<name>mapred.job.tracker</name>
<value>master:9001</value>
</property>
</configuration>
#修改指定namenode配置文件
vi masters
master
#修改指定datanode配置文件
vi slaves
slave1
slave2
7.配置datanode
所有的datanode(slave1 & slave2)配置都和namenode (master)一样,复制过去即可!
cd /home/hadoop
#将配置文件从master传输到 slave1
scp -r hadoop slave1:/home/hadoop/
rsync -avrpP /etc/profile root@slave1:/etc/
rsync -avrpP /usr/local/jdk1.7.0_17 root@slave1:/usr/local/
#检查变量环境变量是否生效
[hadoop@slave1 ~]$ source /etc/profile
[hadoop@slave1 ~]$ env |grep JAVA
JAVA_HOME=/usr/local/jdk1.7.0_17
CLASSPATH=.:JAVA_HOME/lib/dt.jar:/usr/local/jdk1.7.0_17/lib/tools.jar
#检查jdk运行状态是否正常
[hadoop@slave1 ~]$ java -version
java version "1.7.0_17"
Java(TM) SE Runtime Environment (build 1.7.0_17-b02)
Java HotSpot(TM) 64-Bit Server VM (build 23.7-b01, mixed mode)
#将配置文件从master传输到 slave2
scp -r hadoop slave2:/home/hadoop/
rsync -avrpP /etc/profile root@slave2:/etc/
rsync -avrpP /usr/local/jdk1.7.0_17 root@slave2:/usr/local/
#检查变量环境变量是否生效
source /etc/profile
[hadoop@slave2 ~]$ env |grep JAVA
JAVA_HOME=/usr/local/jdk1.7.0_17
CLASSPATH=.:JAVA_HOME/lib/dt.jar:/usr/local/jdk1.7.0_17/lib/tools.jar
#检查jdk运行状态是否正常
[hadoop@slave2 ~]$ java -version
java version "1.7.0_17"
Java(TM) SE Runtime Environment (build 1.7.0_17-b02)
Java HotSpot(TM) 64-Bit Server VM (build 23.7-b01, mixed mode)
8.格式化HDFS文件系统
pwd
/home/hadoop/hadoop
bin/hadoop namenode -format
#启动服务
bin/start-all.sh
#停止服务
bin/stop-all.sh
9.访问登陆页面
#浏览器访问
http://192.168.1.198:50070/
NameNode 'master:9000'
Started: Wed Mar 20 20:57:14 CST 2013
Version: 1.0.4, r1393290
Compiled: Wed Oct 3 05:13:58 UTC 2012 by hortonfo
Upgrades: There are no upgrades in progress.
Browse the filesystem
Namenode Logs
--------------------------------------------------------------------------------
Cluster Summary
6 files and directories, 1 blocks = 7 total. Heap Size is 33.64 MB / 966.69 MB (3%)
Configured Capacity : 34.53 GB
DFS Used : 68 KB
Non DFS Used : 5.19 GB
DFS Remaining : 29.34 GB
DFS Used% : 0 %
DFS Remaining% : 84.96 %
Live Nodes : 2
Dead Nodes : 0
Decommissioning Nodes : 0
Number of Under-Replicated Blocks : 0
----------------------------------------------------
NameNode Storage:
Storage Directory Type State
/tmp/hadoop-hadoop/dfs/name IMAGE_AND_EDITS Active
#命令行查看
#可用jps查看守护进程是否启动
[hadoop@master tmp]$ jps
4198 JobTracker
4745 Jps
4107 SecondaryNameNode
3955 NameNode
#报告hdfs的基本统计信息
hadoop dfsadmin -report
Configured Capacity: 37073182720 (34.53 GB)
Present Capacity: 31498399744 (29.34 GB)
DFS Remaining: 31498326016 (29.34 GB)
DFS Used: 73728 (72 KB)
DFS Used%: 0%
Under replicated blocks: 0
Blocks with corrupt replicas: 0
Missing blocks: 0
-------------------------------------------------
Datanodes available: 2 (2 total, 0 dead)
Name: 192.168.1.199:50010
Decommission Status : Normal
Configured Capacity: 18536591360 (17.26 GB)
DFS Used: 32768 (32 KB)
Non DFS Used: 2788442112 (2.6 GB)
DFS Remaining: 15748116480(14.67 GB)
DFS Used%: 0%
DFS Remaining%: 84.96%
Last contact: Thu Mar 21 01:33:31 CST 2013
Name: 192.168.1.200:50010
Decommission Status : Normal
Configured Capacity: 18536591360 (17.26 GB)
DFS Used: 40960 (40 KB)
Non DFS Used: 2786340864 (2.59 GB)
DFS Remaining: 15750209536(14.67 GB)
DFS Used%: 0%
DFS Remaining%: 84.97%
Last contact: Thu Mar 21 01:33:33 CST 2013
#文件上传演示
#新建目录
mkdir input
cd input/
echo 'this is test1 file!' >test1
echo 'this is test2 file!' >test2
cd ..
#将本地input目录上传到HDFS,目标目录名为in
hadoop dfs -put input in
#创建health文件夹
hadoop fs -mkdir hdfs://192.168.1.198:9000/health
#将本地的文件上传到hadoop的health文件夹
hadoop fs -copyFromLocal hadoop-1.0.4.tar.gz hdfs://192.168.1.198:9000/health/
#查看上传的文件
hadoop fs -ls hdfs://192.168.1.198:9000/health/
#管理与更新命令:
报告hdfs的基本统计信息
$ bin/hadoop dfsadmin -report
进入安全模式(安全模式下文件系统不允许修改)
$ bin/hadoop dfsadmin -safemode enter
Safe mode is ON
退出安全模式(需要等待17秒生效)
$ bin/hadoop dfsadmin -safemode leave
Safe mode is OFF
启动负载均衡
$ bin/start-balancer.sh