概述:分布式核心思想是人多力量大,众人拾柴火焰高;把众多计算机集中起来进行任务处理,其存储和运算能力就提高了,可以并行运算。但是众多PC的维护管理也是一个问题,所谓众口难调,这就是鱼和熊掌不可兼得的道理,只能两害相权取其轻,来进行利益最大化处理。
本次实验使用三台虚拟机:master,node1,node2,其中master作为namenode,secondaryNameNode,以及JobTracker,另外两个节点作为dataNode和taskTracker,具体搭建过程如下:
1、配置host文件(或者使用DNS服务器)
本次实验使用三台虚拟机:master,node1,node2,其中master作为namenode,secondaryNameNode,以及JobTracker,另外两个节点作为dataNode和taskTracker,具体搭建过程如下:
1、配置host文件(或者使用DNS服务器)
修改/etc/hosts文件
IP地址 主机名
[root@bogon ~]# vi /etc/hosts
# Do not remove the following line, or various programs
# that require network functionality will fail.
127.0.0.1 localhost.localdomain localhost
::1 localhost6.localdomain6 localhost6
192.168.1.106 node1
192.168.1.107 master
192.168.1.110 node2
[root@bogon ~]# scp /etc/hosts master:/etc/hosts
The authenticity of host 'master (192.168.1.107)' can't be established.
RSA key fingerprint is 42:d9:0b:a6:15:c2:23:c0:2d:d4:bd:88:4b:c5:dd:ff.
Are you sure you want to continue connecting (yes/no)? yes
Warning: Permanently added 'master,192.168.1.107' (RSA) to the list of known hosts.
hosts 100% 252 0.3KB/s 00:00
[root@bogon ~]# scp /etc/hosts node2:/etc/hosts
2、建立hadoop运行账号
配置运行hadoop的专用用户,当然使用超级用户root也不违法乱纪
3、配置ssh免密码接入
每个节点都产生公钥和私钥,拷贝公钥到authorized_keys中
公钥分发传递:然后把各个节点的公钥都拷贝到authorized_keys文件中
生成密钥对到root用户的~/.ssh目录
ssh-keygen -t rsa
公钥文件放入authorized_keys
cd .ssh/
cp id_rsa.pub authorized_keys
4、安装JDK
[root@bogon bin]# vi ~/.bash_profile
JAVA_HOME=/usr/java/jdk1.7.0_67
PATH=$PATH:$HOME/bin:$JAVA_HOME/bin
export PATH JAVA_HOME
验证:
[root@bogon bin]# ssh node1
Last login: Tue Dec 8 11:22:14 2015 from 192.168.1.103
[root@node1 ~]# source .bash_profile
[root@node1 ~]# echo $JAVA_HOME
/usr/java/jdk1.7.0_67
[root@node1 ~]# jps
==========================================================
5、下载并且解压hadoop安装包
1)解压,配置hadoop环境变量bin
export HADOOP_HOME=/opt/hadoop
export PATH=$PATH:$HADOOP_HOME/bin
6、配置
文件
修改
【hadoop-env.sh】
JAVA_HOME
【core-site.xml】
<configuration>
<property>
<name>fs.default.name</name>
<value>hdfs://master:9000</value>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>/opt/hadoop_data</value>
</property>
</configuration>
【hdfs-site.xml】
<configuration>
<property>
<name>dfs.replication</name>
<value>2</value>
</property>
<property>
<name>dfs.permissions</name>
<value>false</value>
</property>
</configuration>
【mapred-site.xml】
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>mapred.job.tracker</name>
<value>master:9001</value>
</property>
</configuration>
7、配置masters和slaves文件
masters配置主节点
slaves配置从节点
[root@node2 conf]# cat masters
master
[root@node2 conf]# cat slaves
node1
node2
8、向各个节点复制hadoop
[root@master ~]# scp .bash_profile node1:~/
[root@master ~]# scp .bash_profile node2:~/
[root@node2 opt]#scp -r hadoop node1:/opt
[root@node2 opt]#scp -r hadoop master:/opt
==========================================================
9、格式化namenode
仅仅格式化master节点
hadoop namenode -format
[root@master ~]# hadoop namenode -format
Warning: $HADOOP_HOME is deprecated.
15/12/08 12:41:19 INFO namenode.NameNode: STARTUP_MSG:
/************************************************************
STARTUP_MSG: Starting NameNode
STARTUP_MSG: host = master/192.168.1.107
STARTUP_MSG: args = [-format]
STARTUP_MSG: version = 1.1.2
STARTUP_MSG: build = https://svn.apache.org/repos/asf/hadoop/common/branches/branch-1.1 -r 1440782; compiled by 'hortonfo' on Thu Jan 31 02:03:24 UTC 2013
************************************************************/
15/12/08 12:41:25 INFO util.GSet: VM type = 64-bit
15/12/08 12:41:25 INFO util.GSet: 2% max memory = 19.33375 MB
15/12/08 12:41:25 INFO util.GSet: capacity = 2^21 = 2097152 entries
15/12/08 12:41:25 INFO util.GSet: recommended=2097152, actual=2097152
15/12/08 12:41:29 INFO namenode.FSNamesystem: fsOwner=root
15/12/08 12:41:29 INFO namenode.FSNamesystem: supergroup=supergroup
15/12/08 12:41:29 INFO namenode.FSNamesystem: isPermissionEnabled=true
15/12/08 12:41:29 INFO namenode.FSNamesystem: dfs.block.invalidate.limit=100
15/12/08 12:41:29 INFO namenode.FSNamesystem: isAccessTokenEnabled=false accessKeyUpdateInterval=0 min(s), accessTokenLifetime=0 min(s)
15/12/08 12:41:29 INFO namenode.NameNode: Caching file names occuring more than 10 times
15/12/08 12:41:33 INFO common.Storage: Image file of size 110 saved in 0 seconds.
15/12/08 12:41:33 INFO namenode.FSEditLog: closing edit log: position=4, editlog=/opt/hadoop_data/dfs/name/current/edits
15/12/08 12:41:33 INFO namenode.FSEditLog: close success: truncate to 4, editlog=/opt/hadoop_data/dfs/name/current/edits
15/12/08 12:41:34 INFO common.Storage: Storage directory /opt/hadoop_data/dfs/name has been successfully formatted.
15/12/08 12:41:34 INFO namenode.NameNode: SHUTDOWN_MSG:
/************************************************************
SHUTDOWN_MSG: Shutting down NameNode at master/192.168.1.107
************************************************************/
10、启动hadoop
在master节点进行启动,master是火车头,统领武林
start-all.sh
[root@master ~]# start-all.sh
Warning: $HADOOP_HOME is deprecated.
starting namenode, logging to /opt/hadoop/libexec/../logs/hadoop-root-namenode-master.out
node2: starting datanode, logging to /opt/hadoop/libexec/../logs/hadoop-root-datanode-node2.out
node1: starting datanode, logging to /opt/hadoop/libexec/../logs/hadoop-root-datanode-node1.out
The authenticity of host 'master (192.168.1.107)' can't be established.
RSA key fingerprint is 42:d9:0b:a6:15:c2:23:c0:2d:d4:bd:88:4b:c5:dd:ff.
Are you sure you want to continue connecting (yes/no)? yes
master: Warning: Permanently added 'master,192.168.1.107' (RSA) to the list of known hosts.
master: starting secondarynamenode, logging to /opt/hadoop/libexec/../logs/hadoop-root-secondarynamenode-master.out
starting jobtracker, logging to /opt/hadoop/libexec/../logs/hadoop-root-jobtracker-master.out
node2: starting tasktracker, logging to /opt/hadoop/libexec/../logs/hadoop-root-tasktracker-node2.out
node1: starting tasktracker, logging to /opt/hadoop/libexec/../logs/hadoop-root-tasktracker-node1.out
11、验证进程
使用jps验证各个后台进程是否启动成功
[root@master ~]# jps
3614 NameNode
3763 SecondaryNameNode
3916 Jps
3837 JobTracker
[root@node1 ~]# jps
3513 Jps
[root@node1 ~]# jps
3626 TaskTracker
3555 DataNode
3667 Jps
[root@node2 ~]# jps
3573 DataNode
3627 TaskTracker
3698 Jps
[root@node2 ~]#
[root@master bin]# hadoop dfsadmin -report
Warning: $HADOOP_HOME is deprecated.
Configured Capacity: 36889264128 (34.36 GB)
Present Capacity: 28400594944 (26.45 GB)
DFS Remaining: 28400537600 (26.45 GB)
DFS Used: 57344 (56 KB)
DFS Used%: 0%
Under replicated blocks: 0
Blocks with corrupt replicas: 0
Missing blocks: 0
-------------------------------------------------
Datanodes available: 2 (2 total, 0 dead)
Name: 192.168.1.106:50010
Decommission Status : Normal
Configured Capacity: 18444632064 (17.18 GB)
DFS Used: 28672 (28 KB)
Non DFS Used: 4213334016 (3.92 GB)
DFS Remaining: 14231269376(13.25 GB)
DFS Used%: 0%
DFS Remaining%: 77.16%
Last contact: Tue Dec 08 12:58:40 PST 2015
Name: 192.168.1.110:50010
Decommission Status : Normal
Configured Capacity: 18444632064 (17.18 GB)
DFS Used: 28672 (28 KB)
Non DFS Used: 4275335168 (3.98 GB)
DFS Remaining: 14169268224(13.2 GB)
DFS Used%: 0%
DFS Remaining%: 76.82%
Last contact: Tue Dec 08 12:58:39 PST 2015
启动停止
[root@master bin]# stop-all.sh
Warning: $HADOOP_HOME is deprecated.
no jobtracker to stop
node1: no tasktracker to stop
node2: no tasktracker to stop
stopping namenode
node2: stopping datanode
node1: stopping datanode
master: stopping secondarynamenode