0. Machine planning
master
NameNode: cloud092039 (8.1.92.39) - runs start-dfs.sh, i.e. the machine that starts the HDFS daemons
JobTracker/Secondary NameNode: cloud092040 (8.1.92.40) - runs start-mapred.sh, i.e. the machine that starts the MapReduce daemons
slave
DataNode/TaskTracker1: cloud092041 (8.1.92.41)
DataNode/TaskTracker2: cloud092042 (8.1.92.42)
1. Install the Sun JDK and configure JAVA_HOME and CLASSPATH
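No commands are given for this step; the lines below are a minimal sketch, assuming the JDK sits at /usr/lib/jvm/java-6-sun (the same path used in the .bashrc settings later on) and that the variables go into /etc/profile:
vi /etc/profile
# JDK path is an assumption; adjust to the actual install location
export JAVA_HOME=/usr/lib/jvm/java-6-sun
export CLASSPATH=.:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar
export PATH=$PATH:$JAVA_HOME/bin
source /etc/profile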
2. Add a user; be sure to set a password
groupadd hadoop
useradd -g hadoop hduser
passwd hduser
3. Configure SSH
su - hduser
ssh-keygen -t rsa -P ""
cat $HOME/.ssh/id_rsa.pub >> $HOME/.ssh/authorized_keys
# Edit the sshd config
# switch back to root
exit
vi /etc/ssh/sshd_config
RSAAuthentication yes
PubkeyAuthentication yes
AuthorizedKeysFile .ssh/authorized_keys
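After changing sshd_config, the SSH daemon normally has to be restarted for the settings to take effect; a sketch, assuming a RHEL/CentOS-style service command:
service sshd restart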
# Copy the public key from the masters (39, 40) to the slaves (41, 42)
ssh-copy-id -i $HOME/.ssh/id_rsa.pub hduser@cloud092041
ssh-copy-id -i $HOME/.ssh/id_rsa.pub hduser@cloud092042
# From the masters (39, 40), ssh to the slaves (41, 42) once; later logins are passwordless
ssh cloud092041
ssh cloud092042
# Copy the public key from the namenode (39) to the secondary namenode (40)
ssh-copy-id -i $HOME/.ssh/id_rsa.pub hduser@cloud092040
# From 39, ssh to 40 once; later logins are passwordless
ssh cloud092040
# Copy the public key from the datanodes (41, 42) to the namenode (39) and the secondary namenode (40)
ssh-copy-id -i $HOME/.ssh/id_rsa.pub hduser@cloud092039
ssh-copy-id -i $HOME/.ssh/id_rsa.pub hduser@cloud092040
# From 41 and 42, ssh to 39 and 40 once; later logins are passwordless
ssh cloud092039
ssh cloud092040
4. Edit the hosts file
vi /etc/hosts
127.0.0.1 localhost localhost.localdomain localhost4 localhost4.localdomain4
8.1.92.39 cloud092039
8.1.92.40 cloud092040
8.1.92.41 cloud092041
8.1.92.42 cloud092042
::1 localhost localhost.localdomain localhost6 localhost6.localdomain6
5. Install Hadoop
# Place the Hadoop distribution under /opt
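The extraction itself is not shown; a short sketch, assuming the hadoop-1.2.0 tarball has already been downloaded (the mirror URL in the comment is an assumption):
cd /opt
# e.g. wget http://archive.apache.org/dist/hadoop/core/hadoop-1.2.0/hadoop-1.2.0.tar.gz
tar -xzf hadoop-1.2.0.tar.gz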
# Create the tmp directory underneath it
mkdir -p /opt/hadoop-1.2.0/tmp
chown -R hduser.hadoop /opt/hadoop-1.2.0/
6. Edit $HOME/.bashrc
su - hduser
vi $HOME/.bashrc
# Set Hadoop-related environment variables
export HADOOP_HOME=/opt/hadoop-1.2.0
# Set JAVA_HOME (we will also configure JAVA_HOME directly for Hadoop later on)
export JAVA_HOME=/usr/lib/jvm/java-6-sun
# Some convenient aliases and functions for running Hadoop-related commands
unalias fs &> /dev/null
alias fs="hadoop fs"
unalias hls &> /dev/null
alias hls="fs -ls"
# If you have LZO compression enabled in your Hadoop cluster and
# compress job outputs with LZOP (not covered in this tutorial):
# Conveniently inspect an LZOP compressed file from the command
# line; run via:
#
# $ lzohead /hdfs/path/to/lzop/compressed/file.lzo
#
# Requires installed 'lzop' command.
#
lzohead () {
hadoop fs -cat $1 | lzop -dc | head -1000 | less
}
# Add Hadoop bin/ directory to PATH
export PATH=$PATH:$HADOOP_HOME/bin
---------------------------
# Re-login for the changes to take effect
exit
su - hduser
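The .bashrc comment above also mentions setting JAVA_HOME directly for Hadoop; in Hadoop 1.x this is done in conf/hadoop-env.sh on every machine. A sketch, assuming the same JDK path as in step 1:
vi $HADOOP_HOME/conf/hadoop-env.sh
# uncomment and adjust the JAVA_HOME line (the path here is an assumption)
export JAVA_HOME=/usr/lib/jvm/java-6-sun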
7. Configure the secondary master
On 39:
vi $HADOOP_HOME/conf/masters
cloud092040
8. Configure the slaves
On 39 and 40:
vi $HADOOP_HOME/conf/slaves
cloud092041
cloud092042
9. Configure core-site.xml (on all machines)
vi $HADOOP_HOME/conf/core-site.xml
Add the following properties between the <configuration> tags:
<property>
  <name>hadoop.tmp.dir</name>
  <value>/opt/hadoop-1.2.0/tmp</value>
  <description>A base for other temporary directories.</description>
</property>
<property>
  <name>fs.default.name</name>
  <value>hdfs://cloud092039:54310</value>
  <description>The name of the default file system. A URI whose
  scheme and authority determine the FileSystem implementation. The
  uri's scheme determines the config property (fs.SCHEME.impl) naming
  the FileSystem implementation class. The uri's authority is used to
  determine the host, port, etc. for a filesystem.</description>
</property>
10. Configure conf/mapred-site.xml (on all machines)
vi $HADOOP_HOME/conf/mapred-site.xml
Add the following property between the <configuration> tags:
<property>
  <name>mapred.job.tracker</name>
  <value>cloud092040:54311</value>
  <description>The host and port that the MapReduce job tracker runs
  at. If "local", then jobs are run in-process as a single map
  and reduce task.</description>
</property>
11. Configure conf/hdfs-site.xml (on all machines)
vi $HADOOP_HOME/conf/hdfs-site.xml
Add the following property between the <configuration> tags:
<property>
  <name>dfs.replication</name>
  <value>2</value>
  <description>Default block replication.
  The actual number of replications can be specified when the file is created.
  The default is used if replication is not specified in create time.</description>
</property>
12. Format HDFS
On 39:
$HADOOP_HOME/bin/hadoop namenode -format
13. Verify the installation
On 39, start the NameNode daemon:
$HADOOP_HOME/bin/start-dfs.sh
This automatically starts the DataNode daemons on 41 and 42 as well.
On 40, start the JobTracker daemon:
$HADOOP_HOME/bin/start-mapred.sh
This automatically starts the TaskTracker daemons on 41 and 42 as well.
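To double-check that the daemons are really up, jps (shipped with the JDK) can be run on each machine; the expected process list below is inferred from the role assignment in step 0:
jps
# expected on 39: NameNode; on 40: SecondaryNameNode and JobTracker; on 41/42: DataNode and TaskTracker
The Hadoop 1.x web UIs on their default ports are another quick check: http://cloud092039:50070 for the NameNode and http://cloud092040:50030 for the JobTracker.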
14. Run the wordcount example
On 39, place the txt files under /tmp/gutenberg
# Copy them to HDFS
$HADOOP_HOME/bin/hadoop dfs -copyFromLocal /tmp/gutenberg /home/hduser/gutenberg
# Check the result of the copy
$HADOOP_HOME/bin/hadoop dfs -ls /home/hduser
# Run wordcount
cd $HADOOP_HOME
bin/hadoop jar hadoop*examples*.jar wordcount /home/hduser/gutenberg /home/hduser/gutenberg-output
# View the output
bin/hadoop dfs -ls /home/hduser/gutenberg-output
bin/hadoop dfs -cat /home/hduser/gutenberg-output/part-r-00000
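# Or merge the output to the local filesystem and inspect it there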
mkdir /tmp/gutenberg-output
bin/hadoop dfs -getmerge /home/hduser/gutenberg-output /tmp/gutenberg-output
head /tmp/gutenberg-output/gutenberg-output