# Stop the firewall service immediately (CentOS 6-style service management)
sudo service iptables stop
# Disable firewall auto-start on boot
sudo chkconfig iptables off
# NOTE: the firewall must be turned off on EVERY machine in the cluster
# Give each node a unique hostname: edit this file ON THE MATCHING MACHINE,
# then reboot (or run `hostname <name>`) for it to take effect.
# --- on master ---
vim /etc/sysconfig/network
# ----------
HOSTNAME=master
# --- on slave1 ---
vim /etc/sysconfig/network
# ----------
HOSTNAME=slave1
# --- on slave2 ---
vim /etc/sysconfig/network
# ----------
HOSTNAME=slave2
# Map each hostname to its static IP so nodes can reach each other by name
vim /etc/hosts
# -------
192.168.100.132 master
192.168.100.133 slave1
192.168.100.134 slave2
注意:所有的机器都要改
# On EVERY node: generate an RSA key pair (press Enter through every prompt)
ssh-keygen -t rsa # 一路回车
bash
# On slave1: send its public key to master, stored under the name "slave1"
scp ~/.ssh/id_rsa.pub master:/home/hduser/.ssh/slave1
bash
# On slave2: send its public key to master, stored under the name "slave2"
# (fixed: the original repeated the slave1 command, which would have
#  overwritten slave1's key instead of uploading slave2's)
scp ~/.ssh/id_rsa.pub master:/home/hduser/.ssh/slave2
bash
# On master (inside ~/.ssh): merge all three public keys into authorized_keys
cat id_rsa.pub >> authorized_keys
cat slave1 >> authorized_keys
cat slave2 >> authorized_keys
chmod 600 authorized_keys # 必须改,不然不能免密登录 (sshd refuses group/world-readable key files)
# Push the merged authorized_keys back to both slaves so every node
# can log in to every other node without a password
scp ~/.ssh/authorized_keys slave1:/home/hduser/.ssh/authorized_keys
scp ~/.ssh/authorized_keys slave2:/home/hduser/.ssh/authorized_keys
把Hadoop、JDK的压缩包上传到集群中的每台机器、并解压。
可以先配好一台机器中的配置文件,再将软件复制到其他机器,减少配置的工作量。
# Point the shell at the Hadoop and JDK install directories (edit on EVERY node,
# then run `source /etc/profile` so the current session picks it up)
vim /etc/profile
# -------
export HADOOP_HOME=/hadoop/hadoop-2.6.5
export JAVA_HOME=/hadoop/jdk1.8.0_172
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$JAVA_HOME/bin
注意:所有的机器都要改
添加JAVA_HOME
<property>
<name>fs.defaultFS</name>
<value>hdfs://master:9000</value>
<description>默认的文件系统</description>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>/hadoop/hadoop-2.6.5/data</value>
<description></description>
</property>
<property>
<name>dfs.replication</name>
<value>2</value>
<description>数据块备份数</description>
</property>
<property>
<name>dfs.permissions.enabled</name>
<value>false</value>
<description>关闭权限检查</description>
</property>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
<description>指定运行mapreduce的框架</description>
</property>
<property>
<name>yarn.resourcemanager.hostname</name>
<value>master</value>
<description>指定ResourceManager节点的地址</description>
</property>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
<description>NodeManager上运行的附属服务</description>
</property>
<property>
<name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
<value>org.apache.hadoop.mapred.ShuffleHandler</value>
<description>指定shuffle类</description>
</property>
<property>
<name>yarn.log-aggregation-enable</name>
<value>true</value>
<description>是否启用日志聚合,将slave节点的日志聚合至master</description>
</property>
slave1
slave2
# Format the NameNode — master only, FIRST run only: reformatting an existing
# cluster wipes the HDFS metadata and orphans the DataNodes' block data.
# ('hadoop namenode -format' is deprecated in Hadoop 2.x in favor of 'hdfs')
hdfs namenode -format
# 启动HDFS (NameNode + DataNodes + SecondaryNameNode, via the slaves file)
start-dfs.sh
# 启动yarn (ResourceManager + NodeManagers)
start-yarn.sh
# Smoke test: upload a file into HDFS root, then fetch it back
hadoop fs -put aa.txt /
hadoop fs -get /aa.txt
使用hadoop自带的测试例
* 计算圆周率
# Estimate pi with 5 map tasks, 5 samples each
hadoop jar hadoop-mapreduce-examples-2.6.5.jar pi 5 5
# Word count — requires an HDFS input path and an output path; the output
# directory must NOT already exist (the original command omitted both args)
hadoop jar hadoop-mapreduce-examples-2.6.5.jar wordcount /aa.txt /wordcount-out
注意:指定的输出目录在HDFS中必须 不存在,否则会出错
需要同步集群的时间,解决方案如下:
# Install the NTP client tools, then sync this node's clock against a public
# pool — run on every node; MapReduce jobs fail if cluster clocks drift apart
sudo yum -y install ntp ntpdate
ntpdate cn.pool.ntp.org
在mapred-site.xml增加配置
<property>
<name>mapreduce.jobhistory.address</name>
<value>master:10020</value>
<description>MapReduce JobHistory Server IPC host:port</description>
</property>
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>master:19888</value>
<description>MapReduce JobHistory Server Web UI host:port</description>
</property>
启动服务
# Start the MapReduce JobHistory server (run on master; serves the Web UI
# on the ports configured in mapred-site.xml above)
mr-jobhistory-daemon.sh start historyserver
- http://hadoop.apache.org/docs/r2.6.5/hadoop-project-dist/hadoop-common/SingleCluster.html