CentOS CDH5 installation

1: Node roles

vim /etc/hosts 
192.168.169.132 hadoop_nn
192.168.169.133 hadoop_dn1
192.168.169.134 hadoop_dn2
192.168.169.135 hadoop_dn3
vim /etc/sysconfig/network and add:
NETWORKING=yes
HOSTNAME=hadoop_nn # change the hostname on every node accordingly
Apply the change: service network restart
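A quick check that the hostname and /etc/hosts entries took effect (a minimal sketch, using the host names defined above):
hostname             # should print this node's name, e.g. hadoop_nn on the NameNode
ping -c 1 hadoop_dn1 # each name should resolve to its 192.168.169.x address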

Disable iptables and SELinux

 

Disable iptables:
service iptables stop     (temporary, until reboot)
chkconfig iptables off    (takes effect after reboot)
Disable SELinux:
setenforce 0 (takes effect immediately; this did not work for me)
Set SELINUX=disabled in /etc/selinux/config (permanent after a reboot); this method is confirmed to work.
Check SELinux status: /usr/sbin/sestatus -v

 

2: Set up passwordless SSH

On every machine run ssh-keygen -t rsa and press Enter at every prompt.
cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
On the NameNode only, scp the authorized_keys file to the other machines:
scp ~/.ssh/authorized_keys root@hadoop_dn1:~/.ssh/
scp ~/.ssh/authorized_keys root@hadoop_dn2:~/.ssh/
scp ~/.ssh/authorized_keys root@hadoop_dn3:~/.ssh/
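If the test login below still prompts for a password, sshd may be rejecting the key because of loose permissions; a common fix is to tighten them on every node:
chmod 700 ~/.ssh
chmod 600 ~/.ssh/authorized_keys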

Test that it works:

[root@hadoop_nn ~]# ssh hadoop_dn2
Last login: Sun Oct 12 03:40:47 2014 from 192.168.169.1
[root@hadoop_dn2 ~]#

 

3: Install JDK 7 on all nodes

wget http://download.oracle.com/otn-pub/java/jdk/7u67-b01/jdk-7u67-linux-x64.rpm?AuthParam=1411394035_e8dc92cf1ce115633bfff2429fd6da78 -O jdk-7u67-linux-x64.rpm
rpm -ivh jdk-7u67-linux-x64.rpm
Configure environment variables (add to /etc/profile):
export JAVA_HOME=/usr/java/latest
export PATH=$JAVA_HOME/bin:$PATH
export CLASSPATH=.:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar

Then run: source /etc/profile
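To confirm the JDK is picked up from the new PATH (the version line below is approximate; the exact build string depends on the RPM):
java -version
# java version "1.7.0_67"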

4: Create users and groups

groupadd hdfs && useradd hdfs -g hdfs && useradd hadoop -g hdfs
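A quick sanity check that both accounts were created with the hdfs group:
id hdfs
id hadoop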

5: Install CDH (official site: http://www.cloudera.com/content/cloudera/en/home.html)

wget "http://archive.cloudera.com/cdh5/one-click-install/redhat/6/x86_64/cloudera-cdh-5-0.x86_64.rpm"
yum --nogpgcheck localinstall cloudera-cdh-5-0.x86_64.rpm
Add the Cloudera repository GPG key:
rpm --import http://archive.cloudera.com/cdh5/redhat/6/x86_64/cdh/RPM-GPG-KEY-cloudera
If you are on RHEL/CentOS 5.x, replace the 6 in these URLs with 5.
NameNode (192.168.169.132 hadoop_nn)
Install the NameNode:
yum clean all; yum install hadoop-hdfs-namenode
Install hadoop-client:
yum clean all; yum install hadoop-client 
DataNodes (the other three machines)
Install the NodeManager, DataNode, and MapReduce packages:
yum clean all; yum install hadoop-yarn-nodemanager hadoop-hdfs-datanode hadoop-mapreduce
Install hadoop-client:
yum clean all; yum install hadoop-client 
ResourceManager (on the NN)
Install the ResourceManager:
yum clean all; yum install hadoop-yarn-resourcemanager
Install LZO on all nodes:
vim /etc/yum.repos.d/cloudera-gplextras5.repo
Contents:
[cloudera-gplextras5]
# Packages for Cloudera's GPLExtras, Version 5, on RedHat or CentOS 6 x86_64
name=Cloudera's GPLExtras, Version 5
baseurl=http://archive.cloudera.com/gplextras5/redhat/6/x86_64/gplextras/5/
gpgkey = http://archive.cloudera.com/gplextras5/redhat/6/x86_64/gplextras/RPM-GPG-KEY-cloudera
gpgcheck = 1
Then install it with yum:
yum install hadoop-lzo
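To see which Hadoop/CDH packages actually landed on a node (the list differs between the NN and the DNs):
rpm -qa | grep -Ei 'hadoop|lzo'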

Create directories

DN (with multiple disks, create one set of directories per disk):
mkdir -p /data/cache1/dfs/dn /data/cache1/dfs/mapred/local
chown -R hdfs:hdfs /data/cache1/dfs/dn /data/cache1/dfs/mapred/local
NN 
mkdir -p /data/cache1/dfs/nn /data/cache2/dfs/nn
chown -R hdfs:hdfs /data/cache1/dfs/nn /data/cache2/dfs/nn
chmod 700 /data/cache1/dfs/nn /data/cache2/dfs/nn

  

6: Configuration files (/etc/hadoop/conf)

hadoop-env.sh (not created by the install; copy one in)
# Set Hadoop-specific environment variables here.
# The only required environment variable is JAVA_HOME. All others are
# optional. When running a distributed configuration it is best to
# set JAVA_HOME in this file, so that it is correctly defined on
# remote nodes.
# The maximum amount of heap to use, in MB. Default is 1000.
#export HADOOP_HEAPSIZE=
#export HADOOP_NAMENODE_INIT_HEAPSIZE=""
# Extra Java runtime options. Empty by default.
export HADOOP_OPTS="-Djava.net.preferIPv4Stack=true ${HADOOP_OPTS}"
# Command specific options appended to HADOOP_OPTS when specified
export HADOOP_NAMENODE_OPTS="-Dsecurity.audit.logger=INFO,DRFAS -Dhdfs.audit.logger=INFO,DRFAAUDIT ${HADOOP_NAMENODE_OPTS}"
HADOOP_JOBTRACKER_OPTS="-Dsecurity.audit.logger=INFO,DRFAS -Dmapred.audit.logger=INFO,MRAUDIT -Dmapred.jobsummary.logger=INFO,JSA ${HADOOP_JOBTRACKER_OPTS}"
HADOOP_TASKTRACKER_OPTS="-Dsecurity.audit.logger=ERROR,console -Dmapred.audit.logger=ERROR,console ${HADOOP_TASKTRACKER_OPTS}"
HADOOP_DATANODE_OPTS="-Dsecurity.audit.logger=ERROR,DRFAS ${HADOOP_DATANODE_OPTS}"
export HADOOP_SECONDARYNAMENODE_OPTS="-Dsecurity.audit.logger=INFO,DRFAS -Dhdfs.audit.logger=INFO,DRFAAUDIT ${HADOOP_SECONDARYNAMENODE_OPTS}"
# The following applies to multiple commands (fs, dfs, fsck, distcp etc)
export HADOOP_CLIENT_OPTS="-Xmx128m ${HADOOP_CLIENT_OPTS}"
#HADOOP_JAVA_PLATFORM_OPTS="-XX:-UsePerfData ${HADOOP_JAVA_PLATFORM_OPTS}"
# On secure datanodes, user to run the datanode as after dropping privileges
export HADOOP_SECURE_DN_USER=hdfs
# Where log files are stored. $HADOOP_HOME/logs by default.
export HADOOP_LOG_DIR=/var/local/hadoop/logs
# Where log files are stored in the secure data environment.
export HADOOP_SECURE_DN_LOG_DIR=$HADOOP_LOG_DIR
# The directory where pid files are stored. /tmp by default.
export HADOOP_PID_DIR=/var/local/hadoop/pid
export HADOOP_SECURE_DN_PID_DIR=$HADOOP_PID_DIR
# A string representing this instance of hadoop. $USER by default.
export HADOOP_IDENT_STRING=$USER
export JAVA_HOME=/usr/java/latest

 

mapred-site.xml:
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>    <!-- use YARN -->
</property>
<property>
<name>mapred.local.dir</name>
<value>/data/cache1/dfs/mapred/local</value>    <!-- MapReduce local data directory -->
</property>
</configuration>

 

core-site.xml:
<configuration>
<property>
<name>fs.default.name</name>
<value>hdfs://hadoop_nn:8020</value>
</property>

<property>
<name>dfs.replication</name>
<value>1</value>
</property>
</configuration>

 

hdfs-site.xml:
<configuration>
<property>
<name>dfs.name.dir</name>
<value>/data/cache1/dfs/nn</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>/data/cache1/dfs/dn/</value>
</property>
</configuration>

 

yarn-site.xml, add (use the ResourceManager's IP):
<property>
<name>yarn.resourcemanager.address</name>
<value>192.168.169.132:8032</value>
</property>
<property>
<name>yarn.resourcemanager.scheduler.address</name>
<value>192.168.169.132:8030</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.address</name>
<value>0.0.0.0:8088</value>
</property>
<property>
<name>yarn.resourcemanager.resource-tracker.address</name>
<value>192.168.169.132:8031</value>
</property>
<property>
<name>yarn.resourcemanager.admin.address</name>
<value>192.168.169.132:8033</value>
</property>
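Note: depending on the defaults your CDH5 packages ship with, the NodeManagers may also need the MapReduce shuffle auxiliary service declared in yarn-site.xml before jobs will run; this addition is an assumption to verify against the Cloudera documentation, not part of the original setup:
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>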

 

masters:
hadoop_nn    # the NameNode in this example

 

slaves:
hadoop_dn1
hadoop_dn2
hadoop_dn3

Then scp the configuration to each of the other hosts:

scp -r /etc/hadoop/conf root@hadoop_dn3:/etc/hadoop/conf
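The same command, repeated for the other two DataNodes:
scp -r /etc/hadoop/conf root@hadoop_dn1:/etc/hadoop/conf
scp -r /etc/hadoop/conf root@hadoop_dn2:/etc/hadoop/conf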

 

7: Start the services

NN

/etc/init.d/hadoop-hdfs-namenode init
/etc/init.d/hadoop-hdfs-namenode start
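A quick check that the NameNode process is up (jps ships with the JDK installed earlier; the exact log file name depends on the service user and hostname):
jps
tail /var/local/hadoop/logs/hadoop-hdfs-namenode-hadoop_nn.log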

RM

/etc/init.d/hadoop-yarn-resourcemanager start

 DN

/etc/init.d/hadoop-yarn-nodemanager start
/etc/init.d/hadoop-hdfs-datanode start
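With all daemons started, an end-to-end check from the NameNode (run as the hdfs user; the report should list three live DataNodes):
sudo -u hdfs hdfs dfsadmin -report
sudo -u hdfs hadoop fs -ls /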

8: Web UIs

http://192.168.169.132:50070/
http://192.168.169.132:8088/cluster (the YARN equivalent of the Hadoop 1.0 JobTracker UI on port 50030)

9: Problems

When starting the NN, the log reports: log4j:ERROR Could not find value for key log4j.appender.DRFAAUDIT
Fix: add the following to /etc/hadoop/conf/log4j.properties:
log4j.appender.DRFAAUDIT=org.apache.log4j.ConsoleAppender
log4j.appender.DRFAAUDIT.layout=org.apache.log4j.PatternLayout

If creating files on HDFS fails with a permission error, grant the required permissions.
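For example, to give the hadoop user a writable home directory on HDFS (a sketch; the path is an assumption for this setup):
sudo -u hdfs hadoop fs -mkdir -p /user/hadoop
sudo -u hdfs hadoop fs -chown hadoop:hdfs /user/hadoop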

 
