转载请注明出处:http://blog.csdn.net/guoyJoe/article/details/49472879
一、官方文档:
http://www.cloudera.com/content/www/en-us/documentation/enterprise/latest/topics/cdh_ig_hdfs_cluster_deploy.html
二、下载包:archive.cloudera.com/cdh5 #Index of /cdh5/redhat/6/x86_64/cdh/cloudera-cdh5.repo/复制链接地址
1、下载yum源 #cd /etc/yum.repos.d/
wget http://archive.cloudera.com/cdh5/redhat/6/x86_64/cdh/cloudera-cdh5.repo
2、查看yum包
yum list |grep cdh
3、安装hdfs
yum -y install hadoop-hdfs-namenode hadoop-hdfs-datanode hadoop-hdfs-journalnode #节点1,主节点
yum -y install hadoop-hdfs-datanode hadoop-hdfs-journalnode hadoop-client hadoop-yarn-nodemanager hadoop-yarn #节点2,从节点
yum -y install hadoop-hdfs-datanode hadoop-hdfs-journalnode hadoop-client hadoop-yarn-nodemanager hadoop-yarn #节点3
三、chkconfig
1.第三节点关闭4个服务
chkconfig hadoop-hdfs-datanode off
chkconfig hadoop-hdfs-journalnode off
chkconfig hadoop-yarn-nodemanager off
chkconfig hadoop-yarn-resourcemanager off
hadoop-hdfs-datanode 0:off 1:off 2:off 3:off 4:off 5:off 6:off
hadoop-hdfs-journalnode 0:off 1:off 2:off 3:off 4:off 5:off 6:off
hadoop-yarn-nodemanager 0:off 1:off 2:off 3:off 4:off 5:off 6:off
hadoop-yarn-resourcemanager 0:off 1:off 2:off 3:off 4:off 5:off 6:off
2.第二个节点关闭3个服务
chkconfig hadoop-hdfs-datanode off
chkconfig hadoop-hdfs-journalnode off
chkconfig hadoop-yarn-nodemanager off
chkconfig hadoop-yarn-resourcemanager off #这个服务在第二节点不用
3.第一个节点关闭4个服务
chkconfig hadoop-hdfs-namenode off
chkconfig hadoop-hdfs-datanode off
chkconfig hadoop-hdfs-journalnode off
chkconfig hadoop-yarn-nodemanager off #如果没有,要安装!
yum -y install hadoop-yarn-nodemanager
四、NTP配置
五、SSH配置
六、jdk安装(三节点)
1.下载jdk-7u25-linux-x64.rpm
2.安装jdk
rpm -qa |grep openjdk
rpm -e java-1.7.0-openjdk-src
rpm -e java-1.7.0-openjdk-devel
rpm -e java-1.7.0-openjdk-demo
rpm -e java-1.7.0-openjdk-javadoc
rpm -e java-1.7.0-openjdk
rpm -ivh jdk-7u25-linux-x64.rpm
rpm -qa |grep java
java -version
hadoop version
hadoop classpath
3.把/etc/profile环境全删除掉(如果配了)
七、在第一个节点操作
1.安装zookeeper,hadoop-mapreduce
yum list |grep hadoop
yum list |grep zookeeper
yum list |grep cdh
yum install -y zookeeper-server hadoop-mapreduce
rpm -qa |grep cdh
rpm -qa |grep cdh | sort
2.在/u01下创建目录并授权
mkdir name data journal local
chown hdfs:hdfs name data journal
3.copy hadoop配置文件
cd /etc/hadoop/conf
mv *.xml /etc/hadoop/conf  #注:原文为 *.mv,应为 *.xml 之笔误(即下面列出的4个配置文件)
core-site.xml
hdfs-site.xml
mapred-site.xml
yarn-site.xml
4.启动hdfs-journalnode
service hadoop-hdfs-journalnode start
5.查看jps
/usr/java/default/bin/jps
6.查看hdfs日志
cd /var/log/hadoop-hdfs/
cat hadoop-hdfs-journalnode-hadoop1.log
八、拷贝配置文件到节点2节点3
scp hdfs-site.xml hadoop2:/etc/hadoop/conf
scp hdfs-site.xml hadoop3:/etc/hadoop/conf
scp core-site.xml hadoop2:/etc/hadoop/conf
scp core-site.xml hadoop3:/etc/hadoop/conf
九、在第二节点启动hdfs-journalnode(name主备)
mkdir name data journal local
chown hdfs:hdfs name data journal
service hadoop-hdfs-journalnode start
cat /var/log/hadoop-hdfs/hadoop-hdfs-journalnode-hadoop2.log
十、在第三节点启动hdfs-journalnode(不用name)
mkdir data journal local
chown hdfs:hdfs data journal
service hadoop-hdfs-journalnode start
cat /var/log/hadoop-hdfs/hadoop-hdfs-journalnode-hadoop3.log
十一、在第一节点启动hdfs-namenode
service hadoop-hdfs-namenode start(如果报错查日志)
cat /var/log/hadoop-hdfs/hadoop-hdfs-namenode-hadoop1.log
hdfs命令
hdfs namenode -format
#cd /u01/name
#rm -rf current
sudo -u hdfs hdfs namenode -format
service hadoop-hdfs-namenode start
cat /var/log/hadoop-hdfs/hadoop-hdfs-namenode-hadoop1.log
chkconfig
chkconfig zookeeper-server off
十二、在第二节点
hdfs dfs
cd /u01/name
sudo -u hdfs hdfs namenode -bootstrapStandby
cat /var/log/hadoop-hdfs/hadoop-hdfs-journalnode-hadoop2.log
service hadoop-hdfs-namenode start
cat /var/log/hadoop-hdfs/hadoop-hdfs-namenode-hadoop2.log
十三、在第一节点启动hdfs-datanode
service hadoop-hdfs-datanode start
十四、在第二节点启动hdfs-datanode
service hadoop-hdfs-datanode start
十五、在第三节点启动hdfs-datanode
service hadoop-hdfs-datanode start
十六、在第一节点
hdfs
hdfs haadmin -transitionToActive nn1
十七、在第一节点配置zookeeper
chkconfig
cd /etc/zookeeper/conf
vim zoo.cfg(添加)
server.1=hadoop1:2888:3888
server.2=hadoop2:2888:3888
server.3=hadoop3:2888:3888
cd /var/lib/zookeeper
vim myid
1
chown zookeeper:zookeeper myid
service zookeeper-server start
service zookeeper-server init
cat /var/log/zookeeper/zookeeper.log
/usr/java/default/bin/jps
十八、节点二
cd /etc/zookeeper/conf
vim zoo.cfg(添加)
server.1=hadoop1:2888:3888
server.2=hadoop2:2888:3888
server.3=hadoop3:2888:3888
cd /var/lib/zookeeper
vim myid
2
chown zookeeper:zookeeper myid
service zookeeper-server init --myid=2
service zookeeper-server start
cat /var/log/zookeeper/zookeeper.log
#rpm -qa|grep zookeeper 如没装zookeper会报错
#yum install zookeeper-server
十九、在第三个节点
yum install zookeeper-server
cd /etc/zookeeper/conf
vim zoo.cfg(添加)
server.1=hadoop1:2888:3888
server.2=hadoop2:2888:3888
server.3=hadoop3:2888:3888
cd /var/lib/zookeeper
vim myid
3
chown zookeeper:zookeeper myid
service zookeeper-server init --myid=3
service zookeeper-server start
cat /var/log/zookeeper/zookeeper.log
二十、安装zkfc,做主备切换
1.在第一个节点
yum install hadoop-hdfs-zkfc
chkconfig hadoop-hdfs-zkfc off
service hadoop-hdfs-zkfc start
hdfs zkfc -formatZK  #注:service 脚本不接受 -formatZK 参数,格式化需用 hdfs zkfc 命令
cat /var/log/hadoop-hdfs/hadoop-hdfs-zkfc-hadoop1.log
hdfs zkfc -formatZK
service hadoop-hdfs-zkfc start
/usr/lib/zookeeper/bin/zkCli.sh
hdfs zkfc -h
hdfs zkfc -formatZK
service hadoop-hdfs-zkfc start
sudo -u hdfs hdfs dfs -ls / #检查hdfs-site.xml中dfs.client.failover.proxy.provider.guoyijin
hadoop fs -ls /
sudo -u hdfs hdfs dfs -ls hdfs://hadoop1:8020/
sudo -u hdfs hdfs dfs -mkdir hdfs://hadoop1:8020/user
sudo -u hdfs hdfs dfs -ls hdfs://hadoop1:8020/
service hadoop-hdfs-namenode restart
/usr/java/default/bin/jps
service hadoop-hdfs-zkfc start
hdfs dfs -ls /
2.在第二个节点
yum install hadoop-hdfs-zkfc
chkconfig hadoop-hdfs-zkfc off
/usr/lib/zookeeper/bin/zkCli.sh
hdfs haadmin
hdfs haadmin -failover nn2 nn1
service hadoop-hdfs-zkfc start
service hadoop-hdfs-namenode stop
/usr/java/default/bin/jps
hdfs dfs -ls /
service hadoop-hdfs-namenode restart
hdfs dfs -ls /
++++++开始:yarn ,mapreduce,spark
+++++yarn
二十一、在第三节点,启动yarn的主控节点,相当于master
service hadoop-yarn-resourcemanager
service hadoop-yarn-resourcemanager start
http://hadoop3:8088/cluster
cd /u01
chown yarn:yarn local
service hadoop-yarn-nodemanager start
二十二、在第一个节点启动yarn节点
service hadoop-yarn-nodemanager start
service hadoop-yarn-nodemanager status
service hadoop-yarn-nodemanager stop
cd /etc/hadoop/conf
vim yarn-site.xml #注意目录不要写错
service hadoop-yarn-nodemanager start
service hadoop-yarn-nodemanager stop
cd /u01
chown yarn:yarn local
service hadoop-yarn-nodemanager start
二十三、在第二个节点启动yarn节点
cd /etc/hadoop/conf
vim yarn-site.xml #注意目录不要写错
service hadoop-yarn-nodemanager start
service hadoop-yarn-nodemanager stop
cd /u01
chown yarn:yarn local
service hadoop-yarn-nodemanager start
+++++mapreduce
二十四、在第一节点
cat /var/log/hadoop-yarn/yarn-yarn-nodemanager-hadoop1.log
hadoop fs -mkdir /user/history
su - hdfs
hadoop fs -mkdir /user/history
hadoop fs -chmod -R 1777 /user/history
hadoop fs -chown mapred:hadoop /user/history
hadoop fs -mkdir -p /var/log/hadoop-yarn/app
hadoop fs -chown yarn:mapred /var/log/hadoop-yarn/app
hdfs dfs -ls -R /
hadoop fs -mkdir /tmp
hadoop fs -chmod 1777 /tmp
hdfs dfs -ls -R /
exit
pwd
cd /etc/hadoop/conf
cat yarn-site.xml
二十五、第三个节点
yum install hadoop-mapreduce-historyserver
service hadoop-mapreduce-historyserver start
cd /var/log/hadoop-mapreduce
cat mapred-mapred-historyserver-hadoop3.out
vim /etc/hadoop/conf/hdfs-site.xml
vim /etc/hadoop/conf/mapred-site.xml
cat /etc/hadoop/conf/yarn-site.xml
service hadoop-mapreduce-historyserver start
/usr/lib/java/bin/jps
二十六、第一个节点传到hdfs
su - hdfs
cd /var/log/hadoop-hdfs/
hadoop fs -mkdir /user/hdfs
hadoop fs -mkdir /user/hdfs/input
hadoop fs -put * /user/hdfs/input
hadoop fs -ls -R /user/hdfs
二十七、第一个节点跑一个任务
hadoop jar /usr/lib/hadoop-mapreduce/hadoop-mapreduce-examples.jar wordcount /user/hdfs/input /user/hdfs/output
http://hadoop3:8088/cluster
hadoop fs -ls /user/hdfs/output
hadoop fs -cat /user/hdfs/output/part-r-00000
+++++spark
二十八、第一个节点
cd spark/
tar -zxvf spark-1.4.1-bin-hadoop2.6.tgz -C /usr/lib
cd /usr/lib/
ln -s spark-1.4.1-bin-hadoop2.6 spark
cd /usr/lib/spark/conf
ls
上传scp gyj-spark.zip root@hadoop1:/u01/app/backup
unzip gyj-spark.zip
cd spark-defaults.conf
cat spark-env.sh
cp spark-* /usr/lib/spark/conf
cd /usr/lib/spark/conf
vim spark-defaults.conf
/usr/lib/spark/bin/spark-shell --master yarn-client
exit
sudo -u hdfs /usr/lib/spark/bin/spark-shell --master yarn-client
cd /usr/lib/spark/conf
cp log4j.properties.template log4j.properties
vim log4j.properties
WARN
su - hdfs
pwd
ll
vim .bash_profile
export SPARK_HOME=/usr/lib/spark
export JAVA_HOME=/usr/java/default
export PATH=$PATH:$JAVA_HOME/bin:$SPARK_HOME/bin
source .bash_profile
/usr/lib/spark/bin/spark-shell --master yarn-client
scala> val rdd=sc.textFile("/user/hdfs/input")
scala> rdd.count