Hadoop CDH4.4 Cluster Setup

After more than a year of PaaS development I have moved on to big data, so I am backing up some installation notes here.


Cluster layout


hadoop-001     10.168.204.55  NameNode, SecondaryNameNode, ResourceManager
hadoop-002     10.168.204.56  DataNode,NodeManager
hadoop-003     10.168.204.57  DataNode,NodeManager
hadoop-004     10.168.204.58  DataNode,NodeManager

Hadoop version: CDH4.4.0
CentOS version: 6.3

I. Preparation

   1. JDK 1.7

        http://download.oracle.com/otn-pub/java/jdk/7u45-b18/jdk-7u45-linux-x64.rpm

   sudo rpm -ivh jdk-7u45-linux-x64.rpm
   alternatives --install /usr/bin/java java /usr/java/jdk1.7.0_45/bin/java 300
   alternatives --install /usr/bin/javac javac /usr/java/jdk1.7.0_45/bin/javac 300
   alternatives --config java
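
   To verify the install took effect (standard JDK commands, nothing assumed beyond the rpm above):

   java -version     # should report java version "1.7.0_45"
   javac -version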


   2. Set the hostname

      

vim /etc/sysconfig/network  # set each server's hostname; takes effect after a reboot
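
For reference, the file on the master would look like this (a minimal sketch; set HOSTNAME accordingly on each server):

NETWORKING=yes
HOSTNAME=hadoop-001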


      Configure /etc/hosts

  10.168.204.55 hadoop-001
  10.168.204.56 hadoop-002
  10.168.204.57 hadoop-003
  10.168.204.58 hadoop-004

  3. Disable the firewall

service iptables status
service iptables stop 
chkconfig iptables off   # "off" disables it at boot; "stop" is not a valid chkconfig action

  4. Disable SELinux

# set SELINUX=disabled
vim /etc/selinux/config
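
The same change as one-liners, if you prefer (assumes GNU sed; setenforce only changes the running mode):

sudo sed -i 's/^SELINUX=.*/SELINUX=disabled/' /etc/selinux/config
sudo setenforce 0   # permissive immediately; the config edit takes full effect after reboot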

  5. Create the hadoop user and make it a sudoer

       

adduser hadoop
passwd  hadoop
   
sudo vim /etc/sudoers
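
The line to add in /etc/sudoers (a minimal sketch; visudo is the safer way to edit this file, since it validates the syntax on save):

hadoop  ALL=(ALL)       ALL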

  6. Passwordless SSH

# as the hadoop user
ssh-keygen -t rsa
cd ~/.ssh
cat id_rsa.pub >> authorized_keys
chmod 600 authorized_keys


Test that ssh hadoop-001 connects to the local machine without a password prompt,
then scp authorized_keys to the other slave servers, as sketched below.
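
A sketch of that copy step (assumes ~/.ssh already exists with mode 700 on each slave):

scp ~/.ssh/authorized_keys hadoop-002:~/.ssh/
scp ~/.ssh/authorized_keys hadoop-003:~/.ssh/
scp ~/.ssh/authorized_keys hadoop-004:~/.ssh/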

      

II. Installation

  1. Download the CDH4.4 tarball

     mkdir cdh4.4.0
     cd cdh4.4.0
     wget http://archive.cloudera.com/cdh4/cdh/4/hadoop-2.0.0-cdh4.4.0.tar.gz
     tar -xvzf hadoop-2.0.0-cdh4.4.0.tar.gz

  2. Set environment variables

   Edit /etc/profile or ~/.bashrc; I use .bashrc here, but either works.

export JAVA_HOME=/usr/java/jdk1.7.0_45
export HADOOP_HOME=/home/hadoop/cdh4.4.0/hadoop-2.0.0-cdh4.4.0
export HADOOP_COMMON_HOME=$HADOOP_HOME
export HADOOP_HDFS_HOME=$HADOOP_HOME
export HADOOP_MAPRED_HOME=$HADOOP_HOME
export HADOOP_YARN_HOME=$HADOOP_HOME
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
export HDFS_CONF_DIR=$HADOOP_HOME/etc/hadoop
export YARN_CONF_DIR=$HADOOP_HOME/etc/hadoop
export HADOOP_LIB=$HADOOP_HOME/lib
export JAVA_LIBRARY_PATH=$HADOOP_HOME/lib/native

export PATH=$PATH:$JAVA_HOME/bin:$JAVA_HOME/jre/bin
export CLASSPATH=.:$JAVA_HOME/lib/tools.jar:$JAVA_HOME/lib/dt.jar:$HADOOP_LIB/native/libhadoop.so

libhadoop.so is only needed later, when installing Impala.
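
Then reload the file so the variables take effect in the current shell:

source ~/.bashrc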

  3. Configuration files

   All four files below live in $HADOOP_CONF_DIR ($HADOOP_HOME/etc/hadoop).

core-site.xml

<configuration>
  <property>
    <name>fs.default.name</name>
    <value>hdfs://hadoop-001:8020</value>
  </property>
  <property>
    <name>hadoop.tmp.dir</name>
    <value>/hadoop/tmp</value>
  </property>
  <property>
    <name>fs.trash.interval</name>
    <value>10080</value>
  </property>
  <property>
    <name>fs.trash.checkpoint.interval</name>
    <value>10080</value>
  </property>
  <property>
    <name>hadoop.proxyuser.hadoop.hosts</name>
    <value>hadoop-001</value>
  </property>
  <property>
    <name>hadoop.proxyuser.hadoop.groups</name>
    <value>hadoop</value>
  </property>
</configuration>

hdfs-site.xml

<configuration>
  <property>
    <name>dfs.replication</name>
    <value>2</value>
  </property>
  <property>
    <name>dfs.namenode.name.dir</name>
    <value>file:/hadoop/name</value>
    <final>true</final>
  </property>
  <property>
    <name>dfs.datanode.data.dir</name>
    <value>file:/hadoop/data</value>
    <final>true</final>
  </property>
  <property>
    <name>dfs.permissions</name>
    <value>false</value>
  </property>
  <property>
    <name>dfs.namenode.http-address</name>
    <value>hadoop-001:50070</value>
  </property>
  <property>
    <name>dfs.secondary.http.address</name>
    <value>hadoop-001:50090</value>
  </property>
  <property>
    <name>dfs.webhdfs.enabled</name>
    <value>true</value>
  </property>
</configuration>

yarn-site.xml

<configuration>
  <property>
    <name>yarn.resourcemanager.resource-tracker.address</name>
    <value>hadoop-001:18025</value>
  </property>
  <property>
    <name>yarn.resourcemanager.address</name>
    <value>hadoop-001:18040</value>
  </property>
  <property>
    <name>yarn.resourcemanager.scheduler.address</name>
    <value>hadoop-001:18030</value>
  </property>
  <property>
    <name>yarn.resourcemanager.admin.address</name>
    <value>hadoop-001:18141</value>
  </property>
  <property>
    <name>yarn.resourcemanager.webapp.address</name>
    <value>hadoop-001:8088</value>
  </property>
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce.shuffle</value>
  </property>
  <property>
    <name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
    <value>org.apache.hadoop.mapred.ShuffleHandler</value>
  </property>
  <property>
    <name>yarn.application.classpath</name>
    <value>$HADOOP_CONF_DIR,$HADOOP_COMMON_HOME/share/hadoop/common/*,$HADOOP_COMMON_HOME/share/hadoop/common/lib/*,$HADOOP_HDFS_HOME/share/hadoop/hdfs/*,$HADOOP_HDFS_HOME/share/hadoop/hdfs/lib/*,$HADOOP_YARN_HOME/share/hadoop/yarn/*,$HADOOP_YARN_HOME/share/hadoop/yarn/lib/*</value>
  </property>
</configuration>

mapred-site.xml

<configuration>
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>
  <property>
    <name>mapreduce.jobhistory.address</name>
    <value>hadoop-001:10020</value>
  </property>
  <property>
    <name>mapreduce.jobhistory.webapp.address</name>
    <value>hadoop-001:19888</value>
  </property>
  <property>
    <name>mapreduce.job.tracker</name>
    <value>hadoop-001:8021</value>
    <final>true</final>
  </property>
  <property>
    <name>mapred.system.dir</name>
    <value>file:/hadoop/mapred/system</value>
    <final>true</final>
  </property>
  <property>
    <name>mapred.local.dir</name>
    <value>file:/hadoop/mapred/local</value>
    <final>true</final>
  </property>
  <property>
    <name>mapred.child.env</name>
    <value>LD_LIBRARY_PATH=/usr/local/lib</value>
  </property>
</configuration>
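One more file worth checking while you are in $HADOOP_CONF_DIR: start-all.sh reads the slaves file there to find the worker nodes, so it should list the three DataNodes (one hostname per line):

hadoop-002
hadoop-003
hadoop-004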
4. Prepare the HDFS directories

Create the paths referenced in the configs above (on every node) and give them to the hadoop user:

sudo mkdir -p /hadoop/tmp /hadoop/mapred/system /hadoop/mapred/local /hadoop/name /hadoop/data
sudo chown -R hadoop:hadoop /hadoop


5. scp CDH4.4 to the slave nodes

     scp -r cdh4.4.0/ hadoop-002:~/.  
     scp -r cdh4.4.0/ hadoop-003:~/.  
     scp -r cdh4.4.0/ hadoop-004:~/.  
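
The slaves also need the JDK from step 1 and the environment variables from step 2; for the latter, copying the same .bashrc works if the paths are identical on every node (assumed here):

     scp ~/.bashrc hadoop-002:~/
     scp ~/.bashrc hadoop-003:~/
     scp ~/.bashrc hadoop-004:~/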
     


III. Startup

1. Format the filesystem

# on the master node, hadoop-001

cd cdh4.4.0/hadoop-2.0.0-cdh4.4.0/bin
./hadoop namenode -format


2. Start the cluster

cd cdh4.4.0/hadoop-2.0.0-cdh4.4.0/sbin  
./start-all.sh

Run jps on each node and check that the expected processes are present.
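
Given the layout at the top, the output should look roughly like this (PIDs omitted; a sanity sketch, not captured output):

# on hadoop-001
$ jps
NameNode
SecondaryNameNode
ResourceManager

# on hadoop-002/003/004
$ jps
DataNode
NodeManager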

IV. Issues encountered



Weibo: http://weibo.com/kingjames3
