Hadoop Cluster Deployment Notes

Reference: http://www.cnblogs.com/xia520pi/archive/2012/05/16/2503949.html
1. Change the hostname
    [root@aaa hadoop]# hostname hadoop.datanode1.com
    [root@aaa hadoop]# vi /etc/sysconfig/network
    Change:
    HOSTNAME=hadoop.datanode1.com
    Check the result:
    [root@aaa hadoop]# hostname
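    The same change is needed on every node, each with its own name; a minimal sketch for the second data node (hostnames taken from the hosts table in step 2, sed assumed available):
    [root@aaa hadoop]# hostname hadoop.datanode2.com
    [root@aaa hadoop]# sed -i 's/^HOSTNAME=.*/HOSTNAME=hadoop.datanode2.com/' /etc/sysconfig/network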
2. Edit the hosts file
    [root@hadoop hadoop]# vi /etc/hosts
    Add:
    192.168.1.204 hadoop.datanode1.com
    192.168.1.205 hadoop.datanode2.com
    192.168.1.206 hadoop.datanode3.com
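    The same three entries should be present in /etc/hosts on every node. A quick check that the names resolve (assuming ICMP is not blocked between the machines):
    [root@hadoop hadoop]# ping -c 1 hadoop.datanode2.com
    [root@hadoop hadoop]# ping -c 1 hadoop.datanode3.com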
3. Passwordless SSH setup
    Prerequisite: ssh is already installed
    Configure the master node:
    As the hadoop user:
    [hadoop@hadoop ~]$ ssh-keygen -t rsa -P ''
    [hadoop@hadoop ~]$ cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
    [hadoop@hadoop ~]$ chmod 600 ~/.ssh/authorized_keys
    [hadoop@hadoop ~]$ chmod 700 ~/.ssh
    As the root user:
    [root@hadoop hadoop]# vi /etc/ssh/sshd_config
    Uncomment the following lines, then save:
    RSAAuthentication yes
    PubkeyAuthentication yes
    AuthorizedKeysFile .ssh/authorized_keys
    [root@hadoop hadoop]# service sshd restart
    Test as the hadoop user that key login works:
    [root@hadoop hadoop]# su hadoop
    [hadoop@hadoop ~]$ ssh localhost
   
    Copy the public key to the slave node
    [hadoop@hadoop ~]$ scp ~/.ssh/id_rsa.pub [email protected]:~/
    Configure the slave node:
    [hadoop@hadoop ~]$ mkdir ~/.ssh
    [hadoop@hadoop ~]$ chmod 700 ~/.ssh
    [hadoop@hadoop ~]$ cat ~/id_rsa.pub >> ~/.ssh/authorized_keys
    [hadoop@hadoop ~]$ chmod 600 ~/.ssh/authorized_keys
    [hadoop@hadoop ~]$ su root
    [root@hadoop hadoop]# vi /etc/ssh/sshd_config
    Uncomment the following lines, then save:
    RSAAuthentication yes
    PubkeyAuthentication yes
    AuthorizedKeysFile .ssh/authorized_keys
    [root@hadoop hadoop]# service sshd restart
    [root@hadoop hadoop]# su hadoop
    [hadoop@hadoop ~]$ rm -rf ~/id_rsa.pub
   
    On the master node, test that you can log in to the slave without a password
    [hadoop@hadoop ~]$ ssh 192.168.1.205
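    The copy/append steps above have to be repeated for every slave, not just 192.168.1.205. A shorter sketch using ssh-copy-id (assuming it is available on the master; otherwise repeat the scp and cat steps per node):
    [hadoop@hadoop ~]$ ssh-copy-id -i ~/.ssh/id_rsa.pub [email protected]
    [hadoop@hadoop ~]$ ssh hadoop.datanode3.com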
   
4. Install the JDK
    [root@hadoop hadoop]# mkdir /usr/java
    [root@hadoop hadoop]# cp /home/hadoop/soft/jdk-6u45-linux-x64.bin /usr/java
    [root@hadoop hadoop]# cd /usr/java
    [root@hadoop java]# chmod +x jdk-6u45-linux-x64.bin
    [root@hadoop java]# ./jdk-6u45-linux-x64.bin
    [root@hadoop java]# rm -rf jdk-6u45-linux-x64.bin
    Add the JDK environment variables
    [root@hadoop java]# vi /etc/profile
    Append to the end of the file:
export JAVA_HOME=/usr/java/jdk1.6.0_45
export JRE_HOME=/usr/java/jdk1.6.0_45/jre
export CLASSPATH=.:$CLASSPATH:$JAVA_HOME/lib:$JRE_HOME/lib
export PATH=$PATH:$JAVA_HOME/bin
    [root@hadoop java]# source /etc/profile
    [root@hadoop java]# java -version
   
    If java -version reports the wrong JDK, set the default JDK:
    [root@hadoop java]# sudo update-alternatives --install /usr/bin/java java /usr/java/jdk1.6.0_45/bin/java 300
    [root@hadoop java]# sudo update-alternatives --install /usr/bin/javac javac /usr/java/jdk1.6.0_45/bin/javac 300
    [root@hadoop java]# sudo update-alternatives --config java
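    A quick sanity check that the intended JDK is the one on the PATH (a minimal sketch; the Hello class is only an illustration):
    [root@hadoop java]# which java
    [root@hadoop java]# java -version
    [root@hadoop java]# echo 'public class Hello { public static void main(String[] a) { System.out.println("jdk ok"); } }' > /tmp/Hello.java
    [root@hadoop java]# javac -d /tmp /tmp/Hello.java && java -cp /tmp Hello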
5. Install Hadoop
    [root@hadoop hadoop]# cp /home/hadoop/soft/hadoop-1.2.1.tar.gz /usr
    [root@hadoop hadoop]# cd /usr
    [root@hadoop usr]# tar -zxvf hadoop-1.2.1.tar.gz
    [root@hadoop usr]# mv hadoop-1.2.1 hadoop
    [root@hadoop usr]# chown -R hadoop:hadoop hadoop
    [root@hadoop usr]# rm -rf hadoop-1.2.1.tar.gz
    [root@hadoop usr]# mkdir /usr/hadoop/tmp
    [root@hadoop usr]# chown hadoop:hadoop /usr/hadoop/tmp
    [root@hadoop usr]# vi /etc/profile
    Append the environment variables to the end of the file:
export HADOOP_HOME=/usr/hadoop
export PATH=$PATH:$HADOOP_HOME/bin
    [root@hadoop usr]# source /etc/profile
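    A quick check that the PATH change took effect (the version printed should match the tarball that was unpacked):
    [root@hadoop usr]# hadoop version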
 
6. Configure Hadoop
    1) Configure hadoop-env.sh
    [root@hadoop hadoop]# cd conf
    [root@hadoop conf]# pwd
    /usr/hadoop/conf
    [root@hadoop conf]# vi hadoop-env.sh
    Append the following to the end of the file:
    # set java environment
    export JAVA_HOME=/usr/java/jdk1.6.0_45
    2) Configure core-site.xml (the tmp folder under /usr/hadoop was already created in step 5)
    [root@hadoop conf]# vi core-site.xml
        <property>
            <name>hadoop.tmp.dir</name>
            <value>/usr/hadoop/tmp/hadoop-${user.name}</value>
            <description>A base for other temporary directories.</description>
        </property>
        <!-- file system properties -->
        <property>
            <name>fs.default.name</name>
            <value>hdfs://hadoop.datanode1.com:9000</value>
        </property>
        <property>
            <name>dfs.datanode.socket.write.timeout</name>
            <value>3000000</value>
        </property>
        <property>
            <name>dfs.socket.timeout</name>
            <value>3000000</value>
        </property>
        <property>
            <name>mapred.hosts.exclude</name>
            <value>/usr/hadoop/conf/slaves.exclude</value>
        </property>
        <property>
            <name>dfs.hosts.exclude</name>
            <value>/usr/hadoop/conf/slaves.exclude</value>
        </property>
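    Note: both *.hosts.exclude properties point at /usr/hadoop/conf/slaves.exclude, which does not exist yet; the daemons may refuse to start if the referenced file is missing, so creating an empty one is a safe precaution (a minimal sketch):
    [root@hadoop conf]# touch /usr/hadoop/conf/slaves.exclude
    [root@hadoop conf]# chown hadoop:hadoop /usr/hadoop/conf/slaves.exclude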
    3) Configure mapred-site.xml
    [root@hadoop conf]# vi mapred-site.xml
        <property>
            <name>mapred.job.tracker</name>
            <value>hadoop.datanode1.com:9001</value>
        </property>
        <property>
            <name>mapred.local.dir</name>
            <value>/data/hadoop/mapred/local/hadoop-${user.name}</value>
        </property>
        <property>
            <name>mapred.system.dir</name>
            <value>/data/hadoop/mapred/system/hadoop-${user.name}</value>
        </property>
        <property>
            <name>mapred.reduce.parallel.copies</name>
            <value>15</value>
        </property>
        <property>
            <name>mapred.tasktracker.map.tasks.maximum</name>
            <value>6</value>
        </property>
        <property>
            <name>mapred.tasktracker.reduce.tasks.maximum</name>
            <value>2</value>
        </property>
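    With mapred.tasktracker.map.tasks.maximum=6 and mapred.tasktracker.reduce.tasks.maximum=2, each TaskTracker offers 6 map slots and 2 reduce slots, so the three nodes listed in the slaves file give the cluster 3 x 6 = 18 concurrent map tasks and 3 x 2 = 6 concurrent reduce tasks; tune these numbers to the actual CPU and memory of the machines.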
    4) Configure hdfs-site.xml
    [root@hadoop conf]# vi hdfs-site.xml
        <property>
            <name>dfs.name.dir</name>
            <value>/data/hadoop/hdfs/name/hadoop-${user.name}</value>
        </property>
        <property>
            <name>dfs.data.dir</name>
            <value>/data/hadoop/hdfs/data/hadoop-${user.name}</value>
        </property>
        <property>
            <name>dfs.replication</name>
            <value>2</value>
        </property>
        <property>
            <name>dfs.support.append</name>
            <value>true</value>
            <description>support the function append.</description>
        </property>
        <property>
            <name>dfs.permissions</name>
            <value>false</value>
        </property>
        <property>
            <name>dfs.block.size</name>
            <value>134217728</value>
        </property>
        <property>
            <name>dfs.balance.bandwidthPerSec</name>
            <value>10485760</value>
            <description>Specifies the maximum bandwidth that each datanode can utilize for the balancing purpose in term of the number of bytes per second.</description>
        </property>
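    The dfs.name.dir, dfs.data.dir and mapred.local.dir values above point under /data on the local filesystem; that tree has to exist and be writable by the hadoop user on every node, or the daemons cannot create their subdirectories (a minimal sketch, run as root on each node):
    [root@hadoop conf]# mkdir -p /data/hadoop
    [root@hadoop conf]# chown -R hadoop:hadoop /data/hadoop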
    5) Configure the masters file
    [root@hadoop conf]# vi masters
    Replace localhost with hadoop.datanode1.com (in Hadoop 1.x this file names the host that runs the SecondaryNameNode)
    6) Configure the slaves file
    [root@hadoop conf]# vi slaves
    Replace the contents with:
    hadoop.datanode1.com
    hadoop.datanode2.com
    hadoop.datanode3.com
    Configure Hadoop on the slave nodes the same way as on the master (see the sync sketch below).
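    Rather than repeating steps 5-6 by hand, the finished installation can be pushed from the master over the passwordless SSH set up in step 3 (a minimal sketch, assuming rsync is installed; the JDK and /etc/profile changes from steps 4-5 still have to be made on each slave):
    On each slave, as root:
    [root@hadoop hadoop]# mkdir /usr/hadoop && chown hadoop:hadoop /usr/hadoop
    On the master, as the hadoop user:
    [hadoop@hadoop ~]$ rsync -a /usr/hadoop/ hadoop@hadoop.datanode2.com:/usr/hadoop/
    [hadoop@hadoop ~]$ rsync -a /usr/hadoop/ hadoop@hadoop.datanode3.com:/usr/hadoop/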
7. Start the Hadoop cluster
    1) Format the NameNode (run only once; do not run it again on later startups)
    [hadoop@hadoop ~]$ hadoop namenode -format
   
    2) The firewall must be stopped on every node before starting (as root)
    [root@hadoop conf]# service iptables stop
 
    3) Start Hadoop
    [hadoop@hadoop hadoop]$ bin/start-all.sh
    4) Check that the daemons started
    [hadoop@hadoop hadoop]$ jps
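    On this master (which also doubles as a slave) jps should list NameNode, SecondaryNameNode, JobTracker, DataNode and TaskTracker; the other slaves should show only DataNode and TaskTracker. A quick smoke test using the examples jar bundled in the 1.2.1 tarball (adjust the jar name if your build differs):
    [hadoop@hadoop hadoop]$ bin/hadoop dfsadmin -report
    [hadoop@hadoop hadoop]$ bin/hadoop jar hadoop-examples-1.2.1.jar pi 10 100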
 
