HDB IN ACTION(2)

V. Installing HDB 2.2 from the Command Line
1. System parameters

vi /etc/sysctl.conf

    # modify
    kernel.sysrq = 1
    net.ipv4.tcp_syncookies = 0

    # added for hawq
    kernel.shmmni = 4096
    kernel.sem = 250 512000 100 2048
    kernel.msgmni = 2048
    net.ipv4.tcp_tw_recycle = 1
    net.ipv4.tcp_max_syn_backlog = 200000
    net.ipv4.conf.all.arp_filter = 1
    net.ipv4.ip_local_port_range = 1281 65535
    net.core.netdev_max_backlog = 200000
    vm.overcommit_memory = 2
    fs.nr_open = 3000000
    kernel.threads-max = 798720
    kernel.pid_max = 798720
    net.core.rmem_max = 2097152
    net.core.wmem_max = 2097152

    sysctl -p  # apply the settings

2. vi /etc/security/limits.conf

Add the following lines:

    * soft nofile 2900000
    * hard nofile 2900000
    * soft nproc 131072
    * hard nproc 131072
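
A quick sanity check (a suggestion, not part of the original steps): log in again so the new limits take effect, then verify them with ulimit.

    ulimit -n   # should report 2900000 (max open files)
    ulimit -u   # should report 131072 (max user processes)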

3. hdfs-site.xml

    Property                                  Value
    dfs.allow.truncate                        true
    dfs.block.access.token.enable             false for an insecure HDFS cluster, true for a secure cluster
    dfs.block.local-path-access.user          gpadmin
    dfs.client.read.shortcircuit              true
    dfs.client.socket-timeout                 300000000
    dfs.client.use.legacy.blockreader.local   false
    dfs.datanode.data.dir.perm                750
    dfs.datanode.handler.count                60
    dfs.datanode.max.transfer.threads         40960
    dfs.datanode.socket.write.timeout         7200000
    dfs.namenode.accesstime.precision         0
    dfs.namenode.handler.count                600
    dfs.support.append                        true

      
       
       <property>
          <name>dfs.allow.truncate</name>
          <value>true</value>
       </property>
       <property>
          <name>dfs.block.access.token.enable</name>
          <value>false</value>
       </property>
       <property>
          <name>dfs.block.local-path-access.user</name>
          <value>gpadmin</value>
       </property>
       <!-- Note: with this property set, the NameNode failed to start after installation; removing it restored normal startup -->
       <property>
          <name>dfs.client.read.shortcircuit</name>
          <value>true</value>
       </property>
       <!-- Note: with this property set, the NameNode failed to start after installation; removing it restored normal startup -->
       <property>
          <name>dfs.client.socket-timeout</name>
          <value>300000000</value>
       </property>
       <property>
          <name>dfs.client.use.legacy.blockreader.local</name>
          <value>false</value>
       </property>
       <property>
          <name>dfs.datanode.data.dir.perm</name>
          <value>750</value>
       </property>
       <property>
          <name>dfs.datanode.handler.count</name>
          <value>60</value>
       </property>
       <property>
          <name>dfs.datanode.max.transfer.threads</name>
          <value>40960</value>
       </property>
       <property>
          <name>dfs.datanode.socket.write.timeout</name>
          <value>7200000</value>
       </property>
       <property>
          <name>dfs.namenode.accesstime.precision</name>
          <value>0</value>
       </property>
       <property>
          <name>dfs.namenode.handler.count</name>
          <value>600</value>
       </property>
       <property>
          <name>dfs.support.append</name>
          <value>true</value>
       </property>

4. core-site.xml

    Property                            Value
    ipc.client.connection.maxidletime   3600000
    ipc.client.connect.timeout          300000
    ipc.server.listen.queue.size        3300

      <property>
        <name>ipc.client.connection.maxidletime</name>
        <value>3600000</value>
      </property>
      <property>
        <name>ipc.client.connect.timeout</name>
        <value>300000</value>
      </property>
      <property>
        <name>ipc.server.listen.queue.size</name>
        <value>3300</value>
      </property>


      # Restart the cluster for the settings to take effect.
      # Note: if the restart fails with "Could not reserve enough space for object heap",
      # you can add export _JAVA_OPTIONS="-Xms512m -Xmx1024m" to /etc/profile to force a JVM heap limit.
      # The test environment did not use this approach; instead, the heap sizes were adjusted in the Hadoop configuration files.
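
      The configuration-file approach can look roughly like the following sketch, assuming a standard Apache Hadoop 2.x layout under /home/hadoop/hadoop; the heap values here are illustrative, not the ones used in the test environment.

      # $HADOOP_CONF_DIR/hadoop-env.sh (illustrative values)
      export HADOOP_HEAPSIZE=1024                                      # MB for HDFS daemons
      export HADOOP_NAMENODE_OPTS="-Xms512m -Xmx1024m $HADOOP_NAMENODE_OPTS"

      # $HADOOP_CONF_DIR/yarn-env.sh (illustrative values)
      YARN_HEAPSIZE=1000                                               # MB for YARN daemons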

5. Download the installation file

Download hdb-2.2.0.0-4141.el6.tar.gz from the Pivotal Network site: https://network.pivotal.io/

     chmod a+rx /home/hawqpackage/
     cd /home/hawqpackage
     tar xzvf hdb-2.2.0.0-4141.el6.tar.gz
     cd hdb-2.2.0

6. Install auxiliary packages
(1) httpd

    yum install httpd
    service httpd start
 **(2) HDB repo: run on c9test91**
    cd hdb-2.2.0
    ./setup_repo.sh   # Adds the HDB software to a local yum package repository. setup_repo.sh creates an HDB repo file named hdb-<version>.repo on the local host.
 The script also creates a symbolic link from the httpd document root (/var/www/html) to the directory where the HDB .tar.gz file was extracted.

  This created /etc/yum.repos.d/hdb-2.2.0.0.repo,
  and the repository can be accessed directly at http://c9test91/hdb-2.2.0.0
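
  For reference, the generated repo file looks roughly like this; a sketch based on the URL above, and the exact contents written by setup_repo.sh may differ.

  # /etc/yum.repos.d/hdb-2.2.0.0.repo (illustrative)
  [hdb-2.2.0.0]
  name=HDB 2.2.0.0
  baseurl=http://c9test91/hdb-2.2.0.0
  enabled=1
  gpgcheck=0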

  scp /etc/yum.repos.d/hdb-2.2.0.0.repo c9test92:/etc/yum.repos.d/
  scp /etc/yum.repos.d/hdb-2.2.0.0.repo c9test93:/etc/yum.repos.d/
  scp /etc/yum.repos.d/hdb-2.2.0.0.repo c9test94:/etc/yum.repos.d/

 **(3) epel-release repo: run on all hosts**
   yum install epel-release
   Edit /etc/yum.repos.d/epel.repo:
   uncomment every baseurl line and comment out every mirrorlist line
   [epel]
    name=Extra Packages for Enterprise Linux 6 - $basearch
    baseurl=http://download.fedoraproject.org/pub/epel/6/$basearch
    #mirrorlist=https://mirrors.fedoraproject.org/metalink?repo=epel-6&arch=$basearch
   ... (the remaining sections are edited the same way)

  If errors occur after adding the EPEL repo:
  yum repolist                 # confirm the local yum repos are healthy
  In /etc/yum.repos.d/epel.repo, enable baseurl and disable mirrorlist


 **(4) Error handling:**
    Missing-package error: Package: hawq-ranger-plugin_2_2_0_0-2.2.0.0-4141.el6.noarch (hdb-2.2.0.0) Requires: bigtop-tomcat
     wget -O /etc/yum.repos.d/bigtop.repo http://archive.apache.org/dist/bigtop/1.2.0/repos/centos6/bigtop.repo

    scp /etc/yum.repos.d/bigtop.repo root@c9test92:/etc/yum.repos.d/
    scp /etc/yum.repos.d/bigtop.repo root@c9test93:/etc/yum.repos.d/
    scp /etc/yum.repos.d/bigtop.repo root@c9test94:/etc/yum.repos.d/

7. Install HDB 2.2

su - root
yum install -y hawq
source /usr/local/hawq/greenplum_path.sh
# hawq is installed under /usr/local/hawq by default. To use it as root as well, add the following line to /etc/profile:
source /usr/local/hawq/greenplum_path.sh

# then run "source /etc/profile" to make it take effect.

# Note: if yum fails, you can run "yum clean all" to clear the previous installation attempt before retrying.
#(1) Create the all-hosts file hostfile and the segment-hosts file seg_hosts
         vi hostfile
        c9test91
        c9test92
        c9test93
        c9test94

        vi seg_hosts
        c9test93
        c9test94

#(2) Install the hawq software on the other hosts
         hawq ssh -f hostfile -e "yum install -y hawq" 


#(3) gpadmin user environment file
        su - gpadmin
        vi .bash_profile
        source /usr/local/hawq/greenplum_path.sh

    export HADOOP_PREFIX="/home/hadoop/hadoop"
    export HADOOP_MAPRED_HOME=$HADOOP_PREFIX
    export HADOOP_COMMON_HOME=$HADOOP_PREFIX
    export HADOOP_HOME=$HADOOP_PREFIX
    export HADOOP_HDFS_HOME=$HADOOP_PREFIX
    export HADOOP_CONF_DIR=$HADOOP_PREFIX/etc/hadoop
    export YARN_CONF_DIR=$HADOOP_PREFIX/etc/hadoop
    export YARN_HOME=$HADOOP_PREFIX
    #export HBASE_HOME="/opt/software/hbase"
    #export SPARK_HOME="/opt/software/spark"
    #export SPARK_JAR="hdfs:///spark/spark.jar"
    export PATH=$PATH:$HADOOP_PREFIX/bin:$HADOOP_PREFIX/sbin

        scp the file to the other hosts:
        hawq scp -f hostfile .bash_profile =:/home/gpadmin/

        Apply it on the other hosts:
        hawq ssh -f hostfile -e "source /home/gpadmin/.bash_profile"

#(4) Verify that hawq is installed on every host
       hawq ssh -f hostfile -e "ls -l $GPHOME"

#(5) Create the master data directory, owned by gpadmin
       mkdir -p /home/hawqdata/master
       chown -R gpadmin:gpadmin /home/hawqdata/ 

       standby host:
       hawq ssh -h c9test92 -e 'mkdir -p /home/hawqdata/master'
       hawq ssh -h c9test92 -e 'chown -R gpadmin:gpadmin /home/hawqdata'

#(6) Create the segment data directories
       hawq ssh -f seg_hosts -e 'mkdir -p /home/hawqdata/segment'
       hawq ssh -f seg_hosts -e 'chown -R gpadmin:gpadmin /home/hawqdata'

#(7) Create temporary directories for spill files; use one directory per drive to balance writes
        $ dirs="/home/hawqdata/tmp1 /home/hawqdata/tmp2"
    $ mkdir -p $dirs
    $ chown -R gpadmin:gpadmin $dirs
    $ hawq ssh -h c9test92 -e "mkdir -p $dirs"
    $ hawq ssh -h c9test92 -e "chown -R gpadmin:gpadmin $dirs"
    $ hawq ssh -f seg_hosts -e "mkdir -p $dirs"
    $ hawq ssh -f seg_hosts -e "chown -R gpadmin:gpadmin $dirs"

#(8)hawq-site.xml
        cp $GPHOME/etc/template-hawq-site.xml hawq-site.xml

#<1> Change the hawq_dfs_url property to use the actual NameNode port number and the HAWQ data directory:
       <property>
         <name>hawq_dfs_url</name>
         <value>bvdata/hawq_default</value>
         <description>URL for accessing HDFS.</description>
       </property>
       If HDFS is configured with HA, use the nameservice ID in the hawq_dfs_url value, as above (bvdata is the nameservice).
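
       For comparison, on a cluster without NameNode HA the value would point at the NameNode host and RPC port directly; a sketch, using the RPC port from the hdfs-client.xml section below:

       <property>
         <name>hawq_dfs_url</name>
         <value>c9test91:8020/hawq_default</value>
       </property>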

       Make gpadmin the owner of the parent HDFS directory you specified:
       hdfs dfs -chown gpadmin /

#<2> Ports and directories
    Property                     Example Value
    hawq_master_address_host     c9test91
    hawq_master_address_port     5432
    hawq_standby_address_host    c9test92
    hawq_segment_address_port    40000
    hawq_master_directory        /home/hawqdata/master
    hawq_segment_directory       /home/hawqdata/segment
    hawq_master_temp_directory   /home/hawqdata/tmp1,/home/hawqdata/tmp2
    hawq_segment_temp_directory  /home/hawqdata/tmp1,/home/hawqdata/tmp2
    hawq_global_rm_type          none
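
    In hawq-site.xml each of these rows becomes a standard property element; a short sketch for the first few entries (the remaining rows follow the same pattern):

       <property>
         <name>hawq_master_address_host</name>
         <value>c9test91</value>
       </property>
       <property>
         <name>hawq_master_address_port</name>
         <value>5432</value>
       </property>
       <property>
         <name>hawq_master_directory</name>
         <value>/home/hawqdata/master</value>
       </property>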


#<3> Configure the YARN properties
    Property                        Example Value    Comment
    hawq_global_rm_type             yarn             When set to yarn, HAWQ requires the additional YARN parameters hawq_rm_yarn_address and hawq_rm_yarn_scheduler_address to be configured.
    hawq_rm_yarn_address            c9test93:8032    Must match the yarn.resourcemanager.address value in yarn-site.xml.
    hawq_rm_yarn_scheduler_address  c9test93:8030    Must match the yarn.resourcemanager.scheduler.address value in yarn-site.xml.

    If YARN is configured with HA, also edit $GPHOME/etc/yarn-client.xml:
    Property                            Example Value                                Comment
    yarn.resourcemanager.ha             rm1.example.com:8032,rm2.example.com:8032   Comma-separated list of ResourceManager hosts. When HA is enabled, the value of hawq_rm_yarn_address is ignored and this property is used instead.
    yarn.resourcemanager.scheduler.ha   rm1.example.com:8030,rm2.example.com:8030   Comma-separated list of scheduler hosts. When HA is enabled, the value of hawq_rm_yarn_scheduler_address is ignored and this property is used instead.
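
    A minimal sketch of the corresponding yarn-client.xml entries, using the example hosts from the table above (rm1/rm2 are placeholders, not hosts in this cluster):

       <property>
         <name>yarn.resourcemanager.ha</name>
         <value>rm1.example.com:8032,rm2.example.com:8032</value>
       </property>
       <property>
         <name>yarn.resourcemanager.scheduler.ha</name>
         <value>rm1.example.com:8030,rm2.example.com:8030</value>
       </property>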

#(9)slaves
        vi $GPHOME/etc/slaves
    c9test93
    c9test94

#(10) NameNode HA: edit hdfs-client.xml
        ${GPHOME}/etc/hdfs-client.xml

    <property>
      <name>dfs.ha.namenodes.bvdata</name>
      <value>c9test91,c9test92</value>
    </property>

    <property>
      <name>dfs.namenode.http-address.bvdata.c9test91</name>
      <value>c9test91:50070</value>
    </property>

    <property>
      <name>dfs.namenode.http-address.bvdata.c9test92</name>
      <value>c9test92:50070</value>
    </property>

    <property>
      <name>dfs.namenode.rpc-address.bvdata.c9test91</name>
      <value>c9test91:8020</value>
    </property>

    <property>
      <name>dfs.namenode.rpc-address.bvdata.c9test92</name>
      <value>c9test92:8020</value>
    </property>

    <property>
      <name>dfs.nameservices</name>
      <value>bvdata</value>
    </property>

#(11) Distribute hawq-site.xml, slaves, and hdfs-client.xml to the other hosts
       hawq scp -f hostfile hawq-site.xml slaves hdfs-client.xml =:$GPHOME/etc/

#(12) Initialize the cluster
#<1> On all hosts, change ownership of the software directory to gpadmin
   hawq ssh -f hostfile -e "chown -R gpadmin:gpadmin /usr/local/hawq/"

#<2> Grant gpadmin sudo privileges in /etc/sudoers
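
    One common way to do this (an illustrative entry, not from the original; adjust to your security policy):

        # run visudo as root and add:
        gpadmin ALL=(ALL) NOPASSWD: ALL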

#<3> User environment variables on all hosts:
        vi .bash_profile
    export HADOOP_PREFIX="/home/hadoop/hadoop"
    export HADOOP_MAPRED_HOME=$HADOOP_PREFIX
    export HADOOP_COMMON_HOME=$HADOOP_PREFIX
    export HADOOP_HOME=$HADOOP_PREFIX
    export HADOOP_HDFS_HOME=$HADOOP_PREFIX
    export HADOOP_CONF_DIR=$HADOOP_PREFIX/etc/hadoop
    export YARN_CONF_DIR=$HADOOP_PREFIX/etc/hadoop
    export YARN_HOME=$HADOOP_PREFIX
    #export HBASE_HOME="/opt/software/hbase"
    #export SPARK_HOME="/opt/software/spark"
    #export SPARK_JAR="hdfs:///spark/spark.jar"
    export PATH=$PATH:$HOME/bin:$HADOOP_PREFIX/bin:$HADOOP_PREFIX/sbin

#<4> Set permissions on the hadoop and zookeeper directories to 777
    chmod -R 777 /home/hadoop/hadoop   # Never set the hadoop user's home directory itself to 777, or hadoop will fail to start

#<5> Add gpadmin to the hadoop group
    sudo usermod -a -G hadoop gpadmin


#<6> If you hit /tmp permission errors, make sure the /tmp directory is set to 777
#<7> If the Hadoop ResourceManager fails with an OOM error on startup, adjust yarn-env.sh
     (the test machine has 8 GB of RAM)
     JAVA_HEAP_MAX=-Xmx2000m
     YARN_HEAPSIZE=1000
     export YARN_RESOURCEMANAGER_OPTS="-server -Xmx2G -Xms2G -Xmn1G -XX:MaxPermSize=512m -XX:PermSize=512m -XX:+UseParNewGC -XX:+UseConcMarkSweepGC "


        ---- After the steps above, gpadmin can work with Hadoop directly:
        su - gpadmin
        hdfs dfs -ls /


#(13) Initialize
         hawq init cluster

#<1> You can verify the configuration before initializing:
         hawq check -f hostfile --hadoop /home/hadoop/hadoop/

#<2> Starting and stopping the hawq service:
         hawq stop cluster
         hawq start cluster
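
    As a suggested check after starting (not a step from the original), you can query the status of the running system:

         hawq state     # shows the status of the running HAWQ master and segments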

VI. Installation Verification

 source /usr/local/hawq/greenplum_path.sh 
       export PGPORT=5432
       psql -d postgres
       postgres=# CREATE DATABASE mytest;
       postgres=# \c mytest
       mytest=# CREATE TABLE t (i int);
       CREATE TABLE
       mytest=# INSERT INTO t SELECT generate_series(1,100);
       mytest=# \timing
       mytest=# SELECT count(*) FROM t;

# You can browse the actual files in Hadoop at http://c9test91:50070; by default HAWQ creates a hawq_default directory under the HDFS root.
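
The same thing can be checked from the command line; a suggested check, with the path following from the hawq_dfs_url value set earlier:

    hdfs dfs -ls /hawq_default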
