V. Installing HDB 2.2 from the Command Line
1. System parameters (sysctl)
vi /etc/sysctl.conf
#modify
kernel.sysrq = 1
net.ipv4.tcp_syncookies = 0
#added for hawq
kernel.shmmni = 4096
kernel.sem = 250 512000 100 2048
kernel.msgmni = 2048
net.ipv4.tcp_tw_recycle = 1
net.ipv4.tcp_max_syn_backlog = 200000
net.ipv4.conf.all.arp_filter = 1
net.ipv4.ip_local_port_range = 1281 65535
net.core.netdev_max_backlog = 200000
vm.overcommit_memory = 2
fs.nr_open = 3000000
kernel.threads-max = 798720
kernel.pid_max = 798720
net.core.rmem_max=2097152
net.core.wmem_max=2097152
sysctl -p   # apply the settings
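An optional sanity check is to read back a few of the values and confirm they match what was written to /etc/sysctl.conf:
sysctl kernel.shmmni kernel.sem vm.overcommit_memory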
2. vi /etc/security/limits.conf
Add the following:
* soft nofile 2900000
* hard nofile 2900000
* soft nproc 131072
* hard nproc 131072
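The new limits only apply to sessions started after the change. After logging in again, a quick optional check:
ulimit -n   # open files, should report 2900000
ulimit -u   # max user processes, should report 131072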
3.hdfs-site.xml
dfs.allow.truncate true
dfs.block.access.token.enable   false for an unsecured HDFS cluster, true for a secured cluster
dfs.block.local-path-access.user gpadmin
dfs.client.read.shortcircuit true
dfs.client.socket-timeout 300000000
dfs.client.use.legacy.blockreader.local false
dfs.datanode.data.dir.perm 750
dfs.datanode.handler.count 60
dfs.datanode.max.transfer.threads 40960
dfs.datanode.socket.write.timeout 7200000
dfs.namenode.accesstime.precision 0
dfs.namenode.handler.count 600
dfs.support.append true
The corresponding hdfs-site.xml entries:
<property>
  <name>dfs.allow.truncate</name>
  <value>true</value>
</property>
<property>
  <name>dfs.block.access.token.enable</name>
  <value>false</value>
</property>
<property>
  <name>dfs.block.local-path-access.user</name>
  <value>gpadmin</value>
</property>
<!-- Note: with the next two properties set, the NameNode failed to start after installation; removing them restored normal startup -->
<property>
  <name>dfs.client.read.shortcircuit</name>
  <value>true</value>
</property>
<property>
  <name>dfs.client.socket-timeout</name>
  <value>300000000</value>
</property>
<property>
  <name>dfs.client.use.legacy.blockreader.local</name>
  <value>false</value>
</property>
<property>
  <name>dfs.datanode.data.dir.perm</name>
  <value>750</value>
</property>
<property>
  <name>dfs.datanode.handler.count</name>
  <value>60</value>
</property>
<property>
  <name>dfs.datanode.max.transfer.threads</name>
  <value>40960</value>
</property>
<property>
  <name>dfs.datanode.socket.write.timeout</name>
  <value>7200000</value>
</property>
<property>
  <name>dfs.namenode.accesstime.precision</name>
  <value>0</value>
</property>
<property>
  <name>dfs.namenode.handler.count</name>
  <value>600</value>
</property>
4.core-site.xml
ipc.client.connection.maxidletime 3600000
ipc.client.connect.timeout 300000
ipc.server.listen.queue.size 3300
<property>
  <name>ipc.client.connection.maxidletime</name>
  <value>3600000</value>
</property>
<property>
  <name>ipc.client.connect.timeout</name>
  <value>300000</value>
</property>
<property>
  <name>ipc.server.listen.queue.size</name>
  <value>3300</value>
</property>
# Restart the Hadoop cluster for the configuration to take effect
# Note: if the restart fails with "Could not reserve enough space for object heap", you can add the following to /etc/profile
#export _JAVA_OPTIONS="-Xms512m -Xmx1024m"
# to cap the JVM heap size. The test environment did not use this approach; the heap sizes were adjusted in the configuration files instead.
5. Download the package
Download hdb-2.2.0.0-4141.el6.tar.gz from the Pivotal Network (https://network.pivotal.io/).
chmod a+rx /home/hawqpackage/
cd /home/hawqpackage
tar xzvf hdb-2.2.0.0-4141.el6.tar.gz
cd hdb-2.2.0
6. Install supporting packages
(1) httpd
yum install httpd
service httpd start
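Optionally, have httpd start on boot and confirm it is serving pages (chkconfig is the RHEL/CentOS 6 tool assumed here):
chkconfig httpd on
curl -I http://localhost/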
**(2) HDB repository: run on c9test91**
cd hdb-2.2.0
./setup_repo.sh   # Adds the extracted HDB packages to a local yum repository; setup_repo.sh creates a repo file named hdb-<version>.repo on the local host.
The script also creates a symbolic link from the httpd document root (/var/www/html) to the directory where the HDB .tar.gz was extracted.
In this case it created /etc/yum.repos.d/hdb-2.2.0.0.repo,
and the repository can be accessed directly at http://c9test91/hdb-2.2.0.0.
scp /etc/yum.repos.d/hdb-2.2.0.0.repo c9test92:/etc/yum.repos.d/
scp /etc/yum.repos.d/hdb-2.2.0.0.repo c9test93:/etc/yum.repos.d/
scp /etc/yum.repos.d/hdb-2.2.0.0.repo c9test94:/etc/yum.repos.d/
**(3) epel-release repository: run on all hosts**
yum install epel-release
Edit /etc/yum.repos.d/epel.repo:
uncomment every baseurl line and comment out every mirrorlist line.
[epel]
name=Extra Packages for Enterprise Linux 6 - $basearch
baseurl=http://download.fedoraproject.org/pub/epel/6/$basearch
#mirrorlist=https://mirrors.fedoraproject.org/metalink?repo=epel-6&arch=$basearch
... (the remaining sections are modified in the same way)
If errors appear after adding the EPEL repository:
yum repolist   # confirm that the local yum repositories are healthy
In /etc/yum.repos.d/epel.repo, enable baseurl and disable mirrorlist.
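A quick way to apply this to all sections at once is a sed one-liner (a sketch; back up the file first):
cp /etc/yum.repos.d/epel.repo /etc/yum.repos.d/epel.repo.bak
sed -i 's/^#baseurl/baseurl/; s/^mirrorlist/#mirrorlist/' /etc/yum.repos.d/epel.repo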
**(4) Error handling:**
Missing-package error: Package: hawq-ranger-plugin_2_2_0_0-2.2.0.0-4141.el6.noarch (hdb-2.2.0.0) Requires: bigtop-tomcat
wget -O /etc/yum.repos.d/bigtop.repo http://archive.apache.org/dist/bigtop/1.2.0/repos/centos6/bigtop.repo
scp /etc/yum.repos.d/bigtop.repo root@c9test92:/etc/yum.repos.d/
scp /etc/yum.repos.d/bigtop.repo root@c9test93:/etc/yum.repos.d/
scp /etc/yum.repos.d/bigtop.repo root@c9test94:/etc/yum.repos.d/
7. Install HDB 2.2
su - root
yum install -y hawq
source /usr/local/hawq/greenplum_path.sh
# The HAWQ software is installed under /usr/local/hawq by default. To work with it as root as well, add the line below to /etc/profile:
source /usr/local/hawq/greenplum_path.sh
# then run 'source /etc/profile' to make it take effect.
# Note: if yum fails, 'yum clean all' can be used to clear the previous installation metadata.
#(1) Create the all-hosts file (hostfile) and the segment-hosts file (seg_hosts)
vi hostfile
c9test91
c9test92
c9test93
c9test94
vi seg_hosts
c9test93
c9test94
#(2) Install the hawq package on the remaining hosts
hawq ssh -f hostfile -e "yum install -y hawq"
#(3) gpadmin user environment
su - gpadmin
vi .bash_profile
source /usr/local/hawq/greenplum_path.sh
export HADOOP_PREFIX="/home/hadoop/hadoop"
export HADOOP_MAPRED_HOME=$HADOOP_PREFIX
export HADOOP_COMMON_HOME=$HADOOP_PREFIX
export HADOOP_HOME=$HADOOP_PREFIX
export HADOOP_HDFS_HOME=$HADOOP_PREFIX
export HADOOP_CONF_DIR=$HADOOP_PREFIX/etc/hadoop
export YARN_CONF_DIR=$HADOOP_PREFIX/etc/hadoop
export YARN_HOME=$HADOOP_PREFIX
#export HBASE_HOME="/opt/software/hbase"
#export SPARK_HOME="/opt/software/spark"
#export SPARK_JAR="hdfs:///spark/spark.jar"
export PATH=$PATH:$HADOOP_PREFIX/bin:$HADOOP_PREFIX/sbin
Copy the file to the other hosts:
hawq scp -f hostfile .bash_profile =:/home/gpadmin/
Apply it on the other hosts:
hawq ssh -f hostfile -e "source /home/gpadmin/.bash_profile"
#(4) Confirm that hawq is installed on every host
hawq ssh -f hostfile -e "ls -l $GPHOME "
#(5) Create the master data directory, owned by gpadmin
mkdir -p /home/hawqdata/master
chown -R gpadmin:gpadmin /home/hawqdata/
On the standby host (c9test92):
hawq ssh -h c9test92 -e 'mkdir -p /home/hawqdata/master'
hawq ssh -h c9test92 -e 'chown -R gpadmin:gpadmin /home/hawqdata'
#(6) Create the segment data directories
hawq ssh -f seg_hosts -e 'mkdir -p /home/hawqdata/segment'
hawq ssh -f seg_hosts -e 'chown -R gpadmin:gpadmin /home/hawqdata'
#(7) Create temporary directories for spill files, one per drive, to balance writes
$ dirs="/home/hawqdata/tmp1 /home/hawqdata/tmp2"
$ mkdir -p $dirs
$ chown -R gpadmin:gpadmin $dirs
$ hawq ssh -h c9test92 -e "mkdir -p $dirs"
$ hawq ssh -h c9test92 -e "chown -R gpadmin:gpadmin $dirs"
$ hawq ssh -f seg_hosts -e "mkdir -p $dirs"
$ hawq ssh -f seg_hosts -e "chown -R gpadmin:gpadmin $dirs"
#(8)hawq-site.xml
cp $GPHOME/etc/template-hawq-site.xml hawq-site.xml
#<1> Change the hawq_dfs_url property to use the actual NameNode address and port (or the HA nameservice) plus the HAWQ data directory:
<property>
  <name>hawq_dfs_url</name>
  <value>bvdata/hawq_default</value>
  <description>URL for accessing HDFS.</description>
</property>
If HDFS is configured for HA, use the nameservice ID (here bvdata) in place of host:port in the hawq_dfs_url value.
Make gpadmin the owner of the HDFS parent directory you specified:
hdfs dfs -chown gpadmin /
#<2> Ports and directories
Property Example Value
hawq_master_address_host c9test91
hawq_master_address_port 5432
hawq_standby_address_host c9test92
hawq_segment_address_port 40000
hawq_master_directory /home/hawqdata/master
hawq_segment_directory /home/hawqdata/segment
hawq_master_temp_directory /home/hawqdata/tmp1,/home/hawqdata/tmp2
hawq_segment_temp_directory /home/hawqdata/tmp1,/home/hawqdata/tmp2
hawq_global_rm_type none
#<3> Configure the YARN properties (only when using YARN for resource management)
Property   Example Value   Comment
hawq_global_rm_type   yarn   When set to yarn, HAWQ requires the additional YARN parameters hawq_rm_yarn_address and hawq_rm_yarn_scheduler_address.
hawq_rm_yarn_address   c9test93:8032   Must match the yarn.resourcemanager.address value in yarn-site.xml.
hawq_rm_yarn_scheduler_address   c9test93:8030   Must match the yarn.resourcemanager.scheduler.address value in yarn-site.xml.
If YARN is configured for HA, $GPHOME/etc/yarn-client.xml must also be configured:
Property   Example Value   Comment
yarn.resourcemanager.ha   rm1.example.com:8032,rm2.example.com:8032   Comma-separated list of ResourceManager hosts. When HA is enabled, YARN ignores the value of hawq_rm_yarn_address and uses this property instead.
yarn.resourcemanager.scheduler.ha   rm1.example.com:8030,rm2.example.com:8030   Comma-separated list of scheduler hosts. When HA is enabled, YARN ignores the value of hawq_rm_yarn_scheduler_address and uses this property instead.
#(9)slaves
vi $GPHOME/etc/slaves
c9test93
c9test94
#(10) For NameNode HA, edit hdfs-client.xml
${GPHOME}/etc/hdfs-client.xml
<property>
  <name>dfs.ha.namenodes.bvdata</name>
  <value>c9test91,c9test92</value>
</property>
<property>
  <name>dfs.namenode.http-address.bvdata.c9test91</name>
  <value>c9test91:50070</value>
</property>
<property>
  <name>dfs.namenode.http-address.bvdata.c9test92</name>
  <value>c9test92:50070</value>
</property>
<property>
  <name>dfs.namenode.rpc-address.bvdata.c9test91</name>
  <value>c9test91:8020</value>
</property>
<property>
  <name>dfs.namenode.rpc-address.bvdata.c9test92</name>
  <value>c9test92:8020</value>
</property>
<property>
  <name>dfs.nameservices</name>
  <value>bvdata</value>
</property>
#(11) Distribute hawq-site.xml, slaves, and hdfs-client.xml to the other hosts
hawq scp -f hostfile hawq-site.xml slaves hdfs-client.xml =:$GPHOME/etc/
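An optional check that every host received identical copies (the checksums should match across hosts):
hawq ssh -f hostfile -e "md5sum $GPHOME/etc/hawq-site.xml $GPHOME/etc/hdfs-client.xml"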
#(12) Prepare the cluster for initialization
#<1> On all hosts, change the owner of the HAWQ installation directory to gpadmin
hawq ssh -f hostfile -e "chown -R gpadmin:gpadmin /usr/local/hawq/"
#<2> Grant gpadmin sudo privileges in /etc/sudoers
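One common way to do this (an assumption here; follow your own sudo policy) is to add an entry with visudo:
visudo
# add a line such as:  gpadmin ALL=(ALL) NOPASSWD: ALL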
#<3> User environment variables on all hosts:
vi .bash_profile
export HADOOP_PREFIX="/home/hadoop/hadoop"
export HADOOP_MAPRED_HOME=$HADOOP_PREFIX
export HADOOP_COMMON_HOME=$HADOOP_PREFIX
export HADOOP_HOME=$HADOOP_PREFIX
export HADOOP_HDFS_HOME=$HADOOP_PREFIX
export HADOOP_CONF_DIR=$HADOOP_PREFIX/etc/hadoop
export YARN_CONF_DIR=$HADOOP_PREFIX/etc/hadoop
export YARN_HOME=$HADOOP_PREFIX
#export HBASE_HOME="/opt/software/hbase"
#export SPARK_HOME="/opt/software/spark"
#export SPARK_JAR="hdfs:///spark/spark.jar"
export PATH=$PATH:$HOME/bin:$HADOOP_PREFIX/bin:$HADOOP_PREFIX/sbin
#<4> Set permissions on the hadoop and zookeeper directories to 777
chmod -R 777 /home/hadoop/hadoop   # Never set the hadoop user's home directory itself to 777, or Hadoop will fail to start
#<5> Add gpadmin to the hadoop group
sudo usermod -a -G hadoop gpadmin
#<6> If /tmp permission errors are reported, make sure the /tmp directory is set to 777
#<7> If starting the Hadoop ResourceManager fails with an OOM error, adjust yarn-env.sh
For a test machine with 8 GB of RAM:
JAVA_HEAP_MAX=-Xmx2000m
YARN_HEAPSIZE=1000
export YARN_RESOURCEMANAGER_OPTS="-server -Xmx2G -Xms2G -Xmn1G -XX:MaxPermSize=512m -XX:PermSize=512m -XX:+UseParNewGC -XX:+UseConcMarkSweepGC "
---- After the steps above, gpadmin can work with Hadoop directly:
su - gpadmin
hdfs dfs -ls /
#(13) Initialize the cluster
hawq init cluster
#<1> Optionally verify the configuration before initializing:
hawq check -f hostfile --hadoop /home/hadoop/hadoop/
#<2> Stopping and starting the HAWQ service:
hawq stop cluster
hawq start cluster
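To check the status of a running cluster, hawq also provides a state subcommand:
hawq state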
VI. Installation Verification
source /usr/local/hawq/greenplum_path.sh
export PGPORT=5432
psql -d postgres
postgres=# CREATE DATABASE mytest;
postgres=# \c mytest
mytest=# CREATE TABLE t (i int);
CREATE TABLE
mytest=# INSERT INTO t SELECT generate_series(1,100);
mytest=# \timing
mytest=# SELECT count(*) FROM t;
# You can browse the actual HDFS contents at http://c9test91:50070; by default HAWQ creates a hawq_default directory under the HDFS root.
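The HAWQ data directory can also be listed directly from the command line (the directory name comes from the hawq_dfs_url value configured earlier):
hdfs dfs -ls /hawq_default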