Big Data: hdfs + hive + hbase + kudu + presto cluster (6 nodes)

Download links for the main components:
presto https://prestosql.io/docs/current/index.html
kudu (rpm packages) https://github.com/MartinWeindel/kudu-rpm/releases
hive http://mirror.bit.edu.cn/apache/hive/
hdfs http://archive.apache.org/dist/hadoop/core/
hbase https://hbase.apache.org/downloads.html
1. Environment preparation

Machine layout:

| ip           | kudu    | presto      | hdfs              | hive | zk | kafka | hbase          | hostname | CPU | RAM |
|--------------|---------|-------------|-------------------|------|----|-------|----------------|----------|-----|-----|
| 192.168.1.10 | master  | coordinator | namenode/datanode |      |    |       | master+phoenix | server1  | 8c  | 32g |
| 192.168.1.11 | master  | worker      | datanode/yarn     |      |    |       | regionserver   | server2  | 8c  | 32g |
| 192.168.1.12 | master  | worker      | datanode          |      |    |       | regionserver   | server3  | 8c  | 32g |
| 192.168.1.13 | tserver | worker      | datanode          |      | ZK | kafka | regionserver   | server4  | 8c  | 32g |
| 192.168.1.14 | tserver | worker      | datanode          |      | ZK | kafka | regionserver   | server5  | 8c  | 32g |
| 192.168.1.15 | tserver | worker      | datanode          | hive | ZK | kafka | regionserver   | server6  | 8c  | 32g |

| Service  | Port    | Version    |
|----------|---------|------------|
| hdfs     | xx,xx   | 3.1.2      |
| yarn     | xxxx    | 3.1.2      |
| presto   | xxxx    | 337        |
| kudu     | xxxx    | 1.10.0     |
| zk       | xxxx    | 3.5.5      |
| kafka    | xxxx    | 2.12-2.2.1 |
| hive dir | xxxx    | 3.1.2      |
| hbase    | xxxx    | 2.0.6      |
| hbase client | phoenix | 5.0.0  |

saltstack is used for batch operations across the nodes.

*Note: baseline OS tuning, the Java environment, passwordless SSH, and SaltStack installation are assumed to be in place on all nodes (omitted here).
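The Salt commands below target nodegroups such as 'hdfs', 'kudu', 'km' (kudu masters), 'kt' (kudu tservers), 'pt' (presto) and 'hbase'. A minimal sketch of how these could be declared in /etc/salt/master, assuming minion IDs equal the hostnames above (adjust membership to your own layout):

# sketch only: append the nodegroups used in this article to the salt master config
cat >> /etc/salt/master <<'EOF'
nodegroups:
  hdfs:  'L@server1,server2,server3,server4,server5,server6'
  kudu:  'L@server1,server2,server3,server4,server5,server6'
  km:    'L@server1,server2,server3'
  kt:    'L@server4,server5,server6'
  pt:    'L@server1,server2,server3,server4,server5,server6'
  hbase: 'L@server1,server2,server3,server4,server5,server6'
EOF
systemctl restart salt-master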

 

Configure hosts
vi /etc/hosts
192.168.1.10     server1
192.168.1.11     server2
192.168.1.12     server3
192.168.1.13     server4
192.168.1.14     server5
192.168.1.15     server6
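To push the same hosts file to every node, something like the following works, assuming /etc/hosts has been copied into the Salt file root (e.g. /srv/salt/hosts):

salt '*' cp.get_file salt://hosts /etc/hosts
salt '*' cmd.run 'tail -6 /etc/hosts'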

2. HDFS installation
2.0 Configure system environment variables in /etc/profile
export JAVA_HOME=/data/jdk1.8.0_211
export JRE_HOME=$JAVA_HOME/jre
export CLASSPATH=.:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar:$JRE_HOME/lib
export HADOOP_HOME=/data/hadoop-3.1.2
export PATH=$PATH:$JAVA_HOME/bin:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
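A quick sanity check after editing the profile (versions should match the table above):

source /etc/profile
java -version       # expect 1.8.0_211
hadoop version      # expect Hadoop 3.1.2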

2.1 hadoop-env.sh configuration
export JAVA_HOME=/data/jdk1.8.0_211
export HADOOP_HOME=/data/hadoop-3.1.2
export PATH=$PATH:/data/hadoop-3.1.2/bin
export HADOOP_OPTS="-Djava.library.path=$HADOOP_HOME/lib:$HADOOP_COMMON_LIB_NATIVE_DIR"
 
export HDFS_NAMENODE_USER=root
export HDFS_DATANODE_USER=root
export HDFS_SECONDARYNAMENODE_USER=root
export HDFS_ZKFC_USER=root
export HDFS_JOURNALNODE_USER=root
2.3 core-site.xml configuration

<configuration>
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://server1:8020</value>
    </property>
    <property>
        <name>io.file.buffer.size</name>
        <value>131072</value>
    </property>
    <property>
        <name>hadoop.tmp.dir</name>
        <value>/data/hadoop-3.1.2/tmp</value>
    </property>
    <property>
        <name>fs.trash.interval</name>
        <value>60</value>
    </property>
    <property>
        <name>hadoop.proxyuser.root.hosts</name>
        <value>*</value>
    </property>
    <property>
        <name>hadoop.proxyuser.root.groups</name>
        <value>*</value>
    </property>
</configuration>

2.4 hdfs-site.xml configuration

<configuration>
    <property>
        <name>dfs.replication</name>
        <value>1</value>
    </property>
    <property>
        <name>dfs.datanode.data.dir</name>
        <value>/data/hadoop-3.1.2/data/dfs/data</value>
    </property>
    <property>
        <name>dfs.namenode.name.dir</name>
        <value>/data/hadoop-3.1.2/dfs/name</value>
    </property>
    <property>
        <name>dfs.http.address</name>
        <value>server1:50070</value>
    </property>
</configuration>

2.5 Add the following at the top of /data/hadoop-3.1.2/sbin/start-dfs.sh and stop-dfs.sh
HDFS_DATANODE_USER=root
HDFS_NAMENODE_USER=root
HDFS_SECONDARYNAMENODE_USER=root
HDFS_DATANODE_SECURE_USER=hdfs

After the configuration is in place, create the users on every server:
useradd -m hadoop -G root -s /bin/bash
useradd -m hdfs -G root -s /bin/bash
useradd -m yarn -G root -s /bin/bash

Change the script execution user to avoid errors when the namenode is repeatedly started and stopped:
vi /data/hadoop-3.1.2/bin/hdfs
HADOOP_SHELL_EXECNAME="root"


3. YARN installation

3.0 Add the following at the top of /data/hadoop-3.1.2/sbin/start-yarn.sh and stop-yarn.sh
 YARN_RESOURCEMANAGER_USER=root
 HADOOP_SECURE_DN_USER=yarn
 YARN_NODEMANAGER_USER=root
3.1 mapred-site.xml configuration

<configuration>
    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
    </property>
    <property>
        <name>mapreduce.jobhistory.address</name>
        <value>server2:10030</value>
    </property>
    <property>
        <name>mapreduce.jobhistory.joblist.cache.size</name>
        <value>10010</value>
        <description>default 20000</description>
    </property>
</configuration>


3.2 yarn-site.xml configuration

<configuration>
    <property>
        <name>yarn.nodemanager.vmem-check-enabled</name>
        <value>false</value>
    </property>
    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>
    <property>
        <name>yarn.resourcemanager.hostname</name>
        <value>server2</value>
    </property>
    <property>
        <name>yarn.resourcemanager.webapp.address</name>
        <value>server2:8888</value>
    </property>
</configuration>


3.3 hadoop-env.sh configuration
 #YARN
 export YARN_RESOURCEMANAGER_USER=root
 export HDFS_DATANODE_SECURE_USER=yarn
 export YARN_NODEMANAGER_USER=root


3.4 workers configuration
server1
server2
server3
server4
server5
server6
3.5 Distribute the configuration files to the corresponding nodes
salt -N 'hdfs' cp.get_file salt://xxx /data/hadoop-3.1.2/xxx
3.6 Start
Format the namenode:
hadoop namenode -format
Start everything:
start-all.sh
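A rough check that the daemons came up (standard Hadoop commands; hostnames follow the table above):

# running JVMs on every node
salt -N 'hdfs' cmd.run 'jps'
# datanodes registered with the namenode on server1
hdfs dfsadmin -report
# nodemanagers registered with the resourcemanager on server2
yarn node -list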

 

4. Hive installation
4.1.0 hive-site.xml configuration

<configuration>
    <property>
        <name>hive.metastore.warehouse.dir</name>
        <value>/user/hive/warehouse</value>
    </property>
    <property>
        <name>hive.exec.mode.local.auto</name>
        <value>true</value>
        <description>Let Hive determine whether to run in local mode automatically</description>
    </property>
    <property>
        <name>javax.jdo.option.ConnectionURL</name>
        <value>jdbc:mysql://x.x.x.x:3306/hive?createDatabaseIfNotExist=true</value>
    </property>
    <property>
        <name>javax.jdo.option.ConnectionDriverName</name>
        <value>com.mysql.jdbc.Driver</value>
    </property>
    <property>
        <name>javax.jdo.option.ConnectionUserName</name>
        <value>hive</value>
    </property>
    <property>
        <name>javax.jdo.option.ConnectionPassword</name>
        <value>123456</value>
    </property>
    <property>
        <name>hive.cli.print.header</name>
        <value>true</value>
    </property>
    <property>
        <name>hive.cli.print.current.db</name>
        <value>true</value>
    </property>
    <property>
        <name>hive.server2.authentication</name>
        <value>NONE</value>
    </property>
</configuration>
4.1.1 hive-env.sh configuration
HADOOP_HOME=/data/hadoop-3.1.2
export HIVE_CONF_DIR=/data/hive-3.1.2/conf
export HIVE_AUX_JARS_PATH=/data/hive-3.1.2/lib

4.1.2 Distribute the configuration files
cp mysql-connector-java-5.1.46.jar /data/hive-3.1.2/lib
salt 'server6' cp.get_file salt://xxx /data/hive-3.1.2/conf/xxx
4.1.3 Create directories in HDFS
hadoop fs -mkdir -p /user/hive/warehouse
hadoop fs -chmod g+w /user/hive/warehouse
hadoop fs -mkdir -p /tmp
hadoop fs -chmod g+w /tmp

4.1.4 Initialize and start the services
Install MySQL (MariaDB) on the master node
yum install -y mariadb-server 
systemctl start mariadb 
systemctl enable mariadb 
Initialize MySQL
mysql_secure_installation 
Create the Hive metastore database and user
create database hive character set utf8;
CREATE USER 'hive'@'%' IDENTIFIED BY '123456';
GRANT ALL PRIVILEGES ON *.* TO 'hive'@'%';
FLUSH PRIVILEGES;
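Before initializing the schema, one way to check whether MySQL/MariaDB is case sensitive (the next step asks for this; 0 means table names are case-sensitive, the usual Linux default):

mysql -uroot -p -e "show variables like 'lower_case_table_names';"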
# Initialize the Hive metastore schema; check whether MySQL is case sensitive before running
schematool -dbType mysql -initSchema
# Start Hive's two services (metastore and hiveserver2)
nohup /data/hive-3.1.2/bin/hive --service metastore -p 9083 &
nohup /data/hive-3.1.2/bin/hive --service hiveserver2 &
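With both daemons up, a minimal smoke test through HiveServer2 (this assumes the default HiveServer2 port 10000 and that the services run on server6 as in the layout table):

beeline -u jdbc:hive2://server6:10000 -n root -e 'show databases;'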
# Stop Hive's two services
kill -9 `ps -ef|grep hive|grep -v grep|awk '{print $2}'`

5. Kudu installation
# Edit master.gflagfile
## Comma-separated list of the RPC addresses belonging to all Masters in this cluster.
## NOTE: if not specified, configures a non-replicated Master.
#--master_addresses=
--master_addresses=server1:7051,server2:7051,server3:7051
--log_dir=/data/kudu/master/logs
--fs_wal_dir=/data/kudu/master/wals
--fs_data_dirs=/data/kudu/master/data
 

# Edit tserver.gflagfile
#Comma separated addresses of the masters which the tablet server should connect to.
--tserver_master_addrs=server1:7051,server2:7051,server3:7051
--log_dir=/data/kudu/tserver/logs
--fs_wal_dir=/data/kudu/tserver/wals
--fs_data_dirs=/data/kudu/tserver/data

 
5.1 Distribute the installation packages to the servers
salt -N 'kudu' cp.get_file salt://kudu-1.10.0-1.x86_64.rpm /data/kudu-1.10.0-1.x86_64.rpm
salt -N 'kudu' cp.get_file salt://cyrus-sasl-gssapi-2.1.26-23.el7.x86_64.rpm /data/cyrus-sasl-gssapi-2.1.26-23.el7.x86_64.rpm
salt -N 'kudu' cp.get_file salt://cyrus-sasl-plain-2.1.26-23.el7.x86_64.rpm /data/cyrus-sasl-plain-2.1.26-23.el7.x86_64.rpm
# Install these two dependencies first, otherwise the install will fail
salt -N 'kudu' cmd.run 'cd /data&&rpm -ivh cyrus-sasl-gssapi-2.1.26-23.el7.x86_64.rpm'
salt -N 'kudu' cmd.run 'cd /data&&rpm -ivh cyrus-sasl-plain-2.1.26-23.el7.x86_64.rpm'

5.2 Run the installation
salt -N 'kudu' cmd.run 'cd /data&&rpm -ivh kudu-1.10.0-1.x86_64.rpm'
# Distribute the configuration files
salt -N 'km' cp.get_file salt://master.gflagfile /etc/kudu/conf/master.gflagfile
salt -N 'kt' cp.get_file salt://tserver.gflagfile /etc/kudu/conf/tserver.gflagfile
# Create the data directories
salt -N 'km' cmd.run 'mkdir -p /data/kudu/master/logs \
 /data/kudu/master/wals /data/kudu/master/data'
salt -N 'kt' cmd.run 'mkdir -p /data/kudu/tserver/logs \
 /data/kudu/tserver/data /data/kudu/tserver/wals'
# Fix directory ownership
salt -N 'kudu' cmd.run 'chown -R kudu:kudu /data/kudu'
 
5.3 Start kudu-master
salt -N 'km' cmd.run 'systemctl start kudu-master'
# Check the service on each node
salt -N 'km' cmd.run 'ps -ef|grep kudu'
# Start the tservers
salt -N 'kt' cmd.run 'systemctl start kudu-tserver'
# Check the service on each node
salt -N 'kt' cmd.run 'ps -ef|grep kudu'
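The kudu CLI can confirm that every master and tablet server joined the cluster; run this from any node that has the kudu binary installed:

kudu cluster ksck server1:7051,server2:7051,server3:7051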

6. Presto 337 installation
salt -N 'pt' cp.get_file salt://jdk-11.0.6_linux-x64_bin.tar.gz /data/jdk-11.0.6_linux-x64_bin.tar.gz
salt -N 'pt' cmd.run 'cd /data&&tar xf jdk-11.0.6_linux-x64_bin.tar.gz'
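Optionally confirm the unpacked JDK 11 on every Presto node (the /data/jdk-11.0.6 path matches the launcher edit below):

salt -N 'pt' cmd.run '/data/jdk-11.0.6/bin/java -version'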

6.1 Configuration files:
config.properties
Coordinator node:
coordinator=true
node-scheduler.include-coordinator=false
http-server.http.port=8080
discovery-server.enabled=true
discovery.uri=http://192.168.1.10:8080
query.max-memory=25GB
query.max-memory-per-node=5GB
query.max-total-memory-per-node=6GB
query.max-run-time=900s


Worker nodes:
coordinator=false
http-server.http.port=8080
discovery.uri=http://192.168.1.10:8080
query.max-memory=25GB
query.max-memory-per-node=5GB

# Reserve about 30% of memory for the OS and for peak-load headroom
jvm.config
-server
-Xmx20G
-XX:+UseG1GC
-XX:G1HeapRegionSize=32M
-XX:+UseGCOverheadLimit
-XX:+ExplicitGCInvokesConcurrent
-XX:+HeapDumpOnOutOfMemoryError
-XX:OnOutOfMemoryError=kill -9 %p
-XX:+CMSClassUnloadingEnabled
-XX:+AggressiveOpts
-DHADOOP_USER_NAME=root
-Djdk.attach.allowAttachSelf=true
 


log.properties
io.prestosql=INFO


node.properties (note: node.id must be different on every node)
node.environment=presto
node.id=node_coordinator_10
node.data-dir=/data/presto-data


Data source connection parameters go in the catalog directory
hive.properties
connector.name=hive-hadoop2
hive.metastore.uri=thrift://192.168.1.16:9083
hive.config.resources=/data/hadoop-3.1.2/etc/hadoop/core-site.xml,/data/hadoop-3.1.2/etc/hadoop/hdfs-site.xml


kudu.properties

connector.name=kudu

## List of Kudu master addresses, at least one is needed (comma separated)
## Supported formats: example.com, example.com:7051, 192.0.2.1, 192.0.2.1:7051,
##                    [2001:db8::1], [2001:db8::1]:7051, 2001:db8::1
kudu.client.master-addresses=server1:7051,server2:7051,server3:7051

## Kudu does not support schemas, but the connector can emulate them optionally.
## By default, this feature is disabled, and all tables belong to the default schema.
## For more details see connector documentation.
kudu.schema-emulation.enabled=true

## Prefix to use for schema emulation (only relevant if `kudu.schema-emulation.enabled=true`)
## The standard prefix is `presto::`. Empty prefix is also supported.
## For more details see connector documentation.
##kudu.schema-emulation.prefix=

#######################
### Advanced Kudu Java client configuration
#######################

## Default timeout used for administrative operations (e.g. createTable, deleteTable, etc.)
#kudu.client.defaultAdminOperationTimeout = 30s

## Default timeout used for user operations
#kudu.client.defaultOperationTimeout = 30s

## Default timeout to use when waiting on data from a socket
#kudu.client.defaultSocketReadTimeout = 10s

## Disable Kudu client's collection of statistics.
#kudu.client.disableStatistics = false


mongodb.properties
connector.name=mongodb
mongodb.seeds=192.168.1.17:27017
mongodb.credentials=root:admin@admin
#mongodb.socket-keep-alive=true

bin/launcher
Add the JDK 11 PATH ahead of the launch command:

PATH=/data/jdk-11.0.6/bin/:$PATH
java -version
exec "$(dirname "$0")/launcher.py" "$@"

1. Distribute the package
salt -N 'pt' cp.get_file salt://presto-server-337.tar.gz /data/presto-server-337.tar.gz
2. Unpack
salt -N 'pt' cmd.run 'cd /data&&tar xf presto-server-337.tar.gz'
3. Distribute the configuration files (detailed values omitted for confidentiality)
salt -N 'pt' cp.get_dir salt://etc /data/presto-server-337/
4. Create the data directory
salt -N 'pt' cmd.run 'mkdir -p /data/presto-data'
5. Make the binaries executable
salt -N 'pt' cmd.run 'chmod +x /data/presto-server-337/bin/*'
6. Start the service
salt -N 'pt' cmd.run 'source /etc/profile&&/data/presto-server-337/bin/launcher start'
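To confirm the coordinator and catalogs are reachable, something like the following can be used (the presto CLI is a separate download, presto-cli-337-executable.jar renamed to presto and made executable; treat that as an assumption about your setup):

# coordinator liveness
curl http://192.168.1.10:8080/v1/info
# list catalogs and kudu schemas through the CLI
./presto --server 192.168.1.10:8080 --execute 'show catalogs;'
./presto --server 192.168.1.10:8080 --catalog kudu --execute 'show schemas;'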

7. HBase 2.0 installation
7.1.0 hbase-env.sh configuration
export JAVA_HOME=/data/jdk1.8.0_211/
export HBASE_LOG_DIR=${HBASE_HOME}/logs
export HBASE_MANAGES_ZK=false
# JVM memory settings; adjust to your hardware
export HBASE_OPTS="-XX:+UseConcMarkSweepGC"
export HBASE_HEAPSIZE=8G
export HBASE_MASTER_OPTS="$HBASE_MASTER_OPTS -Xmx4g -Xms4g"
export HBASE_REGIONSERVER_OPTS="$HBASE_REGIONSERVER_OPTS -Xms8g -Xmx8g"

7.1.1 hbase-site.xml configuration

<configuration>
    <property>
        <name>hbase.master.info.port</name>
        <value>16000</value>
    </property>
    <property>
        <name>hbase.tmp.dir</name>
        <value>/data/hbase-2.0.6/tmp</value>
    </property>
    <property>
        <name>hbase.rootdir</name>
        <value>hdfs://server1:8020/hbase</value>
    </property>
    <property>
        <name>hbase.zookeeper.quorum</name>
        <value>server4,server5,server6:2181</value>
    </property>
    <property>
        <name>hbase.zookeeper.property.dataDir</name>
        <value>/data/hbase-2.0.6/data</value>
    </property>
    <property>
        <name>hbase.cluster.distributed</name>
        <value>true</value>
    </property>
    <property>
        <name>hbase.unsafe.stream.capability.enforce</name>
        <value>false</value>
    </property>
    <property>
        <name>zookeeper.znode.parent</name>
        <value>/hbase</value>
    </property>
    <property>
        <name>phoenix.schema.isNamespaceMappingEnabled</name>
        <value>true</value>
    </property>
    <property>
        <name>phoenix.schema.mapSystemTablesToNamespace</name>
        <value>true</value>
    </property>
</configuration>

7.1.2 regionservers configuration
server1
server2
server3
server4
server5
server6

7.1.3 Distribute the configuration files
salt -N 'hbase' cp.get_file salt://xxx /data/hbase-2.0.6/xxx
salt -N 'hbase' cmd.run  'cd /data/hbase-2.0.6/conf&&ln -s /data/hadoop-3.1.2/etc/hadoop/hdfs-site.xml'
salt -N 'hbase' cmd.run  'cd /data/hbase-2.0.6/conf&&ln -s /data/hadoop-3.1.2/etc/hadoop/core-site.xml'
salt -N 'hbase' cmd.run  'mkdir /data/hbase-2.0.6/{data,logs,tmp}'


Start HBase: start-hbase.sh
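After start-hbase.sh, a quick functional check with the standard hbase shell (the 'smoke_test' table name is just an example):

echo "status" | hbase shell
hbase shell <<'EOF'
create 'smoke_test','cf'
put 'smoke_test','r1','cf:c1','v1'
scan 'smoke_test'
disable 'smoke_test'
drop 'smoke_test'
EOF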

7.1.4 Install Phoenix 5.0.0

Download and unpack Phoenix on the master node.
Copy the server jar to every HBase node:
cp /data/phoenix-5.0.0/phoenix-5.0.0-HBase-2.0-server.jar /data/hbase-2.0.6/lib/

Confirm that hbase-site.xml on every node contains the phoenix.schema.isNamespaceMappingEnabled and phoenix.schema.mapSystemTablesToNamespace properties.

cp /data/hbase-2.0.6/conf/hbase-site.xml /data/phoenix-5.0.0/bin/

Enter the CLI
/data/phoenix-5.0.0/bin/sqlline.py
!tables
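For a minimal end-to-end check through Phoenix, a throwaway SQL file can be fed to sqlline (the table name is just an example; the ZooKeeper quorum follows hbase-site.xml above):

cat > /tmp/phoenix_smoke.sql <<'EOF'
CREATE TABLE IF NOT EXISTS smoke_test (id BIGINT NOT NULL PRIMARY KEY, name VARCHAR);
UPSERT INTO smoke_test VALUES (1, 'hello');
SELECT * FROM smoke_test;
DROP TABLE smoke_test;
EOF
/data/phoenix-5.0.0/bin/sqlline.py server4,server5,server6:2181 /tmp/phoenix_smoke.sql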

 

Reference article author: 吉甫作诵

Link: https://blog.csdn.net/fly0512/article/details/100863889

 

 

 
