Integrating Hadoop 3.1.4 + Hive 3.1.2 + Spark 3.1.3 + Kyuubi 1.5.1

Environment

| Host | IP | Roles |
| --- | --- | --- |
| yfbd-virtual-machine-01 | 10.216.6.227 | zookeeper, datanode, namenode, spark master/worker, hive, hive metastore |
| yfbd-virtual-machine-02 | 10.216.6.228 | zookeeper, datanode, namenode (standby), spark worker, hive |
| yfbd-virtual-machine-03 | 10.216.6.229 | zookeeper, datanode, spark worker, hive |

Configure passwordless SSH

1. Edit the hosts file

10.216.6.227 yfbd-virtual-machine-01
10.216.6.228 yfbd-virtual-machine-02
10.216.6.229 yfbd-virtual-machine-03
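
The same three entries should be present in /etc/hosts on every node. A quick way to confirm that the names resolve:

ping -c 1 yfbd-virtual-machine-02
ping -c 1 yfbd-virtual-machine-03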

2. Passwordless login

  • Run on 227:
ssh-keygen
  • Still on 227, copy the public key to the other nodes:
ssh-copy-id -i /home/yfbd/.ssh/id_rsa.pub yfbd-virtual-machine-02
ssh-copy-id -i /home/yfbd/.ssh/id_rsa.pub yfbd-virtual-machine-03
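
To confirm passwordless login works, log in to each node from 227; no password prompt should appear:

ssh yfbd-virtual-machine-02 hostname
ssh yfbd-virtual-machine-03 hostname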

Configure environment variables

vim /etc/profile
#JAVA_HOME
export JAVA_HOME=/home/yfbd/bigdata/jdk1.8
export PATH=$PATH:$JAVA_HOME/bin
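
After saving /etc/profile, reload it and verify that the JDK is picked up:

source /etc/profile
java -version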

Hadoop HA deployment

1. Configure core-site.xml

vim /home/yfbd/bigdata/hadoop-3.1.4/etc/hadoop/core-site.xml

      
<configuration>
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://ns</value>
    </property>
    <property>
        <name>hadoop.tmp.dir</name>
        <value>/home/yfbd/bigdata/hadoop-3.1.4/data/tmp</value>
    </property>
    <property>
        <name>hadoop.http.staticuser.user</name>
        <value>yfbd</value>
    </property>
    <property>
        <name>hadoop.proxyuser.yfbd.hosts</name>
        <value>*</value>
    </property>
    <property>
        <name>hadoop.proxyuser.yfbd.groups</name>
        <value>*</value>
    </property>
    <property>
        <name>ha.zookeeper.quorum</name>
        <value>yfbd-virtual-machine-01:2181,yfbd-virtual-machine-02:2181,yfbd-virtual-machine-03:2181</value>
    </property>
</configuration>


2. Configure hdfs-site.xml


    
   
<configuration>
    <property>
        <name>dfs.replication</name>
        <value>3</value>
    </property>
    <property>
        <name>dfs.nameservices</name>
        <value>ns</value>
    </property>
    <property>
        <name>dfs.ha.namenodes.ns</name>
        <value>nn1,nn2</value>
    </property>
    <property>
        <name>dfs.namenode.rpc-address.ns.nn1</name>
        <value>yfbd-virtual-machine-01:8020</value>
    </property>
    <property>
        <name>dfs.namenode.http-address.ns.nn1</name>
        <value>yfbd-virtual-machine-01:50070</value>
    </property>
    <property>
        <name>dfs.namenode.rpc-address.ns.nn2</name>
        <value>yfbd-virtual-machine-02:8020</value>
    </property>
    <property>
        <name>dfs.namenode.http-address.ns.nn2</name>
        <value>yfbd-virtual-machine-02:50070</value>
    </property>
    <property>
        <name>dfs.namenode.shared.edits.dir</name>
        <value>qjournal://yfbd-virtual-machine-01:8485;yfbd-virtual-machine-02:8485;yfbd-virtual-machine-03:8485/ns</value>
    </property>
    <property>
        <name>dfs.journalnode.edits.dir</name>
        <value>/home/yfbd/bigdata/hadoop-3.1.4/data/journal</value>
    </property>
    <property>
        <name>dfs.ha.automatic-failover.enabled</name>
        <value>true</value>
    </property>
    <property>
        <name>dfs.client.failover.proxy.provider.ns</name>
        <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
    </property>
    <property>
        <name>dfs.ha.fencing.methods</name>
        <value>sshfence</value>
    </property>
    <property>
        <name>dfs.permissions.enabled</name>
        <value>false</value>
    </property>
</configuration>

3. Configure yarn-site.xml


    
      
<configuration>
    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>
    <property>
        <name>yarn.resourcemanager.ha.enabled</name>
        <value>true</value>
    </property>
    <property>
        <name>yarn.resourcemanager.cluster-id</name>
        <value>rmcluster</value>
    </property>
    <property>
        <name>yarn.resourcemanager.ha.rm-ids</name>
        <value>rm1,rm2,rm3</value>
    </property>
    <property>
        <name>yarn.resourcemanager.hostname.rm1</name>
        <value>yfbd-virtual-machine-01</value>
    </property>
    <property>
        <name>yarn.resourcemanager.hostname.rm2</name>
        <value>yfbd-virtual-machine-02</value>
    </property>
    <property>
        <name>yarn.resourcemanager.hostname.rm3</name>
        <value>yfbd-virtual-machine-03</value>
    </property>
    <property>
        <name>yarn.resourcemanager.zk-address</name>
        <value>yfbd-virtual-machine-01:2181,yfbd-virtual-machine-02:2181,yfbd-virtual-machine-03:2181</value>
    </property>
    <property>
        <name>yarn.resourcemanager.recovery.enabled</name>
        <value>true</value>
    </property>
    <property>
        <name>yarn.resourcemanager.store.class</name>
        <value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
    </property>
    <property>
        <name>yarn.nodemanager.env-whitelist</name>
        <value>JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_MAPRED_HOME</value>
    </property>
    <property>
        <name>yarn.log-aggregation-enable</name>
        <value>true</value>
    </property>
    <property>
        <name>yarn.log.server.url</name>
        <value>http://yfbd-virtual-machine-01:19888/jobhistory/logs</value>
    </property>
    <property>
        <name>yarn.log-aggregation.retain-seconds</name>
        <value>604800</value>
    </property>
    <property>
        <name>yarn.resourcemanager.ha.id</name>
        <value>rm1</value>
    </property>
    <property>
        <name>yarn.nodemanager.pmem-check-enabled</name>
        <value>false</value>
    </property>
    <property>
        <name>yarn.nodemanager.vmem-check-enabled</name>
        <value>false</value>
    </property>
</configuration>

4. Configure workers

yfbd-virtual-machine-01
yfbd-virtual-machine-02
yfbd-virtual-machine-03

5. Format the NameNode

cd /home/yfbd/bigdata/hadoop-3.1.4/bin
./hdfs namenode -format
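
Note that in an HA deployment the JournalNodes have to be running before the first format, and the standby NameNode and the ZKFC state in ZooKeeper need to be initialized once as well. A typical sequence around the format step, assuming ZooKeeper is already running on all three nodes, looks like this:

# on all three nodes, before formatting: start the JournalNodes
hdfs --daemon start journalnode

# on yfbd-virtual-machine-01, after formatting: start the active NameNode
hdfs --daemon start namenode

# on yfbd-virtual-machine-02: sync the metadata for the standby NameNode
hdfs namenode -bootstrapStandby

# on yfbd-virtual-machine-01: initialize the HA znode in ZooKeeper (run once)
hdfs zkfc -formatZK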

6. Start the cluster

cd /home/yfbd/bigdata/hadoop-3.1.4/sbin
./start-all.sh
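
Once start-all.sh finishes, jps on each node should list the daemons expected for that node, and the HA state can be queried directly:

jps
hdfs haadmin -getServiceState nn1
hdfs haadmin -getServiceState nn2
yarn rmadmin -getServiceState rm1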

Hive deployment

1. Configure environment variables

export HADOOP_HOME=/home/yfbd/bigdata/hadoop-3.1.4
export PATH=$PATH:$HADOOP_HOME/bin
export PATH=$PATH:$HADOOP_HOME/sbin

export HADOOP_MAPRED_HOME=${HADOOP_HOME}
export HADOOP_COMMON_HOME=${HADOOP_HOME}
export HADOOP_HDFS_HOME=${HADOOP_HOME}
export HADOOP_YARN_HOME=${HADOOP_HOME}

export HADOOP_CONF_DIR=/home/yfbd/bigdata/hadoop-3.1.4/etc/hadoop
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin

export ZOOKEEPER_HOME=/home/yfbd/bigdata/zookeeper
export PATH=$PATH:$ZOOKEEPER_HOME/bin

export HIVE_HOME=/home/yfbd/bigdata/hive3.1.2
export PATH=$PATH:$HIVE_HOME/bin
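
After saving, reload the profile and confirm that the Hadoop and Hive binaries are on the PATH:

source /etc/profile
hadoop version
hive --version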

2. Configure hive-site.xml

  • Master node

    
<configuration>
    <property>
        <name>hive.metastore.warehouse.dir</name>
        <value>/user/hive_remote/warehouse</value>
        <description>Default warehouse directory on HDFS</description>
    </property>
    <property>
        <name>javax.jdo.option.ConnectionURL</name>
        <value>jdbc:mysql://10.216.3.17:3306/hive3_remote?createDatabaseIfNotExist=true&amp;useSSL=false&amp;allowPublicKeyRetrieval=true</value>
        <description>JDBC connection for the metastore database</description>
    </property>
    <property>
        <name>javax.jdo.option.ConnectionDriverName</name>
        <value>com.mysql.cj.jdbc.Driver</value>
        <description>JDBC driver; the driver jar must be copied into ${HIVE_HOME}/lib</description>
    </property>
    <property>
        <name>javax.jdo.option.ConnectionUserName</name>
        <value>hive3</value>
        <description>Database username</description>
    </property>
    <property>
        <name>javax.jdo.option.ConnectionPassword</name>
        <value>123456</value>
        <description>Database password</description>
    </property>
    <property>
        <name>hive.cli.print.header</name>
        <value>true</value>
    </property>
    <property>
        <name>hive.cli.print.current.db</name>
        <value>true</value>
    </property>
    <property>
        <name>spark.home</name>
        <value>/home/yfbd/bigdata/spark-3.1.3-bin-hadoop3.2</value>
    </property>
    <property>
        <name>hive.aux.jars.path</name>
        <value>file:///home/yfbd/bigdata/hive3.1.2/lib</value>
    </property>
</configuration>
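
The MySQL JDBC driver referenced above is not shipped with Hive, so the connector jar has to be dropped into Hive's lib directory (the jar name below is only an example and depends on the connector version you download):

cp mysql-connector-java-8.0.28.jar /home/yfbd/bigdata/hive3.1.2/lib/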
    

  • Client nodes

    
<configuration>
    <property>
        <name>hive.metastore.uris</name>
        <value>thrift://yfbd-virtual-machine-01:9083</value>
        <description>Metastore address</description>
    </property>
</configuration>

3. Initialize the Hive metastore schema

cd /home/yfbd/bigdata/hive3.1.2/bin
schematool -dbType mysql -initSchema
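
If initialization succeeds, schematool reports completion and the metastore tables appear in the hive3_remote database; the connection and schema version can also be checked with:

schematool -dbType mysql -info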

4. Start the Metastore service

hive --service metastore
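
Running it this way keeps the terminal occupied; to leave the metastore running in the background, a common approach is (the log path here is arbitrary):

nohup hive --service metastore > /home/yfbd/bigdata/hive3.1.2/metastore.log 2>&1 &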

Spark master/worker deployment

1. Download the Spark package

https://mirrors.tuna.tsinghua.edu.cn/apache/spark/spark-3.1.3/spark-3.1.3-bin-hadoop3.2.tgz

2. Extract

tar -zxvf spark-3.1.3-bin-hadoop3.2.tgz

3. Add symlinks to the Hadoop configuration files (in Spark's conf directory)

ln -s /home/yfbd/bigdata/hadoop-3.1.4/etc/hadoop/core-site.xml
ln -s /home/yfbd/bigdata/hadoop-3.1.4/etc/hadoop/hdfs-site.xml

4. Add a hive-site.xml configuration file

vim hive-site.xml

    
<configuration>
    <property>
        <name>hive.metastore.warehouse.dir</name>
        <value>/user/hive_remote/warehouse</value>
        <description>Default warehouse directory on HDFS</description>
    </property>
    <property>
        <name>javax.jdo.option.ConnectionURL</name>
        <value>jdbc:mysql://10.216.3.17:3306/hive3_remote?createDatabaseIfNotExist=true&amp;useSSL=false&amp;allowPublicKeyRetrieval=true</value>
        <description>JDBC connection for the metastore database</description>
    </property>
    <property>
        <name>javax.jdo.option.ConnectionDriverName</name>
        <value>com.mysql.cj.jdbc.Driver</value>
        <description>JDBC driver; the driver jar must be copied into ${HIVE_HOME}/lib</description>
    </property>
    <property>
        <name>javax.jdo.option.ConnectionUserName</name>
        <value>hive3</value>
        <description>Database username</description>
    </property>
    <property>
        <name>javax.jdo.option.ConnectionPassword</name>
        <value>123456</value>
        <description>Database password</description>
    </property>
    <property>
        <name>hive.cli.print.header</name>
        <value>true</value>
    </property>
    <property>
        <name>hive.cli.print.current.db</name>
        <value>true</value>
    </property>
    <property>
        <name>spark.home</name>
        <value>/home/yfbd/bigdata/spark-3.1.3-bin-hadoop3.2</value>
    </property>
    <property>
        <name>hive.aux.jars.path</name>
        <value>file:///home/yfbd/bigdata/hive3.1.2/lib</value>
    </property>
</configuration>

5. Edit spark-env.sh

export JAVA_HOME=/home/yfbd/bigdata/jdk1.8
export HADOOP_HOME=/home/yfbd/bigdata/hadoop-3.1.4
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
export SPARK_MASTER_HOST=yfbd-virtual-machine-01
export SPARK_MASTER_WEBUI_PORT=8060
export SPARK_WORKER_WEBUI_PORT=8061
export SPARK_WORKER_MEMORY=500m
export SPARK_WORKER_CORES=1
export SPARK_DAEMON_JAVA_OPTS="-Dspark.deploy.recoveryMode=ZOOKEEPER
 -Dspark.deploy.zookeeper.url=yfbd-virtual-machine-01:2181,yfbd-virtual-machine-02:2181,yfbd-virtual-machine-03:2181
 -Dspark.deploy.zookeeper.dir=/opt/hadoop/data/zookeeper/spark"
export HIVE_HOME=/home/yfbd/bigdata/hive3.1.2
export YARN_CONF_DIR=/home/yfbd/bigdata/hadoop-3.1.4/etc/hadoop
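
spark-env.sh is not present in a fresh Spark distribution; it is normally created from the bundled template before adding the lines above:

cd /home/yfbd/bigdata/spark-3.1.3-bin-hadoop3.2/conf
cp spark-env.sh.template spark-env.sh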

6. Edit workers

yfbd-virtual-machine-01
yfbd-virtual-machine-02
yfbd-virtual-machine-03

7. Edit spark-defaults.conf

spark.sql.hive.metastore.version        3.1.2
spark.sql.hive.metastore.jars           path
spark.sql.hive.metastore.jars.path      file:///home/yfbd/bigdata/hive3.1.2/lib/*.jar
spark.hadoop.hive.metastore.uris       thrift://yfbd-virtual-machine-01:9083
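
With these settings and the hive-site.xml from step 4 in place, Spark should see the databases registered in the Hive metastore. A quick check, assuming the metastore service from the Hive section is running:

cd /home/yfbd/bigdata/spark-3.1.3-bin-hadoop3.2/bin
./spark-sql -e "show databases;"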

8. Start Spark

cd /home/yfbd/bigdata/spark-3.1.3-bin-hadoop3.2/sbin
./start-all.sh
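
A quick check on each node after startup:

jps
# expect a Master process on yfbd-virtual-machine-01 and a Worker on every node;
# the master web UI is on the port set by SPARK_MASTER_WEBUI_PORT (http://yfbd-virtual-machine-01:8060)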

Kyuubi deployment

1. Download and extract the Kyuubi package

https://dlcdn.apache.org/incubator/kyuubi/kyuubi-1.5.1-incubating/apache-kyuubi-1.5.1-incubating-bin.tgz
tar -zxvf apache-kyuubi-1.5.1-incubating-bin.tgz

2. Configure kyuubi-defaults.conf

cp kyuubi-defaults.conf.template kyuubi-defaults.conf

kyuubi.ha.zookeeper.quorum=yfbd-virtual-machine-01:2181,yfbd-virtual-machine-02:2181,yfbd-virtual-machine-03:2181
kyuubi.authentication=NONE
kyuubi.engine.share.level=USER
kyuubi.frontend.bind.host=0.0.0.0
kyuubi.frontend.bind.port=10009
kyuubi.ha.zookeeper.namespace=kyuubi
kyuubi.session.engine.idle.timeout=PT10H

spark.master=yarn
spark.submit.deployMode=cluster
spark.dynamicAllocation.enabled=true
spark.dynamicAllocation.minExecutors=0
spark.dynamicAllocation.maxExecutors=20
spark.dynamicAllocation.executorIdleTimeout=60
spark.shuffle.service.enabled=true

3. Configure kyuubi-env.sh

cp kyuubi-env.sh.template kyuubi-env.sh

export JAVA_HOME=/home/yfbd/bigdata/jdk1.8
export SPARK_HOME=/home/yfbd/bigdata/spark-3.1.3-bin-hadoop3.2
export SPARK_CONF_DIR=${SPARK_HOME}/conf
export HADOOP_CONF_DIR=/home/yfbd/bigdata/hadoop-3.1.4/etc/hadoop
export KYUUBI_MAX_LOG_FILES=10

4. Configure hive-site.xml

cp /home/yfbd/bigdata/hive3.1.2/conf/hive-site.xml /home/yfbd/bigdata/apache-kyuubi-1.5.1-incubating-bin/conf/

5. Start Kyuubi

cd /home/yfbd/bigdata/apache-kyuubi-1.5.1-incubating-bin/bin
./kyuubi start
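
Once Kyuubi is up, clients connect through its HiveServer2-compatible Thrift endpoint on the port configured earlier (10009), for example with the beeline that ships with Hive (the user name is just an example):

/home/yfbd/bigdata/hive3.1.2/bin/beeline -u "jdbc:hive2://yfbd-virtual-machine-01:10009/" -n yfbd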
