Environment
| Host | IP | Roles |
| --- | --- | --- |
| yfbd-virtual-machine-01 | 10.216.6.227 | zookeeper, datanode, namenode (active), spark master/worker, hive, hive metastore |
| yfbd-virtual-machine-02 | 10.216.6.228 | zookeeper, datanode, namenode (standby), spark worker, hive |
| yfbd-virtual-machine-03 | 10.216.6.229 | zookeeper, datanode, spark worker, hive |
Configure passwordless SSH
1. Edit the hosts file
10.216.6.227 yfbd-virtual-machine-01
10.216.6.228 yfbd-virtual-machine-02
10.216.6.229 yfbd-virtual-machine-03
2. Set up passwordless login
ssh-keygen
ssh-copy-id -i /home/yfbd/.ssh/id_rsa.pub yfbd-virtual-machine-02
ssh-copy-id -i /home/yfbd/.ssh/id_rsa.pub yfbd-virtual-machine-03
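Note that the workers file (below) includes yfbd-virtual-machine-01 itself, so start-all.sh will also SSH to the local machine; copying the key to machine-01 as well avoids a password prompt there, and each login can be verified:
ssh-copy-id -i /home/yfbd/.ssh/id_rsa.pub yfbd-virtual-machine-01
ssh yfbd-virtual-machine-02 hostname    # should return without asking for a password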
Configure environment variables
vim /etc/profile
#JAVA_HOME
export JAVA_HOME=/home/yfbd/bigdata/jdk1.8
export PATH=$PATH:$JAVA_HOME/bin
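After editing, reload the profile and confirm the JDK is picked up:
source /etc/profile
java -version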
Hadoop HA Deployment
1. Configure core-site.xml
vim /home/yfbd/bigdata/hadoop-3.1.4/etc/hadoop/core-site.xml
<property>
  <name>fs.defaultFS</name>
  <value>hdfs://ns</value>
</property>
<property>
  <name>hadoop.tmp.dir</name>
  <value>/home/yfbd/bigdata/hadoop-3.1.4/data/tmp</value>
</property>
<property>
  <name>hadoop.http.staticuser.user</name>
  <value>yfbd</value>
</property>
<property>
  <name>hadoop.proxyuser.yfbd.hosts</name>
  <value>*</value>
</property>
<property>
  <name>hadoop.proxyuser.yfbd.groups</name>
  <value>*</value>
</property>
<property>
  <name>ha.zookeeper.quorum</name>
  <value>yfbd-virtual-machine-01:2181,yfbd-virtual-machine-02:2181,yfbd-virtual-machine-03:2181</value>
</property>
2. Configure hdfs-site.xml
<property>
  <name>dfs.replication</name>
  <value>3</value>
</property>
<property>
  <name>dfs.nameservices</name>
  <value>ns</value>
</property>
<property>
  <name>dfs.ha.namenodes.ns</name>
  <value>nn1,nn2</value>
</property>
<property>
  <name>dfs.namenode.rpc-address.ns.nn1</name>
  <value>yfbd-virtual-machine-01:8020</value>
</property>
<property>
  <name>dfs.namenode.http-address.ns.nn1</name>
  <value>yfbd-virtual-machine-01:50070</value>
</property>
<property>
  <name>dfs.namenode.rpc-address.ns.nn2</name>
  <value>yfbd-virtual-machine-02:8020</value>
</property>
<property>
  <name>dfs.namenode.http-address.ns.nn2</name>
  <value>yfbd-virtual-machine-02:50070</value>
</property>
<property>
  <name>dfs.namenode.shared.edits.dir</name>
  <value>qjournal://yfbd-virtual-machine-01:8485;yfbd-virtual-machine-02:8485;yfbd-virtual-machine-03:8485/ns</value>
</property>
<property>
  <name>dfs.journalnode.edits.dir</name>
  <value>/home/yfbd/bigdata/hadoop-3.1.4/data/journal</value>
</property>
<property>
  <name>dfs.ha.automatic-failover.enabled</name>
  <value>true</value>
</property>
<property>
  <name>dfs.client.failover.proxy.provider.ns</name>
  <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>
<property>
  <name>dfs.ha.fencing.methods</name>
  <value>sshfence</value>
</property>
<property>
  <name>dfs.permissions.enabled</name>
  <value>false</value>
</property>
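One caveat: sshfence also needs to know which private key to use, or fencing (and with it automatic failover) will fail. A minimal addition, assuming the yfbd user's key from the SSH setup above:
<property>
  <name>dfs.ha.fencing.ssh.private-key-files</name>
  <value>/home/yfbd/.ssh/id_rsa</value>
</property>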
3. Configure yarn-site.xml
<property>
  <name>yarn.nodemanager.aux-services</name>
  <value>mapreduce_shuffle</value>
</property>
<property>
  <name>yarn.resourcemanager.ha.enabled</name>
  <value>true</value>
</property>
<property>
  <name>yarn.resourcemanager.cluster-id</name>
  <value>rmcluster</value>
</property>
<property>
  <name>yarn.resourcemanager.ha.rm-ids</name>
  <value>rm1,rm2,rm3</value>
</property>
<property>
  <name>yarn.resourcemanager.hostname.rm1</name>
  <value>yfbd-virtual-machine-01</value>
</property>
<property>
  <name>yarn.resourcemanager.hostname.rm2</name>
  <value>yfbd-virtual-machine-02</value>
</property>
<property>
  <name>yarn.resourcemanager.hostname.rm3</name>
  <value>yfbd-virtual-machine-03</value>
</property>
<property>
  <name>yarn.resourcemanager.zk-address</name>
  <value>yfbd-virtual-machine-01:2181,yfbd-virtual-machine-02:2181,yfbd-virtual-machine-03:2181</value>
</property>
<property>
  <name>yarn.resourcemanager.recovery.enabled</name>
  <value>true</value>
</property>
<property>
  <name>yarn.resourcemanager.store.class</name>
  <value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
</property>
<property>
  <name>yarn.nodemanager.env-whitelist</name>
  <value>JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_MAPRED_HOME</value>
</property>
<property>
  <name>yarn.log-aggregation-enable</name>
  <value>true</value>
</property>
<property>
  <name>yarn.log.server.url</name>
  <value>http://yfbd-virtual-machine-01:19888/jobhistory/logs</value>
</property>
<property>
  <name>yarn.log-aggregation.retain-seconds</name>
  <value>604800</value>
</property>
<property>
  <name>yarn.resourcemanager.ha.id</name>
  <value>rm1</value>
</property>
<property>
  <name>yarn.nodemanager.pmem-check-enabled</name>
  <value>false</value>
</property>
<property>
  <name>yarn.nodemanager.vmem-check-enabled</name>
  <value>false</value>
</property>
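Note: yarn.resourcemanager.ha.id names the local ResourceManager, so rm1 is correct only on yfbd-virtual-machine-01; set the value to rm2 on machine-02 and rm3 on machine-03.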
4. Configure workers
yfbd-virtual-machine-01
yfbd-virtual-machine-02
yfbd-virtual-machine-03
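The same configuration must exist on all three nodes; a minimal sketch with scp, assuming identical install paths on every host:
scp /home/yfbd/bigdata/hadoop-3.1.4/etc/hadoop/{core-site.xml,hdfs-site.xml,yarn-site.xml,workers} yfbd-virtual-machine-02:/home/yfbd/bigdata/hadoop-3.1.4/etc/hadoop/
scp /home/yfbd/bigdata/hadoop-3.1.4/etc/hadoop/{core-site.xml,hdfs-site.xml,yarn-site.xml,workers} yfbd-virtual-machine-03:/home/yfbd/bigdata/hadoop-3.1.4/etc/hadoop/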
5. Format the NameNode
cd /home/yfbd/bigdata/hadoop-3.1.4/bin
./hdfs namenode -format
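For an HA cluster the first format is order-sensitive: ZooKeeper and the JournalNodes must already be running before nn1 is formatted, the standby NameNode is bootstrapped from nn1 rather than formatted itself, and the failover state must be initialized in ZooKeeper. A sketch of the usual sequence around the format step:
# on all three nodes, before formatting
./hdfs --daemon start journalnode
# on yfbd-virtual-machine-01, after formatting
./hdfs --daemon start namenode
# on yfbd-virtual-machine-02
./hdfs namenode -bootstrapStandby
# on yfbd-virtual-machine-01
./hdfs zkfc -formatZK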
6. Start the cluster
cd /home/yfbd/bigdata/hadoop-3.1.4/sbin
./start-all.sh
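To confirm HA came up, check the daemons and the NameNode states:
jps
/home/yfbd/bigdata/hadoop-3.1.4/bin/hdfs haadmin -getServiceState nn1
/home/yfbd/bigdata/hadoop-3.1.4/bin/hdfs haadmin -getServiceState nn2
One NameNode should report active and the other standby.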
Hive Deployment
1. Configure environment variables
export HADOOP_HOME=/home/yfbd/bigdata/hadoop-3.1.4
export PATH=$PATH:$HADOOP_HOME/bin
export PATH=$PATH:$HADOOP_HOME/sbin
export HADOOP_MAPRED_HOME=${HADOOP_HOME}
export HADOOP_COMMON_HOME=${HADOOP_HOME}
export HADOOP_HDFS_HOME=${HADOOP_HOME}
export HADOOP_YARN_HOME=${HADOOP_HOME}
export HADOOP_CONF_DIR=/home/yfbd/bigdata/hadoop-3.1.4/etc/hadoop
export ZOOKEEPER_HOME=/home/yfbd/bigdata/zookeeper
export PATH=$PATH:$ZOOKEEPER_HOME/bin
export HIVE_HOME=/home/yfbd/bigdata/hive3.1.2
export PATH=$PATH:$HIVE_HOME/bin
2. Configure hive-site.xml
<property>
  <name>hive.metastore.warehouse.dir</name>
  <value>/user/hive_remote/warehouse</value>
  <description>Default warehouse directory in HDFS</description>
</property>
<property>
  <name>javax.jdo.option.ConnectionURL</name>
  <value>jdbc:mysql://10.216.3.17:3306/hive3_remote?createDatabaseIfNotExist=true&amp;useSSL=false&amp;allowPublicKeyRetrieval=true</value>
  <description>JDBC connection to the metadata database</description>
</property>
<property>
  <name>javax.jdo.option.ConnectionDriverName</name>
  <value>com.mysql.cj.jdbc.Driver</value>
  <description>JDBC driver; the jar must be copied into ${HIVE_HOME}/lib</description>
</property>
<property>
  <name>javax.jdo.option.ConnectionUserName</name>
  <value>hive3</value>
  <description>Database username</description>
</property>
<property>
  <name>javax.jdo.option.ConnectionPassword</name>
  <value>123456</value>
  <description>Database password</description>
</property>
<property>
  <name>hive.cli.print.header</name>
  <value>true</value>
</property>
<property>
  <name>hive.cli.print.current.db</name>
  <value>true</value>
</property>
<property>
  <name>spark.home</name>
  <value>/home/yfbd/bigdata/spark-3.1.3-bin-hadoop3.2</value>
</property>
<property>
  <name>hive.aux.jars.path</name>
  <value>file:///home/yfbd/bigdata/hive3.1.2/lib</value>
</property>
<property>
  <name>hive.metastore.uris</name>
  <value>thrift://yfbd-virtual-machine-01:9083</value>
  <description>Metastore address</description>
</property>
3. Initialize the Hive metastore schema
cd /home/yfbd/bigdata/hive3.1.2/bin
schematool -dbType mysql -initSchema
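Assuming the MySQL driver jar is already under ${HIVE_HOME}/lib (see the hive-site.xml description above), the result can be verified with:
schematool -dbType mysql -info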
4. Start the Metastore service
hive --service metastore
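In practice the metastore is usually left running in the background; a minimal sketch (the log path here is an arbitrary choice):
nohup hive --service metastore > /tmp/metastore.log 2>&1 &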
Spark Master/Worker Deployment
1. Download the Spark package
https://mirrors.tuna.tsinghua.edu.cn/apache/spark/spark-3.1.3/spark-3.1.3-bin-hadoop3.2.tgz
2. Extract
tar -zxvf spark-3.1.3-bin-hadoop3.2.tgz
3. Symlink the Hadoop config files (into Spark's conf directory)
cd /home/yfbd/bigdata/spark-3.1.3-bin-hadoop3.2/conf
ln -s /home/yfbd/bigdata/hadoop-3.1.4/etc/hadoop/core-site.xml .
ln -s /home/yfbd/bigdata/hadoop-3.1.4/etc/hadoop/hdfs-site.xml .
4. Add the hive-site.xml config file
vim hive-site.xml
<property>
  <name>hive.metastore.warehouse.dir</name>
  <value>/user/hive_remote/warehouse</value>
  <description>Default warehouse directory in HDFS</description>
</property>
<property>
  <name>javax.jdo.option.ConnectionURL</name>
  <value>jdbc:mysql://10.216.3.17:3306/hive3_remote?createDatabaseIfNotExist=true&amp;useSSL=false&amp;allowPublicKeyRetrieval=true</value>
  <description>JDBC connection to the metadata database</description>
</property>
<property>
  <name>javax.jdo.option.ConnectionDriverName</name>
  <value>com.mysql.cj.jdbc.Driver</value>
  <description>JDBC driver; the jar must be copied into ${HIVE_HOME}/lib</description>
</property>
<property>
  <name>javax.jdo.option.ConnectionUserName</name>
  <value>hive3</value>
  <description>Database username</description>
</property>
<property>
  <name>javax.jdo.option.ConnectionPassword</name>
  <value>123456</value>
  <description>Database password</description>
</property>
<property>
  <name>hive.cli.print.header</name>
  <value>true</value>
</property>
<property>
  <name>hive.cli.print.current.db</name>
  <value>true</value>
</property>
<property>
  <name>spark.home</name>
  <value>/home/yfbd/bigdata/spark-3.1.3-bin-hadoop3.2</value>
</property>
<property>
  <name>hive.aux.jars.path</name>
  <value>file:///home/yfbd/bigdata/hive3.1.2/lib</value>
</property>
5. Edit spark-env.sh
export JAVA_HOME=/home/yfbd/bigdata/jdk1.8
export HADOOP_HOME=/home/yfbd/bigdata/hadoop-3.1.4
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
export SPARK_MASTER_HOST=yfbd-virtual-machine-01
export SPARK_MASTER_WEBUI_PORT=8060
export SPARK_WORKER_WEBUI_PORT=8061
export SPARK_WORKER_MEMORY=500m
export SPARK_WORKER_CORES=1
export SPARK_DAEMON_JAVA_OPTS="-Dspark.deploy.recoveryMode=ZOOKEEPER -Dspark.deploy.zookeeper.url=yfbd-virtual-machine-01:2181,yfbd-virtual-machine-02:2181,yfbd-virtual-machine-03:2181 -Dspark.deploy.zookeeper.dir=/opt/hadoop/data/zookeeper/spark"
export HIVE_HOME=/home/yfbd/bigdata/hive3.1.2
export YARN_CONF_DIR=/home/yfbd/bigdata/hadoop-3.1.4/etc/hadoop
6. Edit workers
yfbd-virtual-machine-01
yfbd-virtual-machine-02
yfbd-virtual-machine-03
7. Edit spark-defaults.conf
spark.sql.hive.metastore.version 3.1.2
spark.sql.hive.metastore.jars path
spark.sql.hive.metastore.jars.path file:///home/yfbd/bigdata/hive3.1.2/lib/*.jar
spark.hadoop.hive.metastore.uris thrift://yfbd-virtual-machine-01:9083
8. Start Spark
cd /home/yfbd/bigdata/spark-3.1.3-bin-hadoop3.2/sbin
./start-all.sh
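A quick smoke test, assuming the Hive metastore service from the previous section is running:
jps    # expect Master on machine-01 and a Worker on every node
/home/yfbd/bigdata/spark-3.1.3-bin-hadoop3.2/bin/spark-sql -e "show databases;"
The master web UI is at http://yfbd-virtual-machine-01:8060 (the port set in spark-env.sh).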
Kyuubi Deployment
1. Download and extract the Kyuubi package
https://dlcdn.apache.org/incubator/kyuubi/kyuubi-1.5.1-incubating/apache-kyuubi-1.5.1-incubating-bin.tgz
tar -zxvf apache-kyuubi-1.5.1-incubating-bin.tgz
2. Configure kyuubi-defaults.conf
cp kyuubi-defaults.conf.template kyuubi-defaults.conf
kyuubi.ha.zookeeper.quorum=yfbd-virtual-machine-01:2181,yfbd-virtual-machine-02:2181,yfbd-virtual-machine-03:2181
kyuubi.authentication=NONE
kyuubi.engine.share.level=USER
kyuubi.frontend.bind.host=0.0.0.0
kyuubi.frontend.bind.port=10009
kyuubi.ha.zookeeper.namespace=kyuubi
kyuubi.session.engine.idle.timeout=PT10H
spark.master=yarn
spark.submit.deployMode=cluster
spark.dynamicAllocation.enabled=true
spark.dynamicAllocation.minExecutors=0
spark.dynamicAllocation.maxExecutors=20
spark.dynamicAllocation.executorIdleTimeout=60
spark.shuffle.service.enabled=true
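Note: spark.shuffle.service.enabled=true with spark.master=yarn requires Spark's external shuffle service on every NodeManager, and the yarn-site.xml above only registers mapreduce_shuffle. A sketch of the extra yarn-site.xml entries this needs (the spark-<version>-yarn-shuffle.jar must also be on the NodeManager classpath):
<property>
  <name>yarn.nodemanager.aux-services</name>
  <value>mapreduce_shuffle,spark_shuffle</value>
</property>
<property>
  <name>yarn.nodemanager.aux-services.spark_shuffle.class</name>
  <value>org.apache.spark.network.yarn.YarnShuffleService</value>
</property>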
3. Configure kyuubi-env.sh
cp kyuubi-env.sh.template kyuubi-env.sh
export JAVA_HOME=/home/yfbd/bigdata/jdk1.8
export SPARK_HOME=/home/yfbd/bigdata/spark-3.1.3-bin-hadoop3.2
export SPARK_CONF_DIR=${SPARK_HOME}/conf
export HADOOP_CONF_DIR=/home/yfbd/bigdata/hadoop-3.1.4/etc/hadoop
export KYUUBI_MAX_LOG_FILES=10
4. Configure hive-site.xml
cp /home/yfbd/bigdata/hive3.1.2/conf/hive-site.xml /home/yfbd/bigdata/apache-kyuubi-1.5.1-incubating-bin/conf/
5. Start Kyuubi
cd /home/yfbd/bigdata/apache-kyuubi-1.5.1-incubating-bin/bin
./kyuubi start
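Once started, the endpoint can be tested with the beeline shipped with Spark (Kyuubi speaks the HiveServer2 Thrift protocol on the port configured above):
/home/yfbd/bigdata/spark-3.1.3-bin-hadoop3.2/bin/beeline -u 'jdbc:hive2://yfbd-virtual-machine-01:10009/' -n yfbd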