Hadoop-HA-Hive-on-Spark: installation and configuration files for a 4-VM cluster

  • 版本号
  • 步骤
  • hadoop
    • core-site.xml
    • hdfs-site.xml
    • mapred-site.xml
    • slaves
    • workers
    • yarn-site.xml
  • hive
    • hive-site.xml
    • spark-defaults.conf
  • spark
    • hdfs-site.xml
    • hive-site.xml
    • slaves
    • yarn-site.xml
    • spark-env.sh

Versions

apache-hive-3.1.3-bin.tar
spark-3.0.0-bin-hadoop3.2.tgz
hadoop-3.1.3.tar.gz

Steps

On HDFS, create the spark-history directory (with permissions set to 777) and the spark-jars directory,
then upload the Spark jars to HDFS:

hdfs dfs -D dfs.replication=1 -put ./* /spark-jars
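
The directory creation described above can also be done from the command line; a minimal sketch, assuming a user with write access to the HDFS root:

hdfs dfs -mkdir -p /spark-history /spark-jars
hdfs dfs -chmod 777 /spark-history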

hadoop

core-site.xml







<configuration>

    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://hacluster</value>
    </property>

    <property>
        <name>hadoop.tmp.dir</name>
        <value>file:///opt/hadoop-3.1.3/tmp</value>
    </property>

    <property>
        <name>io.file.buffer.size</name>
        <value>4096</value>
    </property>

    <property>
        <name>ha.zookeeper.quorum</name>
        <value>node15:2181,node16:2181,node17:2181,node18:2181</value>
    </property>

    <property>
        <name>hadoop.proxyuser.root.hosts</name>
        <value>*</value>
    </property>

    <property>
        <name>hadoop.proxyuser.root.groups</name>
        <value>*</value>
    </property>

    <property>
        <name>hadoop.http.staticuser.user</name>
        <value>root</value>
    </property>
</configuration>

hdfs-site.xml







<configuration>
    <property>
        <name>dfs.block.size</name>
        <value>134217728</value>
    </property>

    <property>
        <name>dfs.replication</name>
        <value>3</value>
    </property>

    <property>
        <name>dfs.name.dir</name>
        <value>file:///opt/hadoop-3.1.3/dfs/namenode_data</value>
    </property>

    <property>
        <name>dfs.data.dir</name>
        <value>file:///opt/hadoop-3.1.3/dfs/datanode_data</value>
    </property>

    <property>
        <name>dfs.webhdfs.enabled</name>
        <value>true</value>
    </property>

    <property>
        <name>dfs.datanode.max.transfer.threads</name>
        <value>4096</value>
    </property>

    <property>
        <name>dfs.nameservices</name>
        <value>hacluster</value>
    </property>

    <property>
        <name>dfs.ha.namenodes.hacluster</name>
        <value>nn1,nn2</value>
    </property>

    <property>
        <name>dfs.namenode.rpc-address.hacluster.nn1</name>
        <value>node15:9000</value>
    </property>

    <property>
        <name>dfs.namenode.servicerpc-address.hacluster.nn1</name>
        <value>node15:53310</value>
    </property>

    <property>
        <name>dfs.namenode.http-address.hacluster.nn1</name>
        <value>node15:50070</value>
    </property>

    <property>
        <name>dfs.namenode.rpc-address.hacluster.nn2</name>
        <value>node16:9000</value>
    </property>

    <property>
        <name>dfs.namenode.servicerpc-address.hacluster.nn2</name>
        <value>node16:53310</value>
    </property>

    <property>
        <name>dfs.namenode.http-address.hacluster.nn2</name>
        <value>node16:50070</value>
    </property>

    <property>
        <name>dfs.namenode.shared.edits.dir</name>
        <value>qjournal://node15:8485;node16:8485;node17:8485;node18:8485/hacluster</value>
    </property>

    <property>
        <name>dfs.journalnode.edits.dir</name>
        <value>/opt/hadoop-3.1.3/dfs/journalnode_data</value>
    </property>

    <property>
        <name>dfs.namenode.edits.dir</name>
        <value>/opt/hadoop-3.1.3/dfs/edits</value>
    </property>

    <property>
        <name>dfs.ha.automatic-failover.enabled</name>
        <value>true</value>
    </property>

    <property>
        <name>dfs.client.failover.proxy.provider.hacluster</name>
        <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
    </property>

    <property>
        <name>dfs.ha.fencing.methods</name>
        <value>sshfence</value>
    </property>

    <property>
        <name>dfs.ha.fencing.ssh.private-key-files</name>
        <value>/root/.ssh/id_rsa</value>
    </property>

    <property>
        <name>dfs.permissions.enabled</name>
        <value>false</value>
    </property>
</configuration>
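
As a quick check that is not part of the original steps: once both NameNodes and the ZKFC processes are running, the failover state of nn1 and nn2 defined above can be queried with hdfs haadmin.

hdfs haadmin -getServiceState nn1
hdfs haadmin -getServiceState nn2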

mapred-site.xml







<configuration>
    <property>
        <name>mapred.job.tracker</name>
        <value>node15:9001</value>
    </property>

    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
    </property>

    <property>
        <name>mapreduce.jobhistory.address</name>
        <value>node15:10020</value>
    </property>

    <property>
        <name>mapreduce.jobhistory.webapp.address</name>
        <value>node15:19888</value>
    </property>

    <property>
        <name>yarn.application.classpath</name>
        <value>/opt/hadoop-3.1.3/etc/hadoop:/opt/hadoop-3.1.3/share/hadoop/common/lib/*:/opt/hadoop-3.1.3/share/hadoop/common/*:/opt/hadoop-3.1.3/share/hadoop/hdfs:/opt/hadoop-3.1.3/share/hadoop/hdfs/lib/*:/opt/hadoop-3.1.3/share/hadoop/hdfs/*:/opt/hadoop-3.1.3/share/hadoop/mapreduce/lib/*:/opt/hadoop-3.1.3/share/hadoop/mapreduce/*:/opt/hadoop-3.1.3/share/hadoop/yarn:/opt/hadoop-3.1.3/share/hadoop/yarn/lib/*:/opt/hadoop-3.1.3/share/hadoop/yarn/*</value>
    </property>

    <property>
        <name>mapreduce.map.memory.mb</name>
        <value>1500</value>
        <description>Physical memory limit for each Map task</description>
    </property>

    <property>
        <name>mapreduce.reduce.memory.mb</name>
        <value>3000</value>
        <description>Physical memory limit for each Reduce task</description>
    </property>

    <property>
        <name>mapreduce.map.java.opts</name>
        <value>-Xmx1200m</value>
    </property>

    <property>
        <name>mapreduce.reduce.java.opts</name>
        <value>-Xmx2600m</value>
    </property>
</configuration>


slaves

node15
node16
node17
node18

workers

node15
node16
node17
node18

yarn-site.xml



<configuration>
    <property>
        <name>yarn.nodemanager.vmem-check-enabled</name>
        <value>false</value>
        <description>Whether virtual memory limits will be enforced for containers</description>
    </property>

    <property>
        <name>yarn.nodemanager.vmem-pmem-ratio</name>
        <value>4</value>
        <description>Ratio between virtual memory to physical memory when setting memory limits for containers</description>
    </property>

    <property>
        <name>yarn.resourcemanager.ha.enabled</name>
        <value>true</value>
    </property>

    <property>
        <name>yarn.resourcemanager.cluster-id</name>
        <value>hayarn</value>
    </property>

    <property>
        <name>yarn.resourcemanager.ha.rm-ids</name>
        <value>rm1,rm2</value>
    </property>

    <property>
        <name>yarn.resourcemanager.hostname.rm1</name>
        <value>node15</value>
    </property>

    <property>
        <name>yarn.resourcemanager.hostname.rm2</name>
        <value>node16</value>
    </property>

    <property>
        <name>yarn.resourcemanager.webapp.address.rm1</name>
        <value>node15:8088</value>
    </property>

    <property>
        <name>yarn.resourcemanager.webapp.address.rm2</name>
        <value>node16:8088</value>
    </property>

    <property>
        <name>yarn.resourcemanager.zk-address</name>
        <value>node15:2181,node16:2181,node17:2181</value>
    </property>

    <property>
        <name>yarn.resourcemanager.recovery.enabled</name>
        <value>true</value>
    </property>

    <property>
        <name>yarn.resourcemanager.store.class</name>
        <value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
    </property>

    <property>
        <name>yarn.resourcemanager.hostname</name>
        <value>node18</value>
    </property>

    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>

    <property>
        <name>yarn.log-aggregation-enable</name>
        <value>true</value>
    </property>

    <property>
        <name>yarn.log-aggregation.retain-seconds</name>
        <value>604800</value>
    </property>

    <property>
        <name>yarn.log.server.url</name>
        <value>http://node15:19888/jobhistory/logs</value>
    </property>
</configuration>
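
Similarly, once both ResourceManagers are started, the HA state of rm1 and rm2 defined above can be verified with yarn rmadmin (a verification step not covered in the original steps):

yarn rmadmin -getServiceState rm1
yarn rmadmin -getServiceState rm2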

hive

hive-site.xml



<configuration>
    <property>
        <name>javax.jdo.option.ConnectionURL</name>
        <value>jdbc:mysql://node15:3306/metastore?useSSL=false</value>
    </property>

    <property>
        <name>javax.jdo.option.ConnectionDriverName</name>
        <value>com.mysql.jdbc.Driver</value>
    </property>

    <property>
        <name>javax.jdo.option.ConnectionUserName</name>
        <value>root</value>
    </property>

    <property>
        <name>javax.jdo.option.ConnectionPassword</name>
        <value>hadoop</value>
    </property>

    <property>
        <name>hive.metastore.warehouse.dir</name>
        <value>/user/hive/warehouse</value>
    </property>

    <property>
        <name>hive.metastore.schema.verification</name>
        <value>false</value>
    </property>

    <property>
        <name>hive.metastore.event.db.notification.api.auth</name>
        <value>false</value>
    </property>

    <property>
        <name>hive.server2.thrift.bind.host</name>
        <value>node15</value>
    </property>

    <property>
        <name>hive.server2.thrift.port</name>
        <value>10000</value>
    </property>

    <property>
        <name>spark.yarn.jars</name>
        <value>hdfs://node15:9000/spark-jars/*</value>
    </property>

    <property>
        <name>hive.execution.engine</name>
        <value>spark</value>
    </property>

    <property>
        <name>spark.home</name>
        <value>/opt/spark-3.0.0-bin-hadoop3.2/</value>
    </property>
</configuration>
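
As a quick connectivity check (not part of the original steps), HiveServer2 on node15:10000 as configured above can be reached with beeline, which ships with Hive; the root user matches the hadoop.proxyuser.root.* settings in core-site.xml:

beeline -u jdbc:hive2://node15:10000 -n root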


spark-defaults.conf

spark.master             yarn
spark.eventLog.enabled   true
spark.eventLog.dir       hdfs://node15:9000/spark-history
spark.executor.memory    600m
spark.driver.memory      600m
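
Before wiring Hive to Spark, a SparkPi run on YARN is one way to confirm the Spark-on-YARN setup and the uploaded jars work; this is a sketch assuming the stock layout of the spark-3.0.0-bin-hadoop3.2 distribution (examples jar built for Scala 2.12):

/opt/spark-3.0.0-bin-hadoop3.2/bin/spark-submit --master yarn --deploy-mode cluster \
  --class org.apache.spark.examples.SparkPi \
  /opt/spark-3.0.0-bin-hadoop3.2/examples/jars/spark-examples_2.12-3.0.0.jar 10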

spark

hdfs-site.xml

Symlink the file from the Hadoop configuration directory:
ln -s <source file> <link name>
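
For example, using the Hadoop conf directory from this guide and the default Spark conf directory (the latter is an assumption):

ln -s /opt/hadoop-3.1.3/etc/hadoop/hdfs-site.xml /opt/spark-3.0.0-bin-hadoop3.2/conf/hdfs-site.xml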

hive-site.xml

Symlink the file from the Hive configuration directory:
ln -s <source file> <link name>
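
For example (the Hive conf path below is an assumption based on the apache-hive-3.1.3-bin tarball name):

ln -s /opt/apache-hive-3.1.3-bin/conf/hive-site.xml /opt/spark-3.0.0-bin-hadoop3.2/conf/hive-site.xml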

slaves

node15
node16
node17
node18

yarn-site.xml

Symlink the file from the Hadoop configuration directory:
ln -s <source file> <link name>
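
For example, again assuming the default Spark conf directory:

ln -s /opt/hadoop-3.1.3/etc/hadoop/yarn-site.xml /opt/spark-3.0.0-bin-hadoop3.2/conf/yarn-site.xml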

spark-env.sh

#!/usr/bin/env bash

#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#


export SCALA_HOME=/usr/share/scala
export JAVA_HOME=/usr/java/jdk1.8.0_241-amd64
export SPARK_HOME=/opt/spark-3.0.0-bin-hadoop3.2
export SPARK_MASTER_IP=192.168.206.215
export SPARK_MASTER_PORT=7077
# Spark's web UI port defaults to 8080; change it to another port here to avoid possible conflicts
export SPARK_MASTER_WEBUI_PORT=7080
export SPARK_WORKER_CORES=1
export SPARK_WORKER_INSTANCES=1
export SPARK_EXECUTOR_MEMORY=512M
export SPARK_WORKER_MEMORY=1G
export SPARK_DIST_CLASSPATH=$(/opt/hadoop-3.1.3/bin/hadoop classpath)
export HADOOP_CONF_DIR=/opt/hadoop-3.1.3/etc/hadoop


# This file is sourced when running various Spark programs.
# Copy it as spark-env.sh and edit that to configure Spark for your site.

# Options read when launching programs locally with
# ./bin/run-example or ./bin/spark-submit
# - HADOOP_CONF_DIR, to point Spark towards Hadoop configuration files
# - SPARK_LOCAL_IP, to set the IP address Spark binds to on this node
# - SPARK_PUBLIC_DNS, to set the public dns name of the driver program

# Options read by executors and drivers running inside the cluster
# - SPARK_LOCAL_IP, to set the IP address Spark binds to on this node
# - SPARK_PUBLIC_DNS, to set the public DNS name of the driver program
# - SPARK_LOCAL_DIRS, storage directories to use on this node for shuffle and RDD data
# - MESOS_NATIVE_JAVA_LIBRARY, to point to your libmesos.so if you use Mesos

# Options read in YARN client/cluster mode
# - SPARK_CONF_DIR, Alternate conf dir. (Default: ${SPARK_HOME}/conf)
# - HADOOP_CONF_DIR, to point Spark towards Hadoop configuration files
# - YARN_CONF_DIR, to point Spark towards YARN configuration files when you use YARN
# - SPARK_EXECUTOR_CORES, Number of cores for the executors (Default: 1).
# - SPARK_EXECUTOR_MEMORY, Memory per Executor (e.g. 1000M, 2G) (Default: 1G)
# - SPARK_DRIVER_MEMORY, Memory for Driver (e.g. 1000M, 2G) (Default: 1G)

# Options for the daemons used in the standalone deploy mode
# - SPARK_MASTER_HOST, to bind the master to a different IP address or hostname
# - SPARK_MASTER_PORT / SPARK_MASTER_WEBUI_PORT, to use non-default ports for the master
# - SPARK_MASTER_OPTS, to set config properties only for the master (e.g. "-Dx=y")
# - SPARK_WORKER_CORES, to set the number of cores to use on this machine
# - SPARK_WORKER_MEMORY, to set how much total memory workers have to give executors (e.g. 1000m, 2g)
# - SPARK_WORKER_PORT / SPARK_WORKER_WEBUI_PORT, to use non-default ports for the worker
# - SPARK_WORKER_DIR, to set the working directory of worker processes
# - SPARK_WORKER_OPTS, to set config properties only for the worker (e.g. "-Dx=y")
# - SPARK_DAEMON_MEMORY, to allocate to the master, worker and history server themselves (default: 1g).
# - SPARK_HISTORY_OPTS, to set config properties only for the history server (e.g. "-Dx=y")
# - SPARK_SHUFFLE_OPTS, to set config properties only for the external shuffle service (e.g. "-Dx=y")
# - SPARK_DAEMON_JAVA_OPTS, to set config properties for all daemons (e.g. "-Dx=y")
# - SPARK_DAEMON_CLASSPATH, to set the classpath for all daemons
# - SPARK_PUBLIC_DNS, to set the public dns name of the master or workers

# Options for launcher
# - SPARK_LAUNCHER_OPTS, to set config properties and Java options for the launcher (e.g. "-Dx=y")

# Generic options for the daemons used in the standalone deploy mode
# - SPARK_CONF_DIR      Alternate conf dir. (Default: ${SPARK_HOME}/conf)
# - SPARK_LOG_DIR       Where log files are stored.  (Default: ${SPARK_HOME}/logs)
# - SPARK_PID_DIR       Where the pid file is stored. (Default: /tmp)
# - SPARK_IDENT_STRING  A string representing this instance of spark. (Default: $USER)
# - SPARK_NICENESS      The scheduling priority for daemons. (Default: 0)
# - SPARK_NO_DAEMONIZE  Run the proposed command in the foreground. It will not output a PID file.
# Options for native BLAS, like Intel MKL, OpenBLAS, and so on.
# You might get better performance to enable these options if using native BLAS (see SPARK-21305).
# - MKL_NUM_THREADS=1        Disable multi-threading of Intel MKL
# - OPENBLAS_NUM_THREADS=1   Disable multi-threading of OpenBLAS

你可能感兴趣的:(hadoop,hadoop,hive,spark)