Ubuntu 16.04: Configuring Spark to Connect to Hadoop

1. Configure the Spark history server
2. Configure YARN in Hadoop
3. Add the Hadoop configuration
cd /usr/share/hadoop/hadoop-2.7.7/etc/hadoop

Edit yarn-site.xml

sudo vim yarn-site.xml

Add the following properties inside the <configuration> element:

<property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
</property>
<property>
        <name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
        <value>org.apache.hadoop.mapred.ShuffleHandler</value>
</property>
<!-- aggregate container logs into HDFS so they survive after a job finishes -->
<property>
        <name>yarn.log-aggregation-enable</name>
        <value>true</value>
</property>
<!-- make container-log links in the YARN UI point at the JobHistory server -->
<property>
        <name>yarn.log.server.url</name>
        <value>http://your-ip:19888/jobhistory/logs</value>
</property>
<!-- disable physical/virtual memory checks so containers are not killed on low-memory machines -->
<property>
        <name>yarn.nodemanager.pmem-check-enabled</name>
        <value>false</value>
</property>
<property>
        <name>yarn.nodemanager.vmem-check-enabled</name>
        <value>false</value>
</property>

# save and quit with :wq
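
With log aggregation enabled, the logs of finished containers can also be fetched from the command line. A quick check, assuming a job has already run and <applicationId> is replaced with a real ID taken from the ResourceManager UI:

yarn logs -applicationId <applicationId>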

Edit mapred-site.xml (if the file does not exist yet, copy mapred-site.xml.template to mapred-site.xml first)

sudo vim mapred-site.xml

Add the following properties inside the <configuration> element:

<property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
</property>
<!-- where the JobHistory server keeps logs of finished jobs -->
<property>
        <name>mapreduce.jobhistory.done-dir</name>
        <value>/user/history/done</value>
</property>
<property>
        <name>mapreduce.jobhistory.intermediate-done-dir</name>
        <value>/user/history/done_intermediate</value>
</property>

# save and quit with :wq
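
The two history directories live in HDFS. The JobHistory server normally creates them itself, but if your daemons run under a different user it can help to pre-create them with open permissions; a sketch, assuming the hdfs command is on the PATH:

hdfs dfs -mkdir -p /user/history/done /user/history/done_intermediate
hdfs dfs -chmod -R 1777 /user/history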
4. Restart YARN and the JobHistory server
cd /usr/share/hadoop/hadoop-2.7.7
./sbin/stop-yarn.sh
./sbin/mr-jobhistory-daemon.sh stop historyserver
./sbin/start-yarn.sh
./sbin/mr-jobhistory-daemon.sh start historyserver
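To confirm that everything came back up, list the running Java daemons; ResourceManager, NodeManager, and JobHistoryServer should all appear (NameNode and DataNode as well, if HDFS is running):

jps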
5. Configure Spark
cd /usr/share/spark/spark-2.2.2-bin-hadoop2.7/conf
sudo vim spark-defaults.conf
spark.yarn.historyServer.address=your-ip:18080
# the remaining entries configure the Spark history server
spark.history.ui.port=18080
spark.eventLog.enabled=true
spark.eventLog.dir=hdfs:///tmp/spark/events
spark.history.fs.logDirectory=hdfs:///tmp/spark/events
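
Note that spark.eventLog.dir and spark.history.fs.logDirectory point at the same HDFS path, so the history server reads exactly what running applications write. Spark also needs to know where the Hadoop configuration lives in order to talk to YARN and HDFS; if it is not already exported in your environment, a minimal sketch is to set it in conf/spark-env.sh:

# conf/spark-env.sh
export HADOOP_CONF_DIR=/usr/share/hadoop/hadoop-2.7.7/etc/hadoop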
6. Create the Spark event-log directory in HDFS
hdfs dfs -mkdir -p /tmp/spark/events
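A quick sanity check that the directory was created:

hdfs dfs -ls /tmp/spark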
7. Start the Spark history server
cd  /usr/share/spark/spark-2.2.2-bin-hadoop2.7
./sbin/start-history-server.sh
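The script prints the path of its log file, which is the first place to look if the UI does not come up. The daemon itself shows up as HistoryServer:

jps | grep HistoryServer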
8. Test

Open http://localhost:18080 in a browser; the Spark history server web UI should come up.
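
To get an entry to show up in the UI, submit any job to YARN first. A minimal sketch using the SparkPi example that ships with Spark (the jar name assumes the stock Spark 2.2.2 / Scala 2.11 build):

cd /usr/share/spark/spark-2.2.2-bin-hadoop2.7
./bin/spark-submit \
        --class org.apache.spark.examples.SparkPi \
        --master yarn \
        --deploy-mode cluster \
        examples/jars/spark-examples_2.11-2.2.2.jar 10

When the job finishes, it should appear both in the YARN ResourceManager UI (port 8088 by default) and in the Spark history server at localhost:18080.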
