Submitting a Spark Job to YARN Remotely in yarn-client Mode from IDEA

The RemoteSubmitApp main class

package com.cloudera

import org.apache.kafka.clients.consumer.ConsumerConfig
import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.log4j.Logger
import org.apache.spark.SparkConf
import org.apache.spark.streaming.dstream.DStream
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.streaming.kafka010.{ConsumerStrategies, KafkaUtils, LocationStrategies}

object RemoteSubmitApp {

  val logger = Logger.getLogger(this.getClass)

  def main(args: Array[String]): Unit = {
    // Set the user the job is submitted as
    // System.setProperty("HADOOP_USER_NAME", "root")
    val conf = new SparkConf().setAppName("Remote_Submit_App")
      // Submit in yarn-client mode (deprecated spelling in Spark 2.x; see the note after this listing)
      .setMaster("yarn-client")
      // ResourceManager host; the spark.hadoop. prefix is needed for the
      // value to be copied into the underlying Hadoop configuration
      .set("spark.hadoop.yarn.resourcemanager.hostname", "cdh02")
      // Driver memory (in client mode the driver JVM is already running,
      // so this normally has to be passed as a JVM option to take effect)
      .set("spark.driver.memory", "1024M")
      // Executor memory
      .set("spark.executor.memory", "800M")
      // Number of executors (the key is spark.executor.instances, not spark.executor.instance)
      .set("spark.executor.instances", "2")
      // YARN queue to submit to
      // .set("spark.yarn.queue", "default")
      // Driver IP address, i.e. the IP of the local machine
      .set("spark.driver.host", "192.168.1.26")
      // Serializer
      // .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
      // Path to the application jar; other dependency jars can be added to the list, comma-separated
      .setJars(List("E:\\RemoteSubmitSparkToYarn\\target\\RemoteSubmitSparkToYarn-1.0-SNAPSHOT.jar"))

    val scc = new StreamingContext(conf, Seconds(30))
    scc.sparkContext.setLogLevel("WARN")
    // scc.checkpoint("checkpoint")

    val topic = "remote_submit_test"
    val topicSet = topic.split(",").toSet

    val kafkaParams = Map[String, Object](
      ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG -> "10.101.75.190:9092,10.101.75.191:9092,10.101.75.192:9092",
      ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG -> classOf[StringDeserializer],
      ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG -> classOf[StringDeserializer],
      ConsumerConfig.GROUP_ID_CONFIG -> "remote_test",
      ConsumerConfig.AUTO_OFFSET_RESET_CONFIG -> "earliest",
      ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG -> (false: java.lang.Boolean)
    )

    val kafkaStreams = KafkaUtils.createDirectStream[String, String](
      scc,
      LocationStrategies.PreferConsistent,
      ConsumerStrategies.Subscribe[String, String](topicSet, kafkaParams)
    )

    // Word count over each 30-second batch
    val wordCounts: DStream[(String, Long)] = kafkaStreams.map(_.value())
      .flatMap(_.split(" "))
      .map(x => (x, 1L))
      .reduceByKey(_ + _)
    wordCounts.print()

    // Start the stream
    scc.start()
    scc.awaitTermination()
  }
}
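
A note on the master setting: as the run log below shows, Spark 2.x warns that the "yarn-client" master is deprecated. A minimal sketch of the non-deprecated equivalent (same behavior; the remaining .set(...) calls above stay unchanged):

val conf = new SparkConf().setAppName("Remote_Submit_App")
  // "yarn" plus an explicit deploy mode replaces the old "yarn-client" master
  .setMaster("yarn")
  .set("spark.submit.deployMode", "client")

For either form to work from the IDE, the cluster's core-site.xml, hdfs-site.xml and yarn-site.xml should be on the project classpath (for example under src/main/resources): that is how the client locates the ResourceManager and HDFS, and the "Failing over to rm381" line in the log below is this configuration being picked up.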

The pom.xml file

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.cloudera</groupId>
    <artifactId>RemoteSubmitSparkToYarn</artifactId>
    <version>1.0-SNAPSHOT</version>
    <packaging>jar</packaging>
    <name>RemoteSubmitSparkToYarn</name>

    <repositories>
        <repository>
            <id>cloudera</id>
            <url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
            <name>Cloudera Repositories</name>
            <releases>
                <enabled>true</enabled>
            </releases>
            <snapshots>
                <enabled>false</enabled>
            </snapshots>
        </repository>
    </repositories>

    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
        <java.version>1.8</java.version>
        <spark.version>2.2.0</spark.version>
        <provided.scope>compile</provided.scope>
    </properties>

    <dependencies>
        <dependency>
            <groupId>org.scala-lang</groupId>
            <artifactId>scala-library</artifactId>
            <version>2.11.7</version>
            <scope>${provided.scope}</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-core_2.11</artifactId>
            <version>${spark.version}</version>
            <scope>${provided.scope}</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-streaming_2.11</artifactId>
            <version>${spark.version}</version>
            <scope>${provided.scope}</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-sql_2.11</artifactId>
            <version>${spark.version}</version>
            <scope>${provided.scope}</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-hive_2.11</artifactId>
            <version>${spark.version}</version>
            <scope>${provided.scope}</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-yarn_2.11</artifactId>
            <version>${spark.version}</version>
            <scope>${provided.scope}</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-sql-kafka-0-10_2.11</artifactId>
            <version>${spark.version}</version>
            <scope>${provided.scope}</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-streaming-kafka-0-10_2.11</artifactId>
            <version>${spark.version}</version>
            <scope>${provided.scope}</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.kafka</groupId>
            <artifactId>kafka_2.11</artifactId>
            <version>0.10.0.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.kafka</groupId>
            <artifactId>kafka-clients</artifactId>
            <version>0.11.0.2</version>
        </dependency>
    </dependencies>

    <build>
        <pluginManagement>
            <plugins>
                <plugin>
                    <groupId>org.apache.maven.plugins</groupId>
                    <artifactId>maven-compiler-plugin</artifactId>
                    <version>3.8.0</version>
                    <configuration>
                        <source>1.8</source>
                        <target>1.8</target>
                    </configuration>
                </plugin>
                <plugin>
                    <groupId>org.apache.maven.plugins</groupId>
                    <artifactId>maven-resources-plugin</artifactId>
                    <version>3.0.2</version>
                    <configuration>
                        <encoding>UTF-8</encoding>
                    </configuration>
                </plugin>
                <plugin>
                    <groupId>net.alchim31.maven</groupId>
                    <artifactId>scala-maven-plugin</artifactId>
                    <version>3.2.2</version>
                    <executions>
                        <execution>
                            <goals>
                                <goal>compile</goal>
                                <goal>testCompile</goal>
                            </goals>
                        </execution>
                    </executions>
                </plugin>
            </plugins>
        </pluginManagement>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-resources-plugin</artifactId>
                <version>3.0.2</version>
                <configuration>
                    <encoding>UTF-8</encoding>
                </configuration>
            </plugin>
            <plugin>
                <groupId>net.alchim31.maven</groupId>
                <artifactId>scala-maven-plugin</artifactId>
                <executions>
                    <execution>
                        <id>scala-compile-first</id>
                        <phase>process-resources</phase>
                        <goals>
                            <goal>add-source</goal>
                            <goal>compile</goal>
                        </goals>
                    </execution>
                    <execution>
                        <id>scala-test-compile</id>
                        <phase>process-test-resources</phase>
                        <goals>
                            <goal>testCompile</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <executions>
                    <execution>
                        <phase>compile</phase>
                        <goals>
                            <goal>compile</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-shade-plugin</artifactId>
                <version>2.4.3</version>
                <executions>
                    <execution>
                        <phase>package</phase>
                        <goals>
                            <goal>shade</goal>
                        </goals>
                        <configuration>
                            <filters>
                                <filter>
                                    <artifact>*:*</artifact>
                                    <excludes>
                                        <exclude>META-INF/*.SF</exclude>
                                        <exclude>META-INF/*.DSA</exclude>
                                        <exclude>META-INF/*.RSA</exclude>
                                    </excludes>
                                </filter>
                            </filters>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
        </plugins>
        <resources>
            <resource>
                <directory>${basedir}/src/main/resources</directory>
                <excludes>
                    <exclude>env/*/*</exclude>
                </excludes>
                <includes>
                    <include>**/*</include>
                </includes>
            </resource>
            <resource>
                <directory>${basedir}/src/main/resources/env/${profile.active}</directory>
                <includes>
                    <include>**/*.properties</include>
                    <include>**/*.xml</include>
                </includes>
            </resource>
        </resources>
    </build>

    <profiles>
        <profile>
            <id>dev</id>
            <properties>
                <profile.active>dev</profile.active>
            </properties>
            <activation>
                <activeByDefault>true</activeByDefault>
            </activation>
        </profile>
        <profile>
            <id>test</id>
            <properties>
                <profile.active>test</profile.active>
            </properties>
        </profile>
        <profile>
            <id>prod</id>
            <properties>
                <profile.active>prod</profile.active>
            </properties>
        </profile>
    </profiles>
</project>
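
With this pom in place, running mvn clean package from the project root (assuming Maven and a JDK 8 are installed) produces the shaded jar that .setJars() points at, target/RemoteSubmitSparkToYarn-1.0-SNAPSHOT.jar. The dev profile is active by default; build with -Ptest or -Pprod to package the resource files for the other environments.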

Run output
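
The word counts in the batches below come from lines sent to the remote_submit_test topic while the job was running. One simple way to feed test data is Kafka's console producer (a sketch; the script name and location depend on your Kafka installation):

bin/kafka-console-producer.sh --broker-list 10.101.75.190:9092 --topic remote_submit_test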

......
Connected to the target VM, address: '127.0.0.1:49723', transport: 'socket'
Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties
19/09/27 15:32:47 INFO SparkContext: Running Spark version 2.2.0
19/09/27 15:32:47 WARN SparkConf: spark.master yarn-client is deprecated in Spark 2.0+, please instead use "yarn" with specified deploy mode.
19/09/27 15:32:47 INFO SparkContext: Submitted application: Remote_Submit_App
19/09/27 15:32:47 INFO SecurityManager: Changing view acls to: 110610172
19/09/27 15:32:47 INFO SecurityManager: Changing modify acls to: 110610172
19/09/27 15:32:47 INFO SecurityManager: Changing view acls groups to:
19/09/27 15:32:47 INFO SecurityManager: Changing modify acls groups to:
19/09/27 15:32:47 INFO SecurityManager: SecurityManager: authentication disabled; ui acls disabled; users with view permissions: Set(110610172); groups with view permissions: Set(); users with modify permissions: Set(110610172); groups with modify permissions: Set()
19/09/27 15:32:48 INFO Utils: Successfully started service 'sparkDriver' on port 49747.
19/09/27 15:32:48 INFO SparkEnv: Registering MapOutputTracker
19/09/27 15:32:48 INFO SparkEnv: Registering BlockManagerMaster
19/09/27 15:32:48 INFO BlockManagerMasterEndpoint: Using org.apache.spark.storage.DefaultTopologyMapper for getting topology information
19/09/27 15:32:48 INFO BlockManagerMasterEndpoint: BlockManagerMasterEndpoint up
19/09/27 15:32:48 INFO DiskBlockManager: Created local directory at C:\Users\110610172\AppData\Local\Temp\blockmgr-c580e3ec-3b0f-4365-8766-387e0c4a3947
19/09/27 15:32:48 INFO MemoryStore: MemoryStore started with capacity 1989.6 MB
19/09/27 15:32:48 INFO SparkEnv: Registering OutputCommitCoordinator
19/09/27 15:32:48 INFO Utils: Successfully started service 'SparkUI' on port 4040.
19/09/27 15:32:48 INFO SparkUI: Bound SparkUI to 0.0.0.0, and started at http://192.168.1.26:4040
19/09/27 15:32:48 INFO SparkContext: Added JAR E:\RemoteSubmitSparkToYarn\target\RemoteSubmitSparkToYarn-1.0-SNAPSHOT.jar at spark://192.168.1.26:49747/jars/RemoteSubmitSparkToYarn-1.0-SNAPSHOT.jar with timestamp 1569569568596
19/09/27 15:32:50 INFO ConfiguredRMFailoverProxyProvider: Failing over to rm381
19/09/27 15:32:50 INFO Client: Requesting a new application from cluster with 7 NodeManagers
19/09/27 15:32:50 INFO Client: Verifying our application has not requested more than the maximum memory capability of the cluster (12288 MB per container)
19/09/27 15:32:50 INFO Client: Will allocate AM container, with 896 MB memory including 384 MB overhead
19/09/27 15:32:50 INFO Client: Setting up container launch context for our AM
19/09/27 15:32:50 INFO Client: Setting up the launch environment for our AM container
19/09/27 15:32:50 INFO Client: Preparing resources for our AM container
19/09/27 15:32:51 WARN Client: Neither spark.yarn.jars nor spark.yarn.archive is set, falling back to uploading libraries under SPARK_HOME.
19/09/27 15:32:54 INFO Client: Uploading resource file:/C:/Users/110610172/AppData/Local/Temp/spark-46819e6c-4520-4e75-b7b0-0374e0020d36/__spark_libs__4420363360244802432.zip -> hdfs://cdh01:8020/user/110610172/.sparkStaging/application_1568096913481_0456/__spark_libs__4420363360244802432.zip
19/09/27 15:32:54 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
19/09/27 15:32:57 INFO Client: Uploading resource file:/C:/Users/110610172/AppData/Local/Temp/spark-46819e6c-4520-4e75-b7b0-0374e0020d36/__spark_conf__4989294758151956703.zip -> hdfs://cdh01:8020/user/110610172/.sparkStaging/application_1568096913481_0456/__spark_conf__.zip
19/09/27 15:32:57 INFO SecurityManager: Changing view acls to: 110610172
19/09/27 15:32:57 INFO SecurityManager: Changing modify acls to: 110610172
19/09/27 15:32:57 INFO SecurityManager: Changing view acls groups to:
19/09/27 15:32:57 INFO SecurityManager: Changing modify acls groups to:
19/09/27 15:32:57 INFO SecurityManager: SecurityManager: authentication disabled; ui acls disabled; users with view permissions: Set(110610172); groups with view permissions: Set(); users with modify permissions: Set(110610172); groups with modify permissions: Set()
19/09/27 15:32:57 INFO Client: Submitting application application_1568096913481_0456 to ResourceManager
19/09/27 15:32:57 INFO YarnClientImpl: Submitted application application_1568096913481_0456
19/09/27 15:32:57 INFO SchedulerExtensionServices: Starting Yarn extension services with app application_1568096913481_0456 and attemptId None
19/09/27 15:32:58 INFO Client: Application report for application_1568096913481_0456 (state: ACCEPTED)
19/09/27 15:32:58 INFO Client:
     client token: N/A
     diagnostics: N/A
     ApplicationMaster host: N/A
     ApplicationMaster RPC port: -1
     queue: root.users.110610172
     start time: 1569569577390
     final status: UNDEFINED
     tracking URL: http://cdh02:8088/proxy/application_1568096913481_0456/
     user: 110610172
19/09/27 15:32:59 INFO Client: Application report for application_1568096913481_0456 (state: ACCEPTED)
19/09/27 15:33:00 INFO Client: Application report for application_1568096913481_0456 (state: ACCEPTED)
19/09/27 15:33:01 INFO Client: Application report for application_1568096913481_0456 (state: ACCEPTED)
19/09/27 15:33:01 INFO YarnSchedulerBackend$YarnSchedulerEndpoint: ApplicationMaster registered as NettyRpcEndpointRef(spark-client://YarnAM)
19/09/27 15:33:01 INFO YarnClientSchedulerBackend: Add WebUI Filter. org.apache.hadoop.yarn.server.webproxy.amfilter.AmIpFilter, Map(PROXY_HOSTS -> cdh01,cdh02, PROXY_URI_BASES -> http://cdh01:8088/proxy/application_1568096913481_0456,http://cdh02:8088/proxy/application_1568096913481_0456), /proxy/application_1568096913481_0456
19/09/27 15:33:01 INFO JettyUtils: Adding filter: org.apache.hadoop.yarn.server.webproxy.amfilter.AmIpFilter
19/09/27 15:33:02 INFO Client: Application report for application_1568096913481_0456 (state: RUNNING)
19/09/27 15:33:02 INFO Client:
     client token: N/A
     diagnostics: N/A
     ApplicationMaster host: 10.101.75.194
     ApplicationMaster RPC port: 0
     queue: root.users.110610172
     start time: 1569569577390
     final status: UNDEFINED
     tracking URL: http://cdh02:8088/proxy/application_1568096913481_0456/
     user: 110610172
19/09/27 15:33:02 INFO YarnClientSchedulerBackend: Application application_1568096913481_0456 has started running.
19/09/27 15:33:02 INFO Utils: Successfully started service 'org.apache.spark.network.netty.NettyBlockTransferService' on port 49796.
19/09/27 15:33:02 INFO NettyBlockTransferService: Server created on 192.168.1.26:49796
19/09/27 15:33:02 INFO BlockManager: Using org.apache.spark.storage.RandomBlockReplicationPolicy for block replication policy
19/09/27 15:33:02 INFO BlockManagerMaster: Registering BlockManager BlockManagerId(driver, 192.168.1.26, 49796, None)
19/09/27 15:33:02 INFO BlockManagerMasterEndpoint: Registering block manager 192.168.1.26:49796 with 1989.6 MB RAM, BlockManagerId(driver, 192.168.1.26, 49796, None)
19/09/27 15:33:02 INFO BlockManagerMaster: Registered BlockManager BlockManagerId(driver, 192.168.1.26, 49796, None)
19/09/27 15:33:02 INFO BlockManager: Initialized BlockManager: BlockManagerId(driver, 192.168.1.26, 49796, None)
19/09/27 15:33:07 INFO YarnSchedulerBackend$YarnDriverEndpoint: Registered executor NettyRpcEndpointRef(spark-client://Executor) (10.101.75.190:10332) with ID 1
19/09/27 15:33:07 INFO BlockManagerMasterEndpoint: Registering block manager cdh04:24916 with 246.9 MB RAM, BlockManagerId(1, cdh04, 24916, None)
19/09/27 15:33:07 INFO YarnSchedulerBackend$YarnDriverEndpoint: Registered executor NettyRpcEndpointRef(spark-client://Executor) (10.101.75.190:10334) with ID 2
19/09/27 15:33:08 INFO BlockManagerMasterEndpoint: Registering block manager cdh04:27337 with 246.9 MB RAM, BlockManagerId(2, cdh04, 27337, None)
19/09/27 15:33:08 INFO YarnClientSchedulerBackend: SchedulerBackend is ready for scheduling beginning after reached minRegisteredResourcesRatio: 0.8
19/09/27 15:33:08 WARN KafkaUtils: overriding enable.auto.commit to false for executor
19/09/27 15:33:08 WARN KafkaUtils: overriding auto.offset.reset to none for executor
19/09/27 15:33:08 WARN KafkaUtils: overriding executor group.id to spark-executor-remote_test
19/09/27 15:33:08 WARN KafkaUtils: overriding receive.buffer.bytes to 65536 see KAFKA-3135
-------------------------------------------
Time: 1569569610000 ms
-------------------------------------------
(assigned,10)
(serializer,2)
(Setting,10)
(rdd.count(),1)
(class,2)
(=,2)
(newly,10)
(partitions,10)

-------------------------------------------
Time: 1569569640000 ms
-------------------------------------------

-------------------------------------------
Time: 1569569670000 ms
-------------------------------------------

......

Checking on the cluster

YARN --> Applications

[screenshot: the application running in the YARN Applications view]
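
The same information is available from a shell on the cluster via the standard YARN CLI, for example:

yarn application -list -appStates RUNNING

which lists the application id (application_1568096913481_0456 above), its state, and the tracking URL.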
