RemoteSubmitApp main class
package com.cloudera

import org.apache.kafka.clients.consumer.ConsumerConfig
import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.log4j.Logger
import org.apache.spark.SparkConf
import org.apache.spark.streaming.dstream.DStream
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.streaming.kafka010.{ConsumerStrategies, KafkaUtils, LocationStrategies}

object RemoteSubmitApp {

  val logger = Logger.getLogger(this.getClass)

  def main(args: Array[String]): Unit = {
    // Set the user that submits the job
    // System.setProperty("HADOOP_USER_NAME", "root")
    val conf = new SparkConf().setAppName("Remote_Submit_App")
      // Submit in yarn-client mode ("yarn-client" is deprecated in Spark 2.0+;
      // the equivalent is .setMaster("yarn") plus spark.submit.deployMode=client)
      .setMaster("yarn-client")
      // Hostname of the ResourceManager
      .set("yarn.resourcemanager.hostname", "cdh02")
      // Driver memory
      .set("spark.driver.memory", "1024M")
      // Executor memory
      .set("spark.executor.memory", "800M")
      // Number of executors
      .set("spark.executor.instances", "2")
      // YARN queue to submit to
      // .set("spark.yarn.queue", "default")
      // IP address of the driver, i.e. the local machine
      .set("spark.driver.host", "192.168.1.26")
      // Serializer
      // .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
      // Path to the application jar; other dependency jars can be added here, comma-separated
      .setJars(List("E:\\RemoteSubmitSparkToYarn\\target\\RemoteSubmitSparkToYarn-1.0-SNAPSHOT.jar"))

    val scc = new StreamingContext(conf, Seconds(30))
    scc.sparkContext.setLogLevel("WARN")
    // scc.checkpoint("checkpoint")

    val topic = "remote_submit_test"
    val topicSet = topic.split(",").toSet
    val kafkaParams = Map[String, Object](
      ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG -> "10.101.75.190:9092,10.101.75.191:9092,10.101.75.192:9092",
      ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG -> classOf[StringDeserializer],
      ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG -> classOf[StringDeserializer],
      ConsumerConfig.GROUP_ID_CONFIG -> "remote_test",
      ConsumerConfig.AUTO_OFFSET_RESET_CONFIG -> "earliest",
      ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG -> (false: java.lang.Boolean)
    )

    val kafkaStreams = KafkaUtils.createDirectStream[String, String](
      scc,
      LocationStrategies.PreferConsistent,
      ConsumerStrategies.Subscribe[String, String](topicSet, kafkaParams)
    )

    val wordCounts: DStream[(String, Long)] = kafkaStreams.map(_.value())
      .flatMap(_.split(" "))
      .map(x => (x, 1L))
      .reduceByKey(_ + _)
    wordCounts.print()

    // Start the streaming context
    scc.start()
    scc.awaitTermination()
  }
}
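Note that enable.auto.commit is set to false above but the job never commits offsets itself, so a restart with auto.offset.reset=earliest re-reads the topic from the beginning. A minimal sketch of committing offsets manually with the kafka010 API, assuming it replaces the wordCounts output logic before scc.start():

import org.apache.spark.streaming.kafka010.{CanCommitOffsets, HasOffsetRanges}

// Each batch exposes the Kafka offset ranges it consumed; once the batch
// has been processed, hand them back to Kafka for an asynchronous commit.
kafkaStreams.foreachRDD { rdd =>
  val offsetRanges = rdd.asInstanceOf[HasOffsetRanges].offsetRanges
  // ... process the batch here ...
  kafkaStreams.asInstanceOf[CanCommitOffsets].commitAsync(offsetRanges)
}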
pom.xml file
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.cloudera</groupId>
    <artifactId>RemoteSubmitSparkToYarn</artifactId>
    <version>1.0-SNAPSHOT</version>
    <packaging>jar</packaging>
    <name>RemoteSubmitSparkToYarn</name>

    <repositories>
        <repository>
            <id>cloudera</id>
            <url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
            <name>Cloudera Repositories</name>
            <releases>
                <enabled>true</enabled>
            </releases>
            <snapshots>
                <enabled>false</enabled>
            </snapshots>
        </repository>
    </repositories>

    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
        <java.version>1.8</java.version>
        <spark.version>2.2.0</spark.version>
        <provided.scope>compile</provided.scope>
    </properties>

    <dependencies>
        <dependency>
            <groupId>org.scala-lang</groupId>
            <artifactId>scala-library</artifactId>
            <version>2.11.7</version>
            <scope>${provided.scope}</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-core_2.11</artifactId>
            <version>${spark.version}</version>
            <scope>${provided.scope}</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-streaming_2.11</artifactId>
            <version>${spark.version}</version>
            <scope>${provided.scope}</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-sql_2.11</artifactId>
            <version>${spark.version}</version>
            <scope>${provided.scope}</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-hive_2.11</artifactId>
            <version>${spark.version}</version>
            <scope>${provided.scope}</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-yarn_2.11</artifactId>
            <version>${spark.version}</version>
            <scope>${provided.scope}</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-sql-kafka-0-10_2.11</artifactId>
            <version>${spark.version}</version>
            <scope>${provided.scope}</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-streaming-kafka-0-10_2.11</artifactId>
            <version>${spark.version}</version>
            <scope>${provided.scope}</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.kafka</groupId>
            <artifactId>kafka_2.11</artifactId>
            <version>0.10.0.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.kafka</groupId>
            <artifactId>kafka-clients</artifactId>
            <version>0.11.0.2</version>
        </dependency>
    </dependencies>

    <build>
        <pluginManagement>
            <plugins>
                <plugin>
                    <groupId>org.apache.maven.plugins</groupId>
                    <artifactId>maven-compiler-plugin</artifactId>
                    <version>3.8.0</version>
                    <configuration>
                        <source>1.8</source>
                        <target>1.8</target>
                    </configuration>
                </plugin>
                <plugin>
                    <groupId>org.apache.maven.plugins</groupId>
                    <artifactId>maven-resources-plugin</artifactId>
                    <version>3.0.2</version>
                    <configuration>
                        <encoding>UTF-8</encoding>
                    </configuration>
                </plugin>
                <plugin>
                    <groupId>net.alchim31.maven</groupId>
                    <artifactId>scala-maven-plugin</artifactId>
                    <version>3.2.2</version>
                    <executions>
                        <execution>
                            <goals>
                                <goal>compile</goal>
                                <goal>testCompile</goal>
                            </goals>
                        </execution>
                    </executions>
                </plugin>
            </plugins>
        </pluginManagement>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-resources-plugin</artifactId>
                <version>3.0.2</version>
                <configuration>
                    <encoding>UTF-8</encoding>
                </configuration>
            </plugin>
            <plugin>
                <groupId>net.alchim31.maven</groupId>
                <artifactId>scala-maven-plugin</artifactId>
                <executions>
                    <execution>
                        <id>scala-compile-first</id>
                        <phase>process-resources</phase>
                        <goals>
                            <goal>add-source</goal>
                            <goal>compile</goal>
                        </goals>
                    </execution>
                    <execution>
                        <id>scala-test-compile</id>
                        <phase>process-test-resources</phase>
                        <goals>
                            <goal>testCompile</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <executions>
                    <execution>
                        <phase>compile</phase>
                        <goals>
                            <goal>compile</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-shade-plugin</artifactId>
                <version>2.4.3</version>
                <executions>
                    <execution>
                        <phase>package</phase>
                        <goals>
                            <goal>shade</goal>
                        </goals>
                        <configuration>
                            <filters>
                                <filter>
                                    <artifact>*:*</artifact>
                                    <excludes>
                                        <exclude>META-INF/*.SF</exclude>
                                        <exclude>META-INF/*.DSA</exclude>
                                        <exclude>META-INF/*.RSA</exclude>
                                    </excludes>
                                </filter>
                            </filters>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
        </plugins>
        <resources>
            <resource>
                <directory>${basedir}/src/main/resources</directory>
                <excludes>
                    <exclude>env/*/*</exclude>
                </excludes>
                <includes>
                    <include>**/*</include>
                </includes>
            </resource>
            <resource>
                <directory>${basedir}/src/main/resources/env/${profile.active}</directory>
                <includes>
                    <include>**/*.properties</include>
                    <include>**/*.xml</include>
                </includes>
            </resource>
        </resources>
    </build>

    <profiles>
        <profile>
            <id>dev</id>
            <properties>
                <profile.active>dev</profile.active>
            </properties>
            <activation>
                <activeByDefault>true</activeByDefault>
            </activation>
        </profile>
        <profile>
            <id>test</id>
            <properties>
                <profile.active>test</profile.active>
            </properties>
        </profile>
        <profile>
            <id>prod</id>
            <properties>
                <profile.active>prod</profile.active>
            </properties>
        </profile>
    </profiles>
</project>
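The dev profile is active by default; another environment can be selected at build time, e.g. mvn clean package -Ptest. Also note that provided.scope defaults to compile, which keeps the Spark jars on the classpath when launching from the IDE but also pulls them into the shaded jar; for a jar meant for spark-submit on the cluster, the property would typically be overridden at build time, e.g. mvn clean package -Dprovided.scope=provided.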
Run output
......
Connected to the target VM, address: '127.0.0.1:49723', transport: 'socket'
Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties
19/09/27 15:32:47 INFO SparkContext: Running Spark version 2.2.0
19/09/27 15:32:47 WARN SparkConf: spark.master yarn-client is deprecated in Spark 2.0+, please instead use "yarn" with specified deploy mode.
19/09/27 15:32:47 INFO SparkContext: Submitted application: Remote_Submit_App
19/09/27 15:32:47 INFO SecurityManager: Changing view acls to: 110610172
19/09/27 15:32:47 INFO SecurityManager: Changing modify acls to: 110610172
19/09/27 15:32:47 INFO SecurityManager: Changing view acls groups to:
19/09/27 15:32:47 INFO SecurityManager: Changing modify acls groups to:
19/09/27 15:32:47 INFO SecurityManager: SecurityManager: authentication disabled; ui acls disabled; users with view permissions: Set(110610172); groups with view permissions: Set(); users with modify permissions: Set(110610172); groups with modify permissions: Set()
19/09/27 15:32:48 INFO Utils: Successfully started service 'sparkDriver' on port 49747.
19/09/27 15:32:48 INFO SparkEnv: Registering MapOutputTracker
19/09/27 15:32:48 INFO SparkEnv: Registering BlockManagerMaster
19/09/27 15:32:48 INFO BlockManagerMasterEndpoint: Using org.apache.spark.storage.DefaultTopologyMapper for getting topology information
19/09/27 15:32:48 INFO BlockManagerMasterEndpoint: BlockManagerMasterEndpoint up
19/09/27 15:32:48 INFO DiskBlockManager: Created local directory at C:\Users\110610172\AppData\Local\Temp\blockmgr-c580e3ec-3b0f-4365-8766-387e0c4a3947
19/09/27 15:32:48 INFO MemoryStore: MemoryStore started with capacity 1989.6 MB
19/09/27 15:32:48 INFO SparkEnv: Registering OutputCommitCoordinator
19/09/27 15:32:48 INFO Utils: Successfully started service 'SparkUI' on port 4040.
19/09/27 15:32:48 INFO SparkUI: Bound SparkUI to 0.0.0.0, and started at http://192.168.1.26:4040
19/09/27 15:32:48 INFO SparkContext: Added JAR E:\RemoteSubmitSparkToYarn\target\RemoteSubmitSparkToYarn-1.0-SNAPSHOT.jar at spark://192.168.1.26:49747/jars/RemoteSubmitSparkToYarn-1.0-SNAPSHOT.jar with timestamp 1569569568596
19/09/27 15:32:50 INFO ConfiguredRMFailoverProxyProvider: Failing over to rm381
19/09/27 15:32:50 INFO Client: Requesting a new application from cluster with 7 NodeManagers
19/09/27 15:32:50 INFO Client: Verifying our application has not requested more than the maximum memory capability of the cluster (12288 MB per container)
19/09/27 15:32:50 INFO Client: Will allocate AM container, with 896 MB memory including 384 MB overhead
19/09/27 15:32:50 INFO Client: Setting up container launch context for our AM
19/09/27 15:32:50 INFO Client: Setting up the launch environment for our AM container
19/09/27 15:32:50 INFO Client: Preparing resources for our AM container
19/09/27 15:32:51 WARN Client: Neither spark.yarn.jars nor spark.yarn.archive is set, falling back to uploading libraries under SPARK_HOME.
19/09/27 15:32:54 INFO Client: Uploading resource file:/C:/Users/110610172/AppData/Local/Temp/spark-46819e6c-4520-4e75-b7b0-0374e0020d36/__spark_libs__4420363360244802432.zip -> hdfs://cdh01:8020/user/110610172/.sparkStaging/application_1568096913481_0456/__spark_libs__4420363360244802432.zip
19/09/27 15:32:54 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
19/09/27 15:32:57 INFO Client: Uploading resource file:/C:/Users/110610172/AppData/Local/Temp/spark-46819e6c-4520-4e75-b7b0-0374e0020d36/__spark_conf__4989294758151956703.zip -> hdfs://cdh01:8020/user/110610172/.sparkStaging/application_1568096913481_0456/__spark_conf__.zip
19/09/27 15:32:57 INFO SecurityManager: Changing view acls to: 110610172
19/09/27 15:32:57 INFO SecurityManager: Changing modify acls to: 110610172
19/09/27 15:32:57 INFO SecurityManager: Changing view acls groups to:
19/09/27 15:32:57 INFO SecurityManager: Changing modify acls groups to:
19/09/27 15:32:57 INFO SecurityManager: SecurityManager: authentication disabled; ui acls disabled; users with view permissions: Set(110610172); groups with view permissions: Set(); users with modify permissions: Set(110610172); groups with modify permissions: Set()
19/09/27 15:32:57 INFO Client: Submitting application application_1568096913481_0456 to ResourceManager
19/09/27 15:32:57 INFO YarnClientImpl: Submitted application application_1568096913481_0456
19/09/27 15:32:57 INFO SchedulerExtensionServices: Starting Yarn extension services with app application_1568096913481_0456 and attemptId None
19/09/27 15:32:58 INFO Client: Application report for application_1568096913481_0456 (state: ACCEPTED)
19/09/27 15:32:58 INFO Client:
client token: N/A
diagnostics: N/A
ApplicationMaster host: N/A
ApplicationMaster RPC port: -1
queue: root.users.110610172
start time: 1569569577390
final status: UNDEFINED
tracking URL: http://cdh02:8088/proxy/application_1568096913481_0456/
user: 110610172
19/09/27 15:32:59 INFO Client: Application report for application_1568096913481_0456 (state: ACCEPTED)
19/09/27 15:33:00 INFO Client: Application report for application_1568096913481_0456 (state: ACCEPTED)
19/09/27 15:33:01 INFO Client: Application report for application_1568096913481_0456 (state: ACCEPTED)
19/09/27 15:33:01 INFO YarnSchedulerBackend$YarnSchedulerEndpoint: ApplicationMaster registered as NettyRpcEndpointRef(spark-client://YarnAM)
19/09/27 15:33:01 INFO YarnClientSchedulerBackend: Add WebUI Filter. org.apache.hadoop.yarn.server.webproxy.amfilter.AmIpFilter, Map(PROXY_HOSTS -> cdh01,cdh02, PROXY_URI_BASES -> http://cdh01:8088/proxy/application_1568096913481_0456,http://cdh02:8088/proxy/application_1568096913481_0456), /proxy/application_1568096913481_0456
19/09/27 15:33:01 INFO JettyUtils: Adding filter: org.apache.hadoop.yarn.server.webproxy.amfilter.AmIpFilter
19/09/27 15:33:02 INFO Client: Application report for application_1568096913481_0456 (state: RUNNING)
19/09/27 15:33:02 INFO Client:
client token: N/A
diagnostics: N/A
ApplicationMaster host: 10.101.75.194
ApplicationMaster RPC port: 0
queue: root.users.110610172
start time: 1569569577390
final status: UNDEFINED
tracking URL: http://cdh02:8088/proxy/application_1568096913481_0456/
user: 110610172
19/09/27 15:33:02 INFO YarnClientSchedulerBackend: Application application_1568096913481_0456 has started running.
19/09/27 15:33:02 INFO Utils: Successfully started service 'org.apache.spark.network.netty.NettyBlockTransferService' on port 49796.
19/09/27 15:33:02 INFO NettyBlockTransferService: Server created on 192.168.1.26:49796
19/09/27 15:33:02 INFO BlockManager: Using org.apache.spark.storage.RandomBlockReplicationPolicy for block replication policy
19/09/27 15:33:02 INFO BlockManagerMaster: Registering BlockManager BlockManagerId(driver, 192.168.1.26, 49796, None)
19/09/27 15:33:02 INFO BlockManagerMasterEndpoint: Registering block manager 192.168.1.26:49796 with 1989.6 MB RAM, BlockManagerId(driver, 192.168.1.26, 49796, None)
19/09/27 15:33:02 INFO BlockManagerMaster: Registered BlockManager BlockManagerId(driver, 192.168.1.26, 49796, None)
19/09/27 15:33:02 INFO BlockManager: Initialized BlockManager: BlockManagerId(driver, 192.168.1.26, 49796, None)
19/09/27 15:33:07 INFO YarnSchedulerBackend$YarnDriverEndpoint: Registered executor NettyRpcEndpointRef(spark-client://Executor) (10.101.75.190:10332) with ID 1
19/09/27 15:33:07 INFO BlockManagerMasterEndpoint: Registering block manager cdh04:24916 with 246.9 MB RAM, BlockManagerId(1, cdh04, 24916, None)
19/09/27 15:33:07 INFO YarnSchedulerBackend$YarnDriverEndpoint: Registered executor NettyRpcEndpointRef(spark-client://Executor) (10.101.75.190:10334) with ID 2
19/09/27 15:33:08 INFO BlockManagerMasterEndpoint: Registering block manager cdh04:27337 with 246.9 MB RAM, BlockManagerId(2, cdh04, 27337, None)
19/09/27 15:33:08 INFO YarnClientSchedulerBackend: SchedulerBackend is ready for scheduling beginning after reached minRegisteredResourcesRatio: 0.8
19/09/27 15:33:08 WARN KafkaUtils: overriding enable.auto.commit to false for executor
19/09/27 15:33:08 WARN KafkaUtils: overriding auto.offset.reset to none for executor
19/09/27 15:33:08 WARN KafkaUtils: overriding executor group.id to spark-executor-remote_test
19/09/27 15:33:08 WARN KafkaUtils: overriding receive.buffer.bytes to 65536 see KAFKA-3135
-------------------------------------------
Time: 1569569610000 ms
-------------------------------------------
(assigned,10)
(serializer,2)
(Setting,10)
(rdd.count(),1)
(class,2)
(=,2)
(newly,10)
(partitions,10)
-------------------------------------------
Time: 1569569640000 ms
-------------------------------------------
-------------------------------------------
Time: 1569569670000 ms
-------------------------------------------
......
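The words counted in the first batch are simply whatever text was produced to remote_submit_test, split on spaces. A hypothetical producer for generating such input, using the kafka-clients dependency already declared in the pom (the broker list matches the kafkaParams above; the TestProducer name is made up for illustration):

package com.cloudera

import java.util.Properties
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}

object TestProducer {
  def main(args: Array[String]): Unit = {
    val props = new Properties()
    props.put("bootstrap.servers", "10.101.75.190:9092,10.101.75.191:9092,10.101.75.192:9092")
    props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer")
    props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer")

    val producer = new KafkaProducer[String, String](props)
    // Each line is split on spaces and counted per word by the streaming job
    producer.send(new ProducerRecord[String, String]("remote_submit_test", "Setting newly assigned partitions"))
    producer.flush()
    producer.close()
  }
}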
Viewing on the cluster
YARN --> Applications
[Screenshot: the submitted application listed in the YARN Applications page]