# curl -v -X POST 'http://10.2.45.231:8088/ws/v1/cluster/apps/new-application'
得到applicationid为 application_1472797340021_0302
# hadoop fs -stat '%b %Y' /demo.txt
356 1473843721218
{
"am-container-spec":{
"commands":{
"command":" /opt/jdk1.8.0_66/bin/java -Xmx1024m org.apache.spark.deploy.yarn.ApplicationMaster --jar __app__.jar --class com.zdhuang.WordCount --args hdfs://pdmiCluster/demo.txt --args hdfs://pdmiCluster/output "
},
"environment":{
"entry":[
{
"key":"SPARK_YARN_MODE",
"value":true
},
{
"key":"SPARK_YARN_STAGING_DIR",
"value":" "
},
{
"key":"HDP_VERSION",
"value":"2.4.2.0-258"
},
{
"key":"CLASSPATH",
"value":"__spark__.jar<CPS>__app__.jar<CPS>__app__.properties<CPS>/usr/hdp/2.4.2.0-258/spark/conf<CPS>/usr/hdp/2.4.2.0-258/spark/lib/*<CPS>/usr/hdp/2.4.2.0-258/hadoop/conf<CPS>/usr/hdp/2.4.2.0-258/hadoop/lib/*<CPS>/usr/hdp/2.4.2.0-258/hadoop/.//*<CPS>/usr/hdp/2.4.2.0-258/hadoop-hdfs/./<CPS>/usr/hdp/2.4.2.0-258/hadoop-hdfs/lib/*<CPS>/usr/hdp/2.4.2.0-258/hadoop-hdfs/.//*<CPS>/usr/hdp/2.4.2.0-258/hadoop-yarn/lib/*<CPS>/usr/hdp/2.4.2.0-258/hadoop-yarn/.//*<CPS>/usr/hdp/2.4.2.0-258/hadoop-mapreduce/lib/*<CPS>/usr/hdp/2.4.2.0-258/hadoop-mapreduce/.//*<CPS>/usr/share/java/mysql-connector-java-5.1.17.jar<CPS>/usr/share/java/mysql-connector-java.jar<CPS>/usr/hdp/current/hadoop-mapreduce-client/*"
},
{
"key":"SPARK_YARN_CACHE_FILES",
"value":"hdfs://pdmiCluster/Test/Spark/wc_v1.00.jar#__app__.jar,hdfs://pdmiCluster/Test/Spark/spark-assembly-1.6.1.2.4.2.0-258-hadoop2.7.1.2.4.2.0-258.jar#__spark__.jar"
},
{
"key":"SPARK_YARN_CACHE_FILES_FILE_SIZES",
"value":"16008,185971201"
},
{
"key":"SPARK_YARN_CACHE_FILES_TIME_STAMPS",
"value":"1473924988796,1474440112743"
},
{
"key":"SPARK_YARN_CACHE_FILES_VISIBILITIES",
"value":"PUBLIC,PRIVATE"
}
]
},
"local-resources":{
"entry":[
{
"key":"__app__.jar",
"value":{
"resource":"hdfs://pdmiCluster/Test/Spark/wc_v1.00.jar",
"size":16008,
"timestamp":1473924988796,
"type":"FILE",
"visibility":"APPLICATION" }
},
{
"key":"__spark__.jar",
"value":{
"resource":"hdfs://pdmiCluster/Test/Spark/spark-assembly-1.6.1.2.4.2.0-258-hadoop2.7.1.2.4.2.0-258.jar",
"size":185971201,
"timestamp":1474440112743,
"type":"FILE",
"visibility":"APPLICATION" }
},
{
"key":"__app__.properties",
"value":{
"resource":"hdfs://pdmiCluster/Test/Spark/spark-yarn.properties",
"size":963,
"timestamp":1474533638137,
"type":"FILE",
"visibility":"APPLICATION" }
}
]
}
},
"application-id":"application_1472797340021_0302",
"application-name":"appsjc",
"application-type":"SPARK",
"queue":"test",
"priority":3,
"keep-containers-across-application-attempts":false,
"max-app-attempts":2,
"resource":{
"memory":1024,
"vCores":1
},
"unmanaged-AM":false
}
解释json内容:
"command":" /opt/jdk1.8.0_66/bin/java -Xmx1024m org.apache.spark.deploy.yarn.ApplicationMaster --jar __app__.jar --class com.zdhuang.WordCount --args hdfs://pdmiCluster/demo.txt --args hdfs://pdmiCluster/output "
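各部分依次为:java 可执行文件路径、JVM 最大堆内存(-Xmx1024m)、启动 jar 包的 Spark 入口类(org.apache.spark.deploy.yarn.ApplicationMaster)、任务 jar 包别名(--jar)、主类名(--class)、输入文件(第一个 --args)、输出目录(第二个 --args)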
请根据自身环境修改相关参数(非加粗部分)
pdmiCluster 为dfs.nameservices
"key":"HDP_VERSION","value":"2.4.2.0-258"
classpath:
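SPARK_YARN_CACHE_FILES:执行 jar 包在 HDFS 的全路径#别名,spark-assembly 包在 HDFS 的全路径#别名(两项以逗号分隔)
SPARK_YARN_CACHE_FILES_FILE_SIZES:执行 jar 包和 spark-assembly 包在 HDFS 上的大小(单位:字节,顺序与 SPARK_YARN_CACHE_FILES 一致)
SPARK_YARN_CACHE_FILES_TIME_STAMPS:执行 jar 包和 spark-assembly 包在 HDFS 上的修改时间戳(单位:毫秒,顺序与 SPARK_YARN_CACHE_FILES 一致);大小和时间戳可通过 hadoop fs -stat '%b %Y' <文件路径> 获取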
#curl -s -i -X POST -H 'Accept: application/json' -H 'Content-Type: application/json' http://10.2.45.231:8088/ws/v1/cluster/apps --data-binary @spark-yarn.json
# hdfs dfs -ls /output
Found 3 items
-rw-r--r-- 3 yarn hdfs 0 2016-09-22 17:59 /output/_SUCCESS
-rw-r--r-- 3 yarn hdfs 237 2016-09-22 17:59 /output/part-00000
-rw-r--r-- 3 yarn hdfs 254 2016-09-22 17:59 /output/part-00001
# hdfs dfs -cat /output/part-00000
(allows,1)
(resource,1)
(is,3)
(Uniform,1)
(file,,2)
(address,1)
(anything,1)
(audio,1)
(When,1)
(some,1)
(get,1)
(locate,1)
(been,1)
(what's,1)
(thing,1)
(resource.,2)
(with,,1)
(just,1)
(what,1)
(accessed;,1)
(Resource,1)
(Locator,,1)