1.下载压缩包,解压
https://www.apache.org/dyn/closer.lua/incubator/livy/0.5.0-incubating/livy-0.5.0-incubating-bin.zip
unzip livy-0.5.0-incubating-bin.zip
2.配置环境变量
vim ~/.profile(Ubuntu;RedHat/CentOS 编辑 ~/.bash_profile)
export HADOOP_HOME=/usr/cdh/hadoop
export SPARK_HOME=/usr/cdh/spark
export LIVY_HOME=/usr/cdh/livy/livy-0.5.0-incubating-bin
export PATH=$PATH:$KAFKA_HOME/bin:$SCALA_HOME/bin:$JAVA_HOME/bin:$HBASE_HOME/bin:$ES_HOME/bin:$NODE_HOME/bin:$LIVY_HOME/bin
3.运行
livy-server start
4.查看所有 sessions
curl hadoop:8998/sessions
以json(python自带) 格式查看
curl hadoop:8998/sessions | python -m json.tool
5.创建session
curl -X POST --data '{"kind":"spark"}' -H "Content-Type:application/json" hadoop:8998/sessions
curl hadoop:8998/sessions
{"from":0,"total":1,"sessions":[{"id":0,"appId":null,"owner":null,"proxyUser":null,"state":"idle","kind":"spark","appInfo":{"driverLogUrl":null,"sparkUiUrl":null},"log":["18/05/27 19:19:52 INFO spark.SparkContext: Added JAR file:/usr/cdh/livy/livy-0.5.0-incubating-bin/repl_2.10-jars/livy-core_2.10-0.5.0-incubating.jar at http://192.168.1.110:38659/jars/livy-core_2.10-0.5.0-incubating.jar with timestamp 1527419992075","18/05/27 19:19:52 INFO executor.Executor: Starting executor ID driver on host localhost","18/05/27 19:19:52 INFO executor.Executor: Using REPL class URI: http://192.168.1.110:40017","18/05/27 19:19:52 INFO util.Utils: Successfully started service 'org.apache.spark.network.netty.NettyBlockTransferService' on port 41793.","18/05/27 19:19:52 INFO netty.NettyBlockTransferService: Server created on 41793","18/05/27 19:19:52 INFO storage.BlockManagerMaster: Trying to register BlockManager","18/05/27 19:19:52 INFO storage.BlockManagerMasterEndpoint: Registering block manager localhost:41793 with 511.1 MB RAM, BlockManagerId(driver, localhost, 41793)","18/05/27 19:19:52 INFO storage.BlockManagerMaster: Registered BlockManager","18/05/27 19:19:52 INFO driver.SparkEntries: Spark context finished initialization in 1643ms","18/05/27 19:19:53 INFO driver.SparkEntries: Created SQLContext."]}]}
6.根据session 的id查看会话状态
curl hadoop:8998/sessions/0
{"id":0,"appId":null,"owner":null,"proxyUser":null,"state":"idle","kind":"spark","appInfo":{"driverLogUrl":null,"sparkUiUrl":null},"log":["18/05/27 19:19:52 INFO spark.SparkContext: Added JAR file:/usr/cdh/livy/livy-0.5.0-incubating-bin/repl_2.10-jars/livy-core_2.10-0.5.0-incubating.jar at http://192.168.1.110:38659/jars/livy-core_2.10-0.5.0-incubating.jar with timestamp 1527419992075","18/05/27 19:19:52 INFO executor.Executor: Starting executor ID driver on host localhost","18/05/27 19:19:52 INFO executor.Executor: Using REPL class URI: http://192.168.1.110:40017","18/05/27 19:19:52 INFO util.Utils: Successfully started service 'org.apache.spark.network.netty.NettyBlockTransferService' on port 41793.","18/05/27 19:19:52 INFO netty.NettyBlockTransferService: Server created on 41793","18/05/27 19:19:52 INFO storage.BlockManagerMaster: Trying to register BlockManager","18/05/27 19:19:52 INFO storage.BlockManagerMasterEndpoint: Registering block manager localhost:41793 with 511.1 MB RAM, BlockManagerId(driver, localhost, 41793)","18/05/27 19:19:52 INFO storage.BlockManagerMaster: Registered BlockManager","18/05/27 19:19:52 INFO driver.SparkEntries: Spark context finished initialization in 1643ms","18/05/27 19:19:53 INFO driver.SparkEntries: Created SQLContext."]}spark@hadoop:~$
以 python json格式查看
curl hadoop:8998/sessions/0 | python -m json.tool
% Total % Received % Xferd Average Speed Time Time Time Current
Dload Upload Total Spent Left Speed
100 1280 100 1280 0 0 152k 0 --:--:-- --:--:-- --:--:-- 156k
{
"appId": null,
"appInfo": {
"driverLogUrl": null,
"sparkUiUrl": null
},
"id": 0,
"kind": "spark",
"log": [
"18/05/27 19:19:52 INFO spark.SparkContext: Added JAR file:/usr/cdh/livy/livy-0.5.0-incubating-bin/repl_2.10-jars/livy-core_2.10-0.5.0-incubating.jar at http://192.168.1.110:38659/jars/livy-core_2.10-0.5.0-incubating.jar with timestamp 1527419992075",
"18/05/27 19:19:52 INFO executor.Executor: Starting executor ID driver on host localhost",
"18/05/27 19:19:52 INFO executor.Executor: Using REPL class URI: http://192.168.1.110:40017",
"18/05/27 19:19:52 INFO util.Utils: Successfully started service 'org.apache.spark.network.netty.NettyBlockTransferService' on port 41793.",
"18/05/27 19:19:52 INFO netty.NettyBlockTransferService: Server created on 41793",
"18/05/27 19:19:52 INFO storage.BlockManagerMaster: Trying to register BlockManager",
"18/05/27 19:19:52 INFO storage.BlockManagerMasterEndpoint: Registering block manager localhost:41793 with 511.1 MB RAM, BlockManagerId(driver, localhost, 41793)",
"18/05/27 19:19:52 INFO storage.BlockManagerMaster: Registered BlockManager",
"18/05/27 19:19:52 INFO driver.SparkEntries: Spark context finished initialization in 1643ms",
"18/05/27 19:19:53 INFO driver.SparkEntries: Created SQLContext."
],
"owner": null,
"proxyUser": null,
"state": "idle"
}
7.提交任务
curl hadoop:8998/sessions/0/statements -X POST -H 'Content-Type:application/json' -d '{"code":"1+1"}'
查看执行结果:
curl hadoop:8998/sessions/0/statements/1
{"id":1,"code":"1+1","state":"available","output":{"status":"ok","execution_count":1,"data":{"text/plain":"res1: Int = 2\n"}},"progress":1.0
8.共享变量
curl hadoop:8998/sessions/0/statements -X POST -H 'Content-Type:application/json' -d '{"code":"val rdd = sc.makeRDD(List(\"a\",1,\"b\",2))"}'
统计RDD 长度
curl hadoop:8998/sessions/0/statements -X POST -H 'Content-Type:application/json' -d '{"code":"rdd.count"}'
查看同一个session(0)中任务
spark@hadoop:~$ curl hadoop:8998/sessions/0/statements/4 | python -m json.tool
spark@hadoop:~$ curl hadoop:8998/sessions/0/statements/3 | python -m json.tool
% Total % Received % Xferd Average Speed Time Time Time Current
Dload Upload Total Spent Left Speed
100 170 100 170 0 0 14768 0 --:--:-- --:--:-- --:--:-- 15454
{
"code": "sc.makeRDD(List(1,2,3,4)).count",
"id": 3,
"output": {
"data": {
"text/plain": "res3: Long = 4\n"
},
"execution_count": 3,
"status": "ok"
},
"progress": 1.0,
"state": "available"
}
spark@hadoop:~$ curl hadoop:8998/sessions/0/statements/4 | python -m json.tool
% Total % Received % Xferd Average Speed Time Time Time Current
Dload Upload Total Spent Left Speed
100 170 100 170 0 0 12721 0 --:--:-- --:--:-- --:--:-- 13076
{
"code": "sc.makeRDD(List(1,2,3,4)).count",
"id": 4,
"output": {
"data": {
"text/plain": "res4: Long = 4\n"
},
"execution_count": 4,
"status": "ok"
},
"progress": 1.0,
"state": "available
9.删除会话
curl hadoop:8998/sessions/0 -X DELETE
{"msg":"deleted"}
10.Spark 任务提交 YARN 配置参数
curl -X POST --data '{"kind":"spark","numExecutors":3,"executorMemory":"2G"}' -H "Content-Type:application/json" hadoop:8998/sessions | python -m json.tool
% Total % Received % Xferd Average Speed Time Time Time Current
Dload Upload Total Spent Left Speed
100 222 100 167 100 55 7643 2517 --:--:-- --:--:-- --:--:-- 7952
{
"appId": null,
"appInfo": {
"driverLogUrl": null,
"sparkUiUrl": null
},
"id": 2,
"kind": "spark",
"log": [
"stdout: ",
"\nstderr: "
],
"owner": null,
"proxyUser": null,
"state": "starting"
}