Starting with Spark 2.0.0, a SparkSession is the entry point. First attempt (note the capital "Local" in the master URL, which will surface below):
SparkSession spark = SparkSession
.builder()
.master("Local")
.appName("JavaWordCount")
.getOrCreate();
16/09/10 09:41:27 ERROR SparkContext: Error initializing SparkContext.
java.lang.IllegalArgumentException: System memory 259522560 must be at least 471859200. Please increase heap size using the --driver-memory option or spark.driver.memory in Spark configuration.
at org.apache.spark.memory.UnifiedMemoryManager$.getMaxMemory(UnifiedMemoryManager.scala:212)
at org.apache.spark.memory.UnifiedMemoryManager$.apply(UnifiedMemoryManager.scala:194)
at org.apache.spark.SparkEnv$.create(SparkEnv.scala:308)
at org.apache.spark.SparkEnv$.createDriverEnv(SparkEnv.scala:165)
at org.apache.spark.SparkContext.createSparkEnv(SparkContext.scala:259)
at org.apache.spark.SparkContext.<init>(...)
at org.apache.spark.SparkContext$.getOrCreate(SparkContext.scala:2256)
at org.apache.spark.sql.SparkSession$Builder$$anonfun$8.apply(SparkSession.scala:831)
at org.apache.spark.sql.SparkSession$Builder$$anonfun$8.apply(SparkSession.scala:823)
at scala.Option.getOrElse(Option.scala:121)
at org.apache.spark.sql.SparkSession$Builder.getOrCreate(SparkSession.scala:823)
at com.dt.spark200.Spark200Test.main(Spark200Test.java:37)
16/09/10 09:41:27 INFO SparkContext: Successfully stopped SparkContext
Exception in thread "main" java.lang.IllegalArgumentException: System memory 259522560 must be at least 471859200. Please increase heap size using the --driver-memory option or spark.driver.memory in Spark configuration.
at org.apache.spark.memory.UnifiedMemoryManager$.getMaxMemory(UnifiedMemoryManager.scala:212)
at org.apache.spark.memory.UnifiedMemoryManager$.apply(UnifiedMemoryManager.scala:194)
at org.apache.spark.SparkEnv$.create(SparkEnv.scala:308)
at org.apache.spark.SparkEnv$.createDriverEnv(SparkEnv.scala:165)
at org.apache.spark.SparkContext.createSparkEnv(SparkContext.scala:259)
at org.apache.spark.SparkContext.<init>(...)
at org.apache.spark.SparkContext$.getOrCreate(SparkContext.scala:2256)
at org.apache.spark.sql.SparkSession$Builder$$anonfun$8.apply(SparkSession.scala:831)
at org.apache.spark.sql.SparkSession$Builder$$anonfun$8.apply(SparkSession.scala:823)
at scala.Option.getOrElse(Option.scala:121)
at org.apache.spark.sql.SparkSession$Builder.getOrCreate(SparkSession.scala:823)
at com.dt.spark200.Spark200Test.main(Spark200Test.java:37)
Fixed by setting the VM arguments -Xms256m -Xmx1024m (e.g. in the IDE's run configuration) to enlarge the driver heap.
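In local mode, Spark 2.0's UnifiedMemoryManager derives "system memory" from the driver JVM's max heap, so setting spark.driver.memory programmatically after the JVM has already started has no effect; the -Xmx VM argument is what matters (--driver-memory applies when launching through spark-submit). A minimal sketch (hypothetical HeapCheck class) to verify the heap setting took effect:

public class HeapCheck {
    public static void main(String[] args) {
        // Spark compares this value against a ~450 MB minimum (471859200 bytes);
        // the failing run above had only ~247 MB (259522560 bytes).
        long maxHeapMb = Runtime.getRuntime().maxMemory() / (1024 * 1024);
        System.out.println("Driver max heap: " + maxHeapMb + " MB");
    }
}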
16/09/10 09:44:07 INFO SparkContext: Successfully stopped SparkContext
Exception in thread "main" org.apache.spark.SparkException: Could not parse Master URL: 'Local'
at org.apache.spark.SparkContext$.org$apache$spark$SparkContext$$createTaskScheduler(SparkContext.scala:2499)
at org.apache.spark.SparkContext.<init>(...)
at org.apache.spark.SparkContext$.getOrCreate(SparkContext.scala:2256)
at org.apache.spark.sql.SparkSession$Builder$$anonfun$8.apply(SparkSession.scala:831)
at org.apache.spark.sql.SparkSession$Builder$$anonfun$8.apply(SparkSession.scala:823)
at scala.Option.getOrElse(Option.scala:121)
at org.apache.spark.sql.SparkSession$Builder.getOrCreate(SparkSession.scala:823)
at com.dt.spark200.Spark200Test.main(Spark200Test.java:36)
16/09/10 09:44:07 INFO ShutdownHookManager: Shutdown hook called
Fix: use the lowercase master URL "local":
SparkSession spark = SparkSession
.builder()
.master("local")
.appName("JavaWordCount")
.getOrCreate();
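Master URLs are matched case-sensitively, which is why "Local" could not be parsed. A minimal sketch of the common local-mode forms (pick one):

SparkSession spark = SparkSession
        .builder()
        .master("local")        // single worker thread
        // .master("local[4]")  // four worker threads
        // .master("local[*]")  // one worker thread per available core
        .appName("JavaWordCount")
        .getOrCreate();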
16/09/10 10:00:04 INFO SharedState: Warehouse path is 'file:G:\IMFBigDataSpark2016\IMFJavaWorkspace_Spark200\Spark200Demo/spark-warehouse'.
Exception in thread "main" java.lang.IllegalArgumentException: java.net.URISyntaxException: Relative path in absolute URI: file:G:/IMFBigDataSpark2016/IMFJavaWorkspace_Spark200/Spark200Demo/spark-warehouse
at org.apache.hadoop.fs.Path.initialize(Path.java:206)
at org.apache.hadoop.fs.Path.<init>(...)
at org.apache.spark.sql.catalyst.catalog.SessionCatalog.makeQualifiedPath(SessionCatalog.scala:114)
at org.apache.spark.sql.catalyst.catalog.SessionCatalog.createDatabase(SessionCatalog.scala:145)
at org.apache.spark.sql.catalyst.catalog.SessionCatalog.<init>(...)
at org.apache.spark.sql.internal.SessionState.catalog$lzycompute(SessionState.scala:95)
at org.apache.spark.sql.internal.SessionState.catalog(SessionState.scala:95)
at org.apache.spark.sql.internal.SessionState$$anon$1.<init>(...)
at org.apache.spark.sql.internal.SessionState.analyzer$lzycompute(SessionState.scala:112)
at org.apache.spark.sql.internal.SessionState.analyzer(SessionState.scala:111)
at org.apache.spark.sql.execution.QueryExecution.assertAnalyzed(QueryExecution.scala:49)
at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:64)
at org.apache.spark.sql.SparkSession.baseRelationToDataFrame(SparkSession.scala:382)
at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:143)
at org.apache.spark.sql.DataFrameReader.text(DataFrameReader.scala:492)
at org.apache.spark.sql.DataFrameReader.textFile(DataFrameReader.scala:528)
at org.apache.spark.sql.DataFrameReader.textFile(DataFrameReader.scala:501)
at com.dt.spark200.Spark200Test.main(Spark200Test.java:40)
Caused by: java.net.URISyntaxException: Relative path in absolute URI: file:G:/IMFBigDataSpark2016/IMFJavaWorkspace_Spark200/Spark200Demo/spark-warehouse
at java.net.URI.checkPath(Unknown Source)
at java.net.URI.<init>(...)
at org.apache.hadoop.fs.Path.initialize(Path.java:203)
... 17 more
Fixed by setting spark.sql.warehouse.dir to an explicit file:/// URI:
SparkSession spark = SparkSession
.builder()
.master("local")
.appName("JavaWordCount")
.config("spark.sql.warehouse.dir", "file:///G:/IMFBigDataSpark2016/IMFJavaWorkspace_Spark200/Spark200Demo/spark-warehouse")
.getOrCreate();
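The failure happened because the default warehouse location on Windows is rendered as file:G:\... — an absolute URI whose path part does not start with a slash, which Hadoop's Path constructor rejects. Passing a well-formed file:/// URI avoids it. A minimal sketch (assuming the warehouse may live under the project's working directory) that builds the URI portably instead of hard-coding it:

import java.nio.file.Paths;

String warehouseDir = Paths.get("spark-warehouse").toAbsolutePath().toUri().toString();
SparkSession spark = SparkSession
        .builder()
        .master("local")
        .appName("JavaWordCount")
        .config("spark.sql.warehouse.dir", warehouseDir) // e.g. file:///G:/.../spark-warehouse
        .getOrCreate();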
Run results:
16/09/10 10:10:50 INFO ShuffleBlockFetcherIterator: Started 0 remote fetches in 40 ms
16/09/10 10:10:50 INFO Executor: Finished task 0.0 in stage 1.0 (TID 1). 2394 bytes result sent to driver
16/09/10 10:10:50 INFO TaskSetManager: Finished task 0.0 in stage 1.0 (TID 1) in 337 ms on localhost (1/1)
16/09/10 10:10:50 INFO TaskSchedulerImpl: Removed TaskSet 1.0, whose tasks have all completed, from pool
16/09/10 10:10:50 INFO DAGScheduler: ResultStage 1 (collect at Spark200Test.java:66) finished in 0.338 s
16/09/10 10:10:50 INFO DAGScheduler: Job 0 finished: collect at Spark200Test.java:66, took 2.305265 s
Spark: 2
analysis.: 1
provides: 1
is: 1
R,: 1
APIs: 1
general: 2
a: 1
Big: 1
fast: 1
high-level: 1
that: 1
Java,: 1
Apache: 1
computation: 1
data: 1
: 2
in: 1
optimized: 1
graphs: 1
cluster: 1
Data.: 1
It: 1
for: 2
Scala,: 1
computing: 1
Python,: 1
an: 1
and: 3
supports: 1
engine: 1
system: 1
16/09/10 10:10:50 INFO SparkUI: Stopped Spark web UI at http://132.150.75.19:4040
16/09/10 10:10:50 INFO MapOutputTrackerMasterEndpoint: MapOutputTrackerMasterEndpoint stopped!
Test code:
package com.dt.spark200;

import scala.Tuple2;

import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.Function2;
import org.apache.spark.api.java.function.PairFunction;
import org.apache.spark.sql.SparkSession;

import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Pattern;

public class Spark200Test {
    private static final Pattern SPACE = Pattern.compile(" ");

    public static void main(String[] args) throws Exception {
        /*if (args.length < 1) {
            System.err.println("Usage: JavaWordCount <file>");
            System.exit(1);
        }*/

        SparkSession spark = SparkSession
                .builder()
                .master("local")
                .appName("JavaWordCount")
                .config("spark.sql.warehouse.dir", "file:///G:/IMFBigDataSpark2016/IMFJavaWorkspace_Spark200/Spark200Demo/spark-warehouse")
                .getOrCreate();

        String textfile = "G://IMFBigDataSpark2016//tesdata//spark200.txt";

        // Read the text file as a Dataset<String>, then drop down to the RDD API.
        JavaRDD<String> lines = spark.read().textFile(textfile).javaRDD();

        // Split each line on single spaces into individual words.
        JavaRDD<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
            @Override
            public Iterator<String> call(String s) {
                return Arrays.asList(SPACE.split(s)).iterator();
            }
        });

        // Pair each word with an initial count of 1.
        JavaPairRDD<String, Integer> ones = words.mapToPair(
            new PairFunction<String, String, Integer>() {
                @Override
                public Tuple2<String, Integer> call(String s) {
                    return new Tuple2<>(s, 1);
                }
            });

        // Sum the counts per word.
        JavaPairRDD<String, Integer> counts = ones.reduceByKey(
            new Function2<Integer, Integer, Integer>() {
                @Override
                public Integer call(Integer i1, Integer i2) {
                    return i1 + i2;
                }
            });

        List<Tuple2<String, Integer>> output = counts.collect();
        for (Tuple2<?, ?> tuple : output) {
            System.out.println(tuple._1() + ": " + tuple._2());
        }
        spark.stop();

        while (true) {} // busy-wait so the JVM does not exit
    }
}
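Since the Spark 2.0 Java API accepts Java 8 lambdas wherever these function interfaces are expected, the same word count can be written much more compactly. A minimal sketch (hypothetical Spark200LambdaTest class, same input path and configuration):

package com.dt.spark200;

import scala.Tuple2;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.sql.SparkSession;
import java.util.Arrays;

public class Spark200LambdaTest {
    public static void main(String[] args) {
        SparkSession spark = SparkSession
                .builder()
                .master("local")
                .appName("JavaWordCountLambda")
                .config("spark.sql.warehouse.dir",
                        "file:///G:/IMFBigDataSpark2016/IMFJavaWorkspace_Spark200/Spark200Demo/spark-warehouse")
                .getOrCreate();

        JavaPairRDD<String, Integer> counts = spark.read()
                .textFile("G://IMFBigDataSpark2016//tesdata//spark200.txt")
                .javaRDD()
                .flatMap(line -> Arrays.asList(line.split(" ")).iterator()) // words
                .mapToPair(word -> new Tuple2<>(word, 1))                   // (word, 1)
                .reduceByKey((a, b) -> a + b);                              // sum per word

        counts.collect().forEach(t -> System.out.println(t._1() + ": " + t._2()));
        spark.stop();
    }
}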