# SparkContext 初始化失败,Java 空指针异常
在 Windows 上运行 Spark,已经按照网上的步骤安装了 hadoop-common-bin,并配置了 host 和环境变量,但在 PyCharm 中运行时报出下面的错误。
代码如下:
from pyspark import SparkConf
from pyspark.sql import SparkSession
#spark config
conf = (SparkConf().setMaster("spark://xx.xx.xx.xx:7077").setAppName("test")
.setAll([('spark.executor.memory','10G'),('spark.executor.cores','5'),('spark.cores.max','100')]))
spark = SparkSession.builder.config(conf=conf).enableHiveSupport().getOrCreate()
spark.sql('use wgdb')
报错信息:
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
2019-02-12 11:51:21 ERROR StandaloneSchedulerBackend:70 - Application has been killed. Reason: All masters are unresponsive! Giving up.
2019-02-12 11:51:21 WARN StandaloneSchedulerBackend:66 - Application ID is not initialized yet.
2019-02-12 11:51:21 WARN StandaloneAppClient$ClientEndpoint:66 - Drop UnregisterApplication(null) because has not yet connected to master
2019-02-12 11:51:21 WARN MetricsSystem:66 - Stopping a MetricsSystem that is not running
2019-02-12 11:51:21 ERROR SparkContext:91 - Error initializing SparkContext.
java.lang.NullPointerException
at org.apache.spark.storage.BlockManagerMaster.registerBlockManager(BlockManagerMaster.scala:64)
at org.apache.spark.storage.BlockManager.initialize(BlockManager.scala:248)
at org.apache.spark.SparkContext.<init>(SparkContext.scala:510)
at org.apache.spark.api.java.JavaSparkContext.<init>(JavaSparkContext.scala:58)
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:247)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:238)
at py4j.commands.ConstructorCommand.invokeConstructor(ConstructorCommand.java:80)
at py4j.commands.ConstructorCommand.execute(ConstructorCommand.java:69)
at py4j.GatewayConnection.run(GatewayConnection.java:238)
at java.lang.Thread.run(Thread.java:748)
Traceback (most recent call last):
File "C:/Users/wangan1/PycharmProjects/ph_frige1/may_id.py", line 6, in <module>
spark = SparkSession.builder.config(conf=conf).enableHiveSupport().getOrCreate()
File "D:\python2\lib\site-packages\pyspark\sql\session.py", line 173, in getOrCreate
sc = SparkContext.getOrCreate(sparkConf)
File "D:\python2\lib\site-packages\pyspark\context.py", line 349, in getOrCreate
SparkContext(conf=conf or SparkConf())
File "D:\python2\lib\site-packages\pyspark\context.py", line 118, in __init__
conf, jsc, profiler_cls)
File "D:\python2\lib\site-packages\pyspark\context.py", line 180, in _do_init
self._jsc = jsc or self._initialize_context(self._conf._jconf)
File "D:\python2\lib\site-packages\pyspark\context.py", line 288, in _initialize_context
return self._jvm.JavaSparkContext(jconf)
File "D:\python2\lib\site-packages\py4j\java_gateway.py", line 1525, in __call__
answer, self._gateway_client, None, self._fqn)
File "D:\python2\lib\site-packages\py4j\protocol.py", line 328, in get_return_value
format(target_id, ".", name), value)
py4j.protocol.Py4JJavaError: An error occurred while calling None.org.apache.spark.api.java.JavaSparkContext.
: java.lang.NullPointerException
at org.apache.spark.storage.BlockManagerMaster.registerBlockManager(BlockManagerMaster.scala:64)
at org.apache.spark.storage.BlockManager.initialize(BlockManager.scala:248)
at org.apache.spark.SparkContext.<init>(SparkContext.scala:510)
at org.apache.spark.api.java.JavaSparkContext.<init>(JavaSparkContext.scala:58)
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:247)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:238)
at py4j.commands.ConstructorCommand.invokeConstructor(ConstructorCommand.java:80)
at py4j.commands.ConstructorCommand.execute(ConstructorCommand.java:69)
at py4j.GatewayConnection.run(GatewayConnection.java:238)
at java.lang.Thread.run(Thread.java:748)
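最后解决了,原因非常蠢:本地安装的 Spark(pyspark)版本与集群中的 Spark 版本不同。版本不一致时 driver 无法与 master 正常通信,于是先出现日志中的 "All masters are unresponsive",随后在初始化 SparkContext 时抛出空指针异常。把本地的 Spark 换成与集群一致的版本后,问题解决。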