(待解决) java.io.EOFException: End of File Exception between local host

背景

将 Spark 的 checkpoint 目录设置为阿里云的 HDFS(dfs:// 地址)后,调用 DataFrame.checkpoint() 时报错,问题尚未解决。设置方式如下:

  spark.sparkContext.setCheckpointDir('dfs://f***iyuncs.com:10290/test')
集群环境
正式环境
1593273600
2020-06-28 14:45:05.159335
---------------------------------------------------------------------------
Py4JJavaError                             Traceback (most recent call last)
 in 
    357     # print(spark.conf.get('spark.driver.maxResultSize'))
    358     # print(spark.conf.get('spark.default.parallelism'))
--> 359     t.handle_data(spark, n=1)
    360     spark.stop()

 in wrap_f(*args, **kwargs)
     32     def wrap_f(*args, **kwargs):
     33         start_time = time.time()
---> 34         resutl = f(*args, **kwargs)
     35         end_time = time.time()
     36         print(f"""{f.__name__} 程序执行时间 为 {end_time - start_time} """)

 in handle_data(self, spark, n)
    312         exerdf.persist(storageLevel=StorageLevel.MEMORY_AND_DISK)
    313 
--> 314         exerdf.checkpoint()
    315         treedf.checkpoint()
    316         # print(treedf.count())

/opt/python3.6.7/lib/python3.6/site-packages/pyspark/sql/dataframe.py in checkpoint(self, eager)
    433         .. note:: Experimental
    434         """
--> 435         jdf = self._jdf.checkpoint(eager)
    436         return DataFrame(jdf, self.sql_ctx)
    437 

/opt/python3.6.7/lib/python3.6/site-packages/py4j/java_gateway.py in __call__(self, *args)
   1255         answer = self.gateway_client.send_command(command)
   1256         return_value = get_return_value(
-> 1257             answer, self.gateway_client, self.target_id, self.name)
   1258 
   1259         for temp_arg in temp_args:

/opt/python3.6.7/lib/python3.6/site-packages/pyspark/sql/utils.py in deco(*a, **kw)
     61     def deco(*a, **kw):
     62         try:
---> 63             return f(*a, **kw)
     64         except py4j.protocol.Py4JJavaError as e:
     65             s = e.java_exception.toString()

/opt/python3.6.7/lib/python3.6/site-packages/py4j/protocol.py in get_return_value(answer, gateway_client, target_id, name)
    326                 raise Py4JJavaError(
    327                     "An error occurred while calling {0}{1}{2}.\n".
--> 328                     format(target_id, ".", name), value)
    329             else:
    330                 raise Py4JError(

Py4JJavaError: An error occurred while calling o1812.checkpoint.
: org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 1.0 failed 4 times, most recent failure: Lost task 0.3 in stage 1.0 (TID 4, 172.20.4.28, executor 0): java.io.EOFException: End of File Exception between local host is: "workers-k6dnm/172.20.4.28"; destination host is: "f-**fs.aliyuncs.com":10290; : java.io.EOFException; For more details see:  http://wiki.apache.org/hadoop/EOFException
	at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
	at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
	at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
	at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
	at org.apache.hadoop.net.NetUtils.wrapWithMessage(NetUtils.java:824)
	at org.apache.hadoop.net.NetUtils.wrapException(NetUtils.java:788)
	at org.apache.hadoop.ipc.Client.getRpcResponse(Client.java:1495)

[图片:(待解决) java.io.EOFException: End of File Exception between local host(第 1 张图片)]

你可能感兴趣的:(pyspark)