错误:TypeError: Cannot create a consistent method resolution order (MRO) for bases object, JavaModel, JavaPredictionModel, JavaMLWritable, XGBoostReadable
import numpy as np
import pandas as pd
import os
import re
from sklearn import metrics
import matplotlib.pyplot as plt
# Setup script: registers the XGBoost4J jars, starts a local SparkSession,
# ships the sparkxgb Python wrapper to Spark and imports it.
#
# Jar location: when running locally the jar files sit in the same directory
# as this code; when submitting a job, pass them with the --jars option.
# NOTE(review): this variable is assigned before findspark/pyspark are
# imported - presumably it has to be in place before the JVM is launched;
# confirm before reordering these statements.
os.environ['PYSPARK_SUBMIT_ARGS'] = '--jars xgboost4j-spark-0.72.jar,xgboost4j-0.72.jar pyspark-shell'
import findspark
# NOTE(review): findspark.init() is expected to make the local Spark
# installation's pyspark package importable - verify for this environment.
findspark.init()
import pyspark
from pyspark.conf import SparkConf
from pyspark.sql import SparkSession
from pyspark.ml.feature import OneHotEncoder, StringIndexer, VectorAssembler
from pyspark.ml import Pipeline
from pyspark.sql.functions import col
# Build (or reuse) a session named "PySpark XGBOOST" with master "local[*]".
spark = SparkSession\
.builder\
.appName("PySpark XGBOOST")\
.master("local[*]")\
.getOrCreate()
from pyspark.sql.types import *
# StringIndexer and VectorAssembler were already imported above; this line
# re-imports the same names.
from pyspark.ml.feature import StringIndexer, VectorAssembler
# Register the zipped sparkxgb package with the SparkContext before importing it.
spark.sparkContext.addPyFile("sparkxgb.zip")
# NOTE(review): according to the traceback recorded in this file, this import
# is the statement that raises the MRO TypeError (while sparkxgb/xgboost.py
# defines XGBoostClassificationModel).
from sparkxgb import XGBoostEstimator
import pyspark.sql.functions as F
import pyspark.sql.types as T
错误提示:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-3-76ed78796af5> in <module>
26 from pyspark.ml.feature import StringIndexer, VectorAssembler
27 spark.sparkContext.addPyFile("sparkxgb.zip")
---> 28 from sparkxgb import XGBoostEstimator
29 import pyspark.sql.functions as F
30 import pyspark.sql.types as T
c:\users\wuyun\appdata\local\programs\python\python36\lib\importlib\_bootstrap.py in _find_and_load(name, import_)
c:\users\wuyun\appdata\local\programs\python\python36\lib\importlib\_bootstrap.py in _find_and_load_unlocked(name, import_)
c:\users\wuyun\appdata\local\programs\python\python36\lib\importlib\_bootstrap.py in _load_unlocked(spec)
c:\users\wuyun\appdata\local\programs\python\python36\lib\importlib\_bootstrap.py in _load_backward_compatible(spec)
~\AppData\Local\Temp\spark-0a921313-e576-407d-bed0-19079db92fc9\userFiles-6a1f7c21-f62d-4c16-9402-674b1c4ad979\sparkxgb.zip\sparkxgb\__init__.py in <module>
18
19 from sparkxgb.pipeline import XGBoostPipeline, XGBoostPipelineModel
---> 20 from sparkxgb.xgboost import XGBoostEstimator, XGBoostClassificationModel, XGBoostRegressionModel
21
22 __all__ = ["XGBoostEstimator", "XGBoostClassificationModel", "XGBoostRegressionModel",
c:\users\wuyun\appdata\local\programs\python\python36\lib\importlib\_bootstrap.py in _find_and_load(name, import_)
c:\users\wuyun\appdata\local\programs\python\python36\lib\importlib\_bootstrap.py in _find_and_load_unlocked(name, import_)
c:\users\wuyun\appdata\local\programs\python\python36\lib\importlib\_bootstrap.py in _load_unlocked(spec)
c:\users\wuyun\appdata\local\programs\python\python36\lib\importlib\_bootstrap.py in _load_backward_compatible(spec)
~\AppData\Local\Temp\spark-0a921313-e576-407d-bed0-19079db92fc9\userFiles-6a1f7c21-f62d-4c16-9402-674b1c4ad979\sparkxgb.zip\sparkxgb\xgboost.py in <module>
182
183
--> 184 class XGBoostClassificationModel(JavaParamsOverrides, JavaModel, JavaPredictionModel, JavaMLWritable, XGBoostReadable):
185 """
186 A PySpark implementation of ml.dmlc.xgboost4j.scala.spark.XGBoostClassificationModel.
c:\users\wuyun\appdata\local\programs\python\python36\lib\abc.py in __new__(mcls, name, bases, namespace, **kwargs)
131
132 def __new__(mcls, name, bases, namespace, **kwargs):
--> 133 cls = super().__new__(mcls, name, bases, namespace, **kwargs)
134 # Compute set of abstract method names
135 abstracts = {name
TypeError: Cannot create a consistent method resolution
order (MRO) for bases object, JavaModel, JavaPredictionModel, JavaMLWritable, XGBoostReadable
问题出在第28行(from sparkxgb import XGBoostEstimator):出现MRO错误,即无法为基类 object、JavaModel、JavaPredictionModel、JavaMLWritable、XGBoostReadable 创建一致的方法解析顺序(MRO)。
采用super更改原初始化文件 sparkxgb.zip\sparkxgb\__init__.py 中的继承方法,避免重复调用:如果每个类都使用super,就能保证每个父类的函数只被调用一次。(从上面的报错信息看,实际抛出异常的类定义位于 sparkxgb.zip\sparkxgb\xgboost.py 第184行。)
【python】详解类class的方法解析顺序MRO(Method Resolution Order)(六)本文链接:https://blog.csdn.net/brucewong0516/article/details/79124212
在《【python】详解类class的继承、__init__初始化、super方法(五)》(详见链接)一文中,我们通过super让子类调用父类的属性或函数方法;super之所以能这样工作,正是因为class有方法解析顺序MRO。本文将详解MRO的运行逻辑。
假设现在有一个如下的继承结构,首先通过类名显式调用的方式来调用父类的初始化函数:
# Demo 1: a diamond hierarchy D -> (B, C) -> A in which every class
# initializes its parents by explicit class name. Because B and C each call
# A.__init__ directly, constructing D runs A's initializer twice.


class A(object):
    """Root of the diamond; prints an enter/leave pair when initialized."""

    def __init__(self):
        print(" ->Enter A")
        print(" <-Leave A")


class B(A):
    """Child of A: prints "Enter B", initializes A by name, prints "Leave B"."""

    def __init__(self):
        print(" -->Enter B")
        # Explicit parent call: always runs A.__init__, ignoring the MRO.
        A.__init__(self)
        print(" <--Leave B")


class C(A):
    """Child of A: prints "Enter C", initializes A by name, prints "Leave C"."""

    def __init__(self):
        print(" --->Enter C")
        # Explicit parent call: always runs A.__init__, ignoring the MRO.
        A.__init__(self)
        print(" <---Leave C")


class D(B, C):
    """Bottom of the diamond; initializes B and then C by explicit name."""

    def __init__(self):
        print("---->Enter D")
        # Each of these calls reaches A.__init__ on its own, so A is
        # initialized once via B and a second time via C.
        B.__init__(self)
        C.__init__(self)
        print("<----Leave D")


# Instantiate D to print the call trace.
d = D()
输出结果:
#D类继承B、C类,先初始化进入D,print:Enter D
---->Enter D
#先调用B类进行初始化,运行B类的初始化
-->Enter B
->Enter A
<-Leave A
<--Leave B
#再调用C类进行初始化,运行C类的初始化
--->Enter C
->Enter A
<-Leave A
<---Leave C
#最后打印:Leave D
<----Leave D
从输出中可以看到,类A的初始化函数被调用了两次,这不是我们想要的结果;我们通过super方式来调用父类的初始化函数:
# Demo 2: the same diamond D -> (B, C) -> A, but every class delegates to
# super(). The calls then follow D's method resolution order (D, B, C, A),
# so each initializer - including A's - runs exactly once.


class A(object):
    """Root of the diamond; prints an enter/leave pair when initialized."""

    def __init__(self):
        print(" ->Enter A")
        print(" <-Leave A")


class B(A):
    """Child of A that hands initialization to the next class in the MRO."""

    def __init__(self):
        print(" -->Enter B")
        # For a D instance the class after B in the MRO is C, not A.
        super(B, self).__init__()
        print(" <--Leave B")


class C(A):
    """Child of A that hands initialization to the next class in the MRO."""

    def __init__(self):
        print(" --->Enter C")
        super(C, self).__init__()
        print(" <---Leave C")


class D(B, C):
    """Bottom of the diamond; a single super() call initializes B, C and A."""

    def __init__(self):
        print("---->Enter D")
        super(D, self).__init__()
        print("<----Leave D")


# Instantiate D to print the call trace.
d = D()
输出结果:
---->Enter D
-->Enter B
--->Enter C
->Enter A
<-Leave A
<---Leave C
<--Leave B
<----Leave D
通过输出可以看到,使用super后,A的初始化函数只被调用了一次。这是由于Python的类有一个 __mro__ 属性,这个属性中就保存着方法解析顺序。结合上面的例子来看看类D的 __mro__:
# Print D's method resolution order as a list of class names.
print("MRO:", [x.__name__ for x in D.__mro__])
输出结果
MRO: ['D', 'B', 'C', 'A', 'object']
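因为重复引用,所以可以直接注释掉 from sparkxgb import XGBoostEstimator 这一句(即改为 #from sparkxgb import XGBoostEstimator),从而跳过重复继承。但要注意:注释掉之后,后续代码就无法再使用 XGBoostEstimator,这只是绕过报错,并没有修复 sparkxgb 中类的继承顺序。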