使用Spark引擎执行build cube任务时出现如下异常,该异常从kylin2.4.0版本到最新的kylin2.6.2版本一直存在。
19/05/05 17:36:42 WARN scheduler.TaskSetManager: Lost task 0.0 in stage 0.0 (TID 0, spslave4.bigdata.ly, executor 1): java.lang.ExceptionInInitializerError
at org.apache.kylin.metadata.datatype.DataType.<clinit>(DataType.java:134)
at java.io.ObjectStreamClass.hasStaticInitializer(Native Method)
at java.io.ObjectStreamClass.computeDefaultSUID(ObjectStreamClass.java:1787)
at java.io.ObjectStreamClass.access$100(ObjectStreamClass.java:72)
at java.io.ObjectStreamClass$1.run(ObjectStreamClass.java:253)
at java.io.ObjectStreamClass$1.run(ObjectStreamClass.java:251)
at java.security.AccessController.doPrivileged(Native Method)
at java.io.ObjectStreamClass.getSerialVersionUID(ObjectStreamClass.java:250)
at java.io.ObjectStreamClass.initNonProxy(ObjectStreamClass.java:611)
at java.io.ObjectInputStream.readNonProxyDesc(ObjectInputStream.java:1630)
at java.io.ObjectInputStream.readClassDesc(ObjectInputStream.java:1521)
at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1781)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1353)
at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2018)
at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1942)
at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1808)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1353)
at java.io.ObjectInputStream.readArray(ObjectInputStream.java:1714)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1347)
at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2018)
at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1942)
at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1808)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1353)
at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2018)
at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1942)
at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1808)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1353)
at java.io.ObjectInputStream.readObject(ObjectInputStream.java:373)
at java.util.HashSet.readObject(HashSet.java:333)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at java.io.ObjectStreamClass.invokeReadObject(ObjectStreamClass.java:1058)
at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1909)
at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1808)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1353)
at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2018)
at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1942)
at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1808)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1353)
at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2018)
at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1942)
at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1808)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1353)
at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2018)
at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1942)
at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1808)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1353)
at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2018)
at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1942)
at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1808)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1353)
at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2018)
at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1942)
at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1808)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1353)
at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2018)
at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1942)
at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1808)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1353)
at java.io.ObjectInputStream.readObject(ObjectInputStream.java:373)
at org.apache.spark.serializer.JavaDeserializationStream.readObject(JavaSerializer.scala:75)
at org.apache.spark.serializer.JavaSerializerInstance.deserialize(JavaSerializer.scala:114)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:85)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:53)
at org.apache.spark.scheduler.Task.run(Task.scala:99)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:325)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
Caused by: java.lang.RuntimeException: fail to locate kylin.properties
at org.apache.kylin.common.KylinConfig.buildSiteOrderedProps(KylinConfig.java:378)
at org.apache.kylin.common.KylinConfig.buildSiteProperties(KylinConfig.java:354)
at org.apache.kylin.common.KylinConfig.getInstanceFromEnv(KylinConfig.java:134)
at org.apache.kylin.measure.MeasureTypeFactory.init(MeasureTypeFactory.java:120)
at org.apache.kylin.measure.MeasureTypeFactory.<clinit>(MeasureTypeFactory.java:98)
... 71 more
通过查看源码发现在MeasureTypeFactory这个类的init方法中有一段代码.
该代码的逻辑主要是读取配置文件中前缀为kylin.metadata.custom-measure-types.的配置项,从而获取自定义的MeasureType类型。由于通过KylinConfig.getInstanceFromEnv来获取KylinConfig时抛出了异常,而外层的try{}catch{}只捕获KylinConfigCannotInitException,并没有捕获到该异常,最终导致任务执行失败。
// Register the custom MeasureTypeFactory implementations declared in the Kylin config.
try {
    // The map values are used below as fully-qualified factory class names.
    Map<String, String> customMeasureTypes = KylinConfig.getInstanceFromEnv().getCubeCustomMeasureTypes();
    for (String customFactory : customMeasureTypes.values()) {
        try {
            logger.info("Checking custom measure types from kylin config: " + customFactory);
            factoryInsts.add((MeasureTypeFactory) Class.forName(customFactory).newInstance());
        } catch (Exception e) {
            // A configured class that cannot be loaded or instantiated is reported with its name.
            throw new IllegalArgumentException("Unrecognized MeasureTypeFactory classname: " + customFactory,
                    e);
        }
    }
} catch (KylinConfigCannotInitException e) {
    // Only this exception type is tolerated here; exceptions of other types
    // raised by getInstanceFromEnv() are not handled and propagate.
    logger.warn("Will not add custome MeasureTypeFactory as KYLIN_CONF nor KYLIN_HOME is set");
}
我们跟踪KylinConfig.getInstanceFromEnv()方法对应的代码,发现在执行buildSiteProperties()方法的时候报的错。
/**
 * Returns the KylinConfig for the current environment.
 *
 * <p>A config held by {@code THREAD_ENV_INSTANCE} takes precedence. Otherwise the shared
 * {@code SYS_ENV_INSTANCE} is returned, being built from the site properties on first use.
 * The whole lookup runs while holding the {@code KylinConfig.class} monitor.
 *
 * @return the thread-bound config if present, else the shared environment config
 * @throws IllegalStateException if building the shared config raises an IllegalArgumentException
 */
public static KylinConfig getInstanceFromEnv() {
    synchronized (KylinConfig.class) {
        KylinConfig threadScoped = THREAD_ENV_INSTANCE.get();
        if (threadScoped != null) {
            return threadScoped;
        }
        if (SYS_ENV_INSTANCE != null) {
            return SYS_ENV_INSTANCE;
        }
        try {
            // The default ordered properties are built only on this first-initialization path.
            // Per the original note, the CoProcessor never calls getInstanceFromEnv and so
            // does not need this step.
            buildDefaultOrderedProperties();
            KylinConfig fresh = new KylinConfig();
            Properties siteProps = buildSiteProperties();
            fresh.reloadKylinConfig(siteProps);
            int identity = System.identityHashCode(fresh);
            logger.info("Initialized a new KylinConfig from getInstanceFromEnv : " + identity);
            SYS_ENV_INSTANCE = fresh;
            return fresh;
        } catch (IllegalArgumentException e) {
            throw new IllegalStateException("Failed to find KylinConfig ", e);
        }
    }
}
继续跟踪代码,是由于buildSiteOrderedProps()方法导致的错误。
/**
 * Copies the ordered site properties into a plain {@link Properties} object,
 * trimming surrounding whitespace from every value.
 *
 * @return a new Properties instance holding every entry produced by buildSiteOrderedProps()
 */
private static Properties buildSiteProperties() {
    Properties conf = new Properties();
    OrderedProperties orderedProperties = buildSiteOrderedProps();
    // The type arguments are required: on a raw Map.Entry, getValue() yields Object,
    // which has no trim() method.
    for (Map.Entry<String, String> each : orderedProperties.entrySet()) {
        conf.put(each.getKey(), each.getValue().trim());
    }
    return conf;
}
继续跟踪代码
/**
 * Builds the effective site configuration by layering, in order: the cached default
 * properties, the site properties file, and an optional ".override" file next to it.
 *
 * @return the merged ordered properties
 * @throws RuntimeException if the site properties file cannot be located, or if an
 *         IOException occurs while opening or closing one of the files
 */
private static OrderedProperties buildSiteOrderedProps() {
    try {
        // 1. Start from the default configuration (kylin-defaults.properties in
        //    kylin/core-common/src/main/resources). The cached copy is reused because
        //    reloading it on every call blocked threads under concurrent query load.
        OrderedProperties orderedProperties = new OrderedProperties();
        orderedProperties.putAll(defaultOrderedProperties);

        // 2. Load the site configuration. For backward compatibility it is still named
        //    kylin.properties, although kylin-site.properties would be a better name.
        File propFile = getSitePropertiesFile();
        if (propFile == null || !propFile.exists()) {
            logger.error("fail to locate " + KYLIN_CONF_PROPERTIES_FILE + " at '"
                    + (propFile != null ? propFile.getAbsolutePath() : "") + "'");
            throw new RuntimeException("fail to locate " + KYLIN_CONF_PROPERTIES_FILE);
        }
        // try-with-resources guarantees the stream is closed even when loading fails.
        try (FileInputStream siteIn = new FileInputStream(propFile)) {
            loadPropertiesFromInputStream(siteIn, orderedProperties);
        }

        // 3. kylin.properties.override is still honored as a secondary override,
        //    but its use is no longer recommended.
        File propOverrideFile = new File(propFile.getParentFile(), propFile.getName() + ".override");
        if (propOverrideFile.exists()) {
            try (FileInputStream overrideIn = new FileInputStream(propOverrideFile)) {
                loadPropertiesFromInputStream(overrideIn, orderedProperties);
            }
        }
        return orderedProperties;
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
跟踪getSitePropertiesFile() 方法。
/**
 * Locates the site properties file.
 *
 * <p>The directory named by the {@code KYLIN_CONF} system property is used when it is
 * non-empty; otherwise the "conf" directory under the value returned by getKylinHome().
 * NOTE(review): existFile() is not visible here; presumably it resolves the properties
 * file inside the given directory. Confirm its contract for a missing file.
 *
 * @return whatever existFile() yields for the chosen directory
 * @throws KylinConfigCannotInitException if neither KYLIN_CONF nor a Kylin home is available
 */
static File getSitePropertiesFile() {
    final String confDir = System.getProperty(KYLIN_CONF);
    if (StringUtils.isEmpty(confDir)) {
        logger.debug("KYLIN_CONF property was not set, will seek KYLIN_HOME env variable");

        final String home = getKylinHome();
        if (StringUtils.isEmpty(home)) {
            throw new KylinConfigCannotInitException("Didn't find KYLIN_CONF or KYLIN_HOME, please set one of them");
        }

        logger.info("Use KYLIN_HOME=" + home);
        return existFile(home + File.separator + "conf");
    }

    logger.info("Use KYLIN_CONF=" + confDir);
    return existFile(confDir);
}
发现getSitePropertiesFile()其实返回的是kylin配置文件存储的路径:优先通过KYLIN_CONF系统属性(System.getProperty)获取;若未设置,则通过KYLIN_HOME环境变量拼接/conf/kylin.properties获得。
由于该段代码是通过spark提交到yarn集群中运行的,而yarn集群所在的服务器上并不存在kylin.properties配置文件,但是配置了KYLIN_HOME环境变量,从而导致在通过propFile.exists()检验文件是否存在的时候返回了false,进而抛出了RuntimeException。
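而MeasureTypeFactory这个类的init方法中,外层的catch只捕获KylinConfigCannotInitException,内层的catch (Exception)只包裹了自定义工厂类的实例化(并封装成IllegalArgumentException抛出),因此这个RuntimeException没有被任何catch捕获,继续向上抛出,最终表现为ExceptionInInitializerError,导致程序运行失败。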
方案一:对于没有自定义的MeasureType类型只需要简单修改异常捕获,扩大异常范围。
try {
Map customMeasureTypes = KylinConfig.getInstanceFromEnv().getCubeCustomMeasureTypes();
for (String customFactory : customMeasureTypes.values()) {
try {
logger.info("Checking custom measure types from kylin config: " + customFactory);
factoryInsts.add((MeasureTypeFactory) Class.forName(customFactory).newInstance());
} catch (Exception e) {
throw new IllegalArgumentException("Unrecognized MeasureTypeFactory classname: " + customFactory,
e);
}
}
} catch (Exception e) {
logger.warn("Will not add custome MeasureTypeFactory as KYLIN_CONF nor KYLIN_HOME is set");
}
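方案二:在yarn集群的服务器中去掉$KYLIN_HOME的配置。这样在KYLIN_CONF也未设置的情况下,getSitePropertiesFile()会直接抛出KylinConfigCannotInitException,而该异常正好能被MeasureTypeFactory的init方法捕获,只打印一条警告,代码就不会再去读取$KYLIN_HOME下面相关的配置参数来初始化KylinConfig这个类了。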