前两周在公司的zeppelin上配置了一个interpreter,用来支持在zeppelin中直接查询hive,便于测试同学的使用,还有结果的查看。(毕竟在beeline中查起来挺麻烦的)配置过后我只是简单使用了一下就没有理会,这两天有同学反馈在使用一段时间后会报一个错误:
java.sql.SQLException: Could not open client transport with JDBC Uri: jdbc:hive2://xxxxxx:13000/;principal=sql_prc/[email protected]?mapreduce.job.queuename=xxxx: GSS initiate failed
at org.apache.hive.jdbc.HiveConnection.openTransport(HiveConnection.java:215)
at org.apache.hive.jdbc.HiveConnection.(HiveConnection.java:163)
at org.apache.hive.jdbc.HiveDriver.connect(HiveDriver.java:105)
at java.sql.DriverManager.getConnection(DriverManager.java:664)
at java.sql.DriverManager.getConnection(DriverManager.java:208)
at org.apache.commons.dbcp2.DriverManagerConnectionFactory.createConnection(DriverManagerConnectionFactory.java:79)
at org.apache.commons.dbcp2.PoolableConnectionFactory.makeObject(PoolableConnectionFactory.java:205)
at org.apache.commons.pool2.impl.GenericObjectPool.create(GenericObjectPool.java:861)
at org.apache.commons.pool2.impl.GenericObjectPool.borrowObject(GenericObjectPool.java:435)
at org.apache.commons.pool2.impl.GenericObjectPool.borrowObject(GenericObjectPool.java:363)
at org.apache.commons.dbcp2.PoolingDriver.connect(PoolingDriver.java:129)
at java.sql.DriverManager.getConnection(DriverManager.java:664)
at java.sql.DriverManager.getConnection(DriverManager.java:270)
at org.apache.zeppelin.jdbc.JDBCInterpreter.getConnectionFromPool(JDBCInterpreter.java:342)
at org.apache.zeppelin.jdbc.JDBCInterpreter.getConnection(JDBCInterpreter.java:394)
at org.apache.zeppelin.jdbc.JDBCInterpreter.executeSql(JDBCInterpreter.java:570)
at org.apache.zeppelin.jdbc.JDBCInterpreter.interpret(JDBCInterpreter.java:669)
at org.apache.zeppelin.interpreter.LazyOpenInterpreter.interpret(LazyOpenInterpreter.java:94)
at org.apache.zeppelin.interpreter.remote.RemoteInterpreterServer$InterpretJob.jobRun(RemoteInterpreterServer.java:489)
at org.apache.zeppelin.scheduler.Job.run(Job.java:175)
at org.apache.zeppelin.scheduler.ParallelScheduler$JobRunner.run(ParallelScheduler.java:162)
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$201(ScheduledThreadPoolExecutor.java:180)
at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:293)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
Caused by: org.apache.thrift.transport.TTransportException: GSS initiate failed
at org.apache.thrift.transport.TSaslTransport.sendAndThrowMessage(TSaslTransport.java:221)
at org.apache.thrift.transport.TSaslTransport.open(TSaslTransport.java:297)
at org.apache.thrift.transport.TSaslClientTransport.open(TSaslClientTransport.java:37)
at org.apache.hadoop.hive.thrift.client.TUGIAssumingTransport$1.run(TUGIAssumingTransport.java:52)
at org.apache.hadoop.hive.thrift.client.TUGIAssumingTransport$1.run(TUGIAssumingTransport.java:49)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1854)
at org.apache.hadoop.hive.thrift.client.TUGIAssumingTransport.open(TUGIAssumingTransport.java:49)
at org.apache.hive.jdbc.HiveConnection.openTransport(HiveConnection.java:190)
很明显,是因为kerberos票据过期后没有重新认证(刚配置好时可以正常使用,运行一段时间后才报错),猜测是zeppelin的Bug,之后看了一下zeppelin的源码,果然是一个Bug。
下面贴一下关键代码
JDBCInterpreter.java
public void open() {
......
// Kerberos login happens only here, the first time the interpreter is opened;
// the TGT is never refreshed afterwards, so it can expire while the
// interpreter keeps running.
if (!isEmpty(property.getProperty("zeppelin.jdbc.auth.type"))) {
JDBCSecurityImpl.createSecureConfiguration(property);
}
......
}
在获取Connection的时候并没有检查登录状态,下面的代码中把相应的处理补充了上去:
/**
 * Builds a JDBC {@link Connection} for the given property key.
 *
 * <p>For Kerberos-secured targets the TGT obtained once in {@code open()} can
 * expire while the interpreter keeps running, which then surfaces as
 * "GSS initiate failed" when a new connection is created for the pool.
 * Before touching the pool we therefore check the ticket and re-login from
 * the keytab if needed (see ZEPPELIN-2063).
 *
 * @param propertyKey        prefix selecting the configured JDBC target (e.g. "hive")
 * @param interpreterContext current interpreter context
 * @throws InterpreterException if proxy-user creation or the doAs call fails
 */
public Connection getConnection(String propertyKey, InterpreterContext interpreterContext)
    throws ClassNotFoundException, SQLException, InterpreterException, IOException {
....
switch (authType) {
  case KERBEROS:
    if (user == null) {
      connection = getConnectionFromPool(url, user, propertyKey, properties);
    } else {
      if ("hive".equalsIgnoreCase(propertyKey)) {
        // Fix for Kerberos ticket expiry / re-login (add by levin).
        // NOTE(review): the original guard was
        //   if (UserGroupInformation.isLoginKeytabBased() == false) { ... reloginFromKeytab(); }
        // which is inverted: reloginFromKeytab() is a no-op unless the login
        // IS keytab-based, so the relogin could never take effect.
        // checkTGTAndReloginFromKeytab() performs both the keytab check and
        // the "only relogin when the TGT is close to expiring" logic.
        UserGroupInformation.getLoginUser().checkTGTAndReloginFromKeytab();
        logger.info("propertyKey是hive");
        StringBuilder connectionUrl = new StringBuilder(url);
        // Append-position for extra URL parameters: before "?" if present,
        // otherwise at the end of the URL.
        int lastIndexOfUrl = connectionUrl.indexOf("?");
        if (lastIndexOfUrl == -1) {
          lastIndexOfUrl = connectionUrl.length();
        }
        logger.info("connectionUrl: " + connectionUrl);
        // No proxy user in this deployment, so the insertion is disabled.
        //connectionUrl.insert(lastIndexOfUrl, ";hive.server2.proxy.user=" + user + ";");
        connection = getConnectionFromPool(connectionUrl.toString(),
            user, propertyKey, properties);
      } else {
        // Non-hive targets: borrow the connection as a proxy user so it is
        // owned by the notebook user rather than the service principal.
        UserGroupInformation ugi = null;
        try {
          ugi = UserGroupInformation.createProxyUser(user,
              UserGroupInformation.getCurrentUser());
        } catch (Exception e) {
          logger.error("Error in createProxyUser", e);
          StringBuilder stringBuilder = new StringBuilder();
          stringBuilder.append(e.getMessage()).append("\n");
          stringBuilder.append(e.getCause());
          throw new InterpreterException(stringBuilder.toString());
        }
        final String poolKey = propertyKey;
        try {
          connection = ugi.doAs(new PrivilegedExceptionAction() {
            @Override
            public Connection run() throws Exception {
              return getConnectionFromPool(url, user, poolKey, properties);
            }
          });
        } catch (Exception e) {
          logger.error("Error in doAs", e);
          StringBuilder stringBuilder = new StringBuilder();
          stringBuilder.append(e.getMessage()).append("\n");
          stringBuilder.append(e.getCause());
          throw new InterpreterException(stringBuilder.toString());
        }
      }
    }
    break;
  default:
    connection = getConnectionFromPool(url, user, propertyKey, properties);
}
}
....
这个bug在 0.7.1 和 0.8.0 中已经修复。公司的版本比较低,如果有其他人遇到了可以尝试升级解决;由于公司的zeppelin做了很多二次开发,升级不是很方便,就直接改了一下源码。
https://issues.apache.org/jira/browse/ZEPPELIN-2063
https://stackoverflow.com/questions/34616676/should-i-call-ugi-checktgtandreloginfromkeytab-before-every-action-on-hadoop