背景:
在执行双机房部署的时候,因为应用长时间未访问数据库,导致后面访问的数据库的线程都被挂起。
现象分析:
"Thread-74" daemon prio=10 tid=0x00007f1840044000 nid=0x387b runnable [0x00007f18bdb27000] java.lang.Thread.State: RUNNABLE at java.net.SocketInputStream.socketRead0(Native Method) at java.net.SocketInputStream.read(SocketInputStream.java:129) at oracle.net.ns.Packet.receive(Unknown Source) at oracle.net.ns.DataPacket.receive(Unknown Source) at oracle.net.ns.NetInputStream.getNextPacket(Unknown Source) at oracle.net.ns.NetInputStream.read(Unknown Source) at oracle.net.ns.NetInputStream.read(Unknown Source) at oracle.net.ns.NetInputStream.read(Unknown Source) at oracle.jdbc.driver.T4CMAREngine.unmarshalUB1(T4CMAREngine.java:1109) at oracle.jdbc.driver.T4CMAREngine.unmarshalSB1(T4CMAREngine.java:1080) at oracle.jdbc.driver.T4C8Oall.receive(T4C8Oall.java:485) at oracle.jdbc.driver.T4CStatement.doOall8(T4CStatement.java:210) at oracle.jdbc.driver.T4CStatement.executeForDescribe(T4CStatement.java:804) at oracle.jdbc.driver.OracleStatement.executeMaybeDescribe(OracleStatement.java:1051) at oracle.jdbc.driver.T4CStatement.executeMaybeDescribe(T4CStatement.java:845) at oracle.jdbc.driver.OracleStatement.doExecuteWithTimeout(OracleStatement.java:1156) at oracle.jdbc.driver.OracleStatement.executeQuery(OracleStatement.java:1315) - locked <0x00000000be528248> (a oracle.jdbc.driver.T4CStatement) - locked <0x00000000c4d1b000> (a oracle.jdbc.driver.T4CConnection) at oracle.jdbc.driver.PhysicalConnection.doPingDatabase(PhysicalConnection.java:4614) at oracle.jdbc.driver.PhysicalConnection$1.run(PhysicalConnection.java:4590) at java.lang.Thread.run(Thread.java:662) Locked ownable synchronizers: DubboServerHandler-172.21.55.25:20883-thread-13" daemon prio=10 tid=0x00007f1828012800 nid=0x3867 in Object.wait() [0x00007f18be305000] java.lang.Thread.State: TIMED_WAITING (on object monitor) at java.lang.Object.wait(Native Method) - waiting on <0x00000000be51e0b0> (a java.lang.Thread) at java.lang.Thread.join(Thread.java:1194) - locked <0x00000000be51e0b0> (a java.lang.Thread) at oracle.jdbc.driver.PhysicalConnection.pingDatabase(PhysicalConnection.java:4596) at sun.reflect.GeneratedMethodAccessor3.invoke(Unknown Source) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25) at java.lang.reflect.Method.invoke(Method.java:597) at com.alibaba.druid.pool.vendor.OracleValidConnectionChecker.isValidConnection(OracleValidConnectionChecker.java:94) at com.alibaba.druid.pool.DruidAbstractDataSource.testConnectionInternal(DruidAbstractDataSource.java:1252) at com.alibaba.druid.pool.DruidDataSource.getConnectionDirect(DruidDataSource.java:954) at com.alibaba.druid.pool.DruidDataSource.getConnection(DruidDataSource.java:921) at com.alibaba.druid.pool.DruidDataSource.getConnection(DruidDataSource.java:911) at com.alibaba.druid.pool.DruidDataSource.getConnection(DruidDataSource.java:98) at org.springframework.jdbc.datasource.DataSourceUtils.doGetConnection(DataSourceUtils.java:113) at org.springframework.jdbc.datasource.TransactionAwareDataSourceProxy$TransactionAwareInvocationHandler.invoke(TransactionAwareDataSourceProxy.java:210) at $Proxy25.toString(Unknown Source) at java.lang.String.valueOf(String.java:2826) at java.lang.StringBuffer.append(StringBuffer.java:219) - locked <0x00000000be4cd210> (a java.lang.StringBuffer) at org.springframework.orm.ibatis.SqlMapClientTemplate.execute(SqlMapClientTemplate.java:194) at org.springframework.orm.ibatis.SqlMapClientTemplate.executeWithListResult(SqlMapClientTemplate.java:249) at org.springframework.orm.ibatis.SqlMapClientTemplate.queryForList(SqlMapClientTemplate.java:296) at com.lianpay.bank.mng.dao.impl.BaseIbatisDAOImpl.queryAll(BaseIbatisDAOImpl.java:123) at com.lianpay.bank.mng.service.impl.QueryBankListServiceImpl.queryPayTypeList(QueryBankListServiceImpl.java:319) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25)
我们可以发现线程thread-13一直被阻塞在pingDatabase方法调用中,而线程Thread-74一直卡在socketRead0,初步分析是没有设置socket timeout时间。
解决:
参考文档:
http://agapple.iteye.com/blog/772507
http://www.importnew.com/2466.html
http://agapple.iteye.com/blog/1024508
查看上面两个参考文档之后,发现需要配置validationQueryTimeout,oracle.net.CONNECT_TIMEOUT=3000&oracle.net.READ_TIMEOUT=60000这几个参数。
源码分析:
问题1:我们设置了maxWait参数,为什么获取链接的时候未超时,报异常
问题2:我们设置了validationQuery,为什么没有定期发送ping包过去保活
一: maxWait参数主要作用在getConnectionInternal,而我们现在是卡在testConnectionInternal中,表示是已经获取到connection,但是卡在验证connection的有效性中。
public DruidPooledConnection getConnectionDirect(long maxWaitMillis) throws SQLException { int notFullTimeoutRetryCnt = 0; for (;;) { // handle notFullTimeoutRetry DruidPooledConnection poolableConnection; try { poolableConnection = getConnectionInternal(maxWaitMillis); } catch (GetConnectionTimeoutException ex) { if (notFullTimeoutRetryCnt <= this.notFullTimeoutRetryCount && !isFull()) { notFullTimeoutRetryCnt++; if (LOG.isWarnEnabled()) { LOG.warn("not full timeout retry : " + notFullTimeoutRetryCnt); } continue; } throw ex; } if (isTestOnBorrow()) { boolean validate = testConnectionInternal(poolableConnection.getConnection()); if (!validate) { if (LOG.isDebugEnabled()) { LOG.debug("skip not validate connection."); } Connection realConnection = poolableConnection.getConnection(); discardConnection(realConnection); continue; } }
2. validationQuery不是用来保证socket长链接不断开的。validationQuery的调用有两处,一是创建链接的时候,二是获取到链接的时候,会验证有效性。
protected boolean testConnectionInternal(Connection conn) { String sqlFile = JdbcSqlStat.getContextSqlFile(); String sqlName = JdbcSqlStat.getContextSqlName(); if (sqlFile != null) { JdbcSqlStat.setContextSqlFile(null); } if (sqlName != null) { JdbcSqlStat.setContextSqlName(null); } try { if (validConnectionChecker != null) { return validConnectionChecker.isValidConnection(conn, validationQuery, validationQueryTimeout); } if (conn.isClosed()) { return false; } if (null == validationQuery) { return true; } Statement stmt = null; ResultSet rset = null; try { stmt = conn.createStatement(); if (getValidationQueryTimeout() > 0) { stmt.setQueryTimeout(validationQueryTimeout); } rset = stmt.executeQuery(validationQuery); if (!rset.next()) { return false; } } finally { JdbcUtils.close(rset); JdbcUtils.close(stmt); } return true; } catch (Exception ex) { // skip return false; } finally { if (sqlFile != null) { JdbcSqlStat.setContextSqlFile(sqlFile); } if (sqlName != null) { JdbcSqlStat.setContextSqlName(sqlName); } } }
如果socket链接断开不可用,也不会导致应用异常,druid框架会不断的重试,抛弃不可用链接,直到拿到可用的链接
public DruidPooledConnection getConnectionDirect(long maxWaitMillis) throws SQLException { int notFullTimeoutRetryCnt = 0; for (;;) { // handle notFullTimeoutRetry DruidPooledConnection poolableConnection; try { poolableConnection = getConnectionInternal(maxWaitMillis); } catch (GetConnectionTimeoutException ex) { if (notFullTimeoutRetryCnt <= this.notFullTimeoutRetryCount && !isFull()) { notFullTimeoutRetryCnt++; if (LOG.isWarnEnabled()) { LOG.warn("not full timeout retry : " + notFullTimeoutRetryCnt); } continue; } throw ex; } if (isTestOnBorrow()) { boolean validate = testConnectionInternal(poolableConnection.getConnection()); if (!validate) { if (LOG.isDebugEnabled()) { LOG.debug("skip not validate connection."); } Connection realConnection = poolableConnection.getConnection(); discardConnection(realConnection); continue; } } else { Connection realConnection = poolableConnection.getConnection(); if (realConnection.isClosed()) { discardConnection(null); // 传入null,避免重复关闭 continue; } if (isTestWhileIdle()) { final long currentTimeMillis = System.currentTimeMillis(); final long lastActiveTimeMillis = poolableConnection.getConnectionHolder().getLastActiveTimeMillis(); final long idleMillis = currentTimeMillis - lastActiveTimeMillis; long timeBetweenEvictionRunsMillis = this.getTimeBetweenEvictionRunsMillis(); if (timeBetweenEvictionRunsMillis <= 0) { timeBetweenEvictionRunsMillis = DEFAULT_TIME_BETWEEN_EVICTION_RUNS_MILLIS; } if (idleMillis >= timeBetweenEvictionRunsMillis) { boolean validate = testConnectionInternal(poolableConnection.getConnection()); if (!validate) { if (LOG.isDebugEnabled()) { LOG.debug("skip not validate connection."); } discardConnection(realConnection); continue; } } } } if (isRemoveAbandoned()) { StackTraceElement[] stackTrace = Thread.currentThread().getStackTrace(); poolableConnection.setConnectStackTrace(stackTrace); poolableConnection.setConnectedTimeNano(); poolableConnection.setTraceEnable(true); synchronized (activeConnections) { activeConnections.put(poolableConnection, PRESENT); } } if (!this.isDefaultAutoCommit()) { poolableConnection.setAutoCommit(false); } return poolableConnection; } }