最近添加了数据库监控后发现会有几十万分之一概率查询失败. 查看日志发现异常如下 :
Caused by: com.mysql.jdbc.exceptions.jdbc4.CommunicationsException: Communications link failure
监控如下:
连接池使用的是dbcp 1.4版本. 查了一下, 同学们讲mysql 连接如果持续空闲超过8小时会被服务端关闭.
通过mysql > show variables like '%timeout%';
查到结果确实如此
随即按照大家讲的将dbcp更换成了druid (1.1.2) 连接池, 问题得到解决. 另外修改mysql的超时设置好像也可以, 但由于集团弹性数据库修改起来流程比较繁琐, 没有深究.
initialSize: 初始化连接个数
maxActive: 最大连接池数量
minIdle: 最小连接池数量
validationQuery: 用来检测连接是否有效的sql,要求是一个查询语句。如果validationQuery为null,testOnBorrow、testOnReturn、testWhileIdle都不会起作用。
testOnBorrow: false 申请连接时不执行validationQuery检测
testOnReturn: false 归还连接时不执行validationQuery检测
testWhileIdle: true 申请连接的时候检测,如果空闲时间大于timeBetweenEvictionRunsMillis,执行validationQuery检测
timeBetweenEvictionRunsMillis: 有两个含义:
1.testWhileIdle的判断依据,详细看testWhileIdle属性的说明 对应以下第一种方式
2.Destroy线程会检测连接的间隔时间 对应以下第二种方式
minEvictableIdleTimeMillis: Destroy worker执行时判断连接空闲时间是否大于 minEvictableIdleTimeMillis, 如果大于则再判断连接池中空闲连接数是否大于minIdle, 如果大于则回收此连接
maxEvictableIdleTimeMillis: Destroy worker执行时判断连接空闲时间是否大于 maxEvictableIdleTimeMillis, 如果大于回收此连接(忽略minIdle).
/**
 * Entry point for borrowing a connection: delegates to
 * {@link #getConnection(long)} using this data source's configured {@code maxWait}.
 *
 * @return the pooled connection produced by {@code getConnection(long)}
 * @throws SQLException if the delegate fails to provide a connection
 */
public DruidPooledConnection getConnection() throws SQLException {
    return getConnection(maxWait);
}
/**
 * Initializes the data source (via {@code init()}) and then borrows a connection.
 *
 * <p>When at least one filter is registered the request is routed through a new
 * {@code FilterChainImpl}; otherwise the connection is taken directly from
 * {@link #getConnectionDirect(long)}.
 *
 * @param maxWaitMillis wait limit forwarded unchanged to the filter chain or to
 *                      {@code getConnectionDirect}
 * @return the pooled connection
 * @throws SQLException if initialization or connection acquisition fails
 */
public DruidPooledConnection getConnection(long maxWaitMillis) throws SQLException {
    init();

    // No filters registered: skip the chain and borrow directly.
    if (filters.size() <= 0) {
        return getConnectionDirect(maxWaitMillis);
    }

    final FilterChainImpl chain = new FilterChainImpl(this);
    return chain.dataSource_connect(this, maxWaitMillis);
}
/**
 * Borrows a connection from the pool and, depending on configuration, validates it
 * before handing it to the caller.
 *
 * <p>The outer loop repeats until a connection is accepted or an exception is thrown.
 * Each iteration obtains a candidate via {@code getConnectionInternal} (retrying on
 * {@code GetConnectionTimeoutException} while the retry budget allows and the pool is
 * not full) and then applies one of three checks:
 * <ul>
 *   <li>{@code testOnBorrow}: always run {@code testConnectionInternal};</li>
 *   <li>otherwise, discard the candidate if the underlying connection is already closed;</li>
 *   <li>otherwise, if {@code testWhileIdle} is set, run {@code testConnectionInternal}
 *       only when the connection has been idle for at least
 *       {@code timeBetweenEvictionRunsMillis} (60000 ms when that value is non-positive).</li>
 * </ul>
 * Candidates that fail a check are discarded and the loop tries again.
 *
 * @param maxWaitMillis wait limit passed to {@code getConnectionInternal}
 * @return a pooled connection that passed the configured checks
 * @throws SQLException declared by the method; {@code GetConnectionTimeoutException}
 *         is rethrown once retries are exhausted or the pool is full
 */
public DruidPooledConnection getConnectionDirect(long maxWaitMillis) throws SQLException {
int notFullTimeoutRetryCnt = 0;
DruidPooledConnection poolableConnection;
while(true) { // Note: this outer loop only ends when a connection is accepted (break) or an exception is thrown.
while(true) {
try {
poolableConnection = this.getConnectionInternal(maxWaitMillis);// obtain a candidate connection
break;
} catch (GetConnectionTimeoutException var23) {
// Give up once the retry budget is exceeded or the pool reports itself full; otherwise retry.
if(notFullTimeoutRetryCnt > this.notFullTimeoutRetryCount || this.isFull()) {
throw var23;
}
++notFullTimeoutRetryCnt;
if(LOG.isWarnEnabled()) {
LOG.warn("get connection timeout retry : " + notFullTimeoutRetryCnt);
}
}
}
if(this.testOnBorrow) {
// With testOnBorrow=true every borrowed connection is validated before it is returned;
// the article notes this is the less efficient option.
boolean validate = this.testConnectionInternal(poolableConnection.holder, poolableConnection.conn);
if(validate) {
break;
}
if(LOG.isDebugEnabled()) {
LOG.debug("skip not validate connection.");
}
// Validation failed: discard the connection and loop to fetch another one.
this.discardConnection(poolableConnection.holder);
} else if(poolableConnection.conn.isClosed()) {
// The underlying connection is already closed: discard it and loop again.
this.discardConnection(poolableConnection.holder);
} else {
// With testWhileIdle=false no further check is made; the connection is returned as-is.
if(!this.testWhileIdle) {
break;
}
DruidConnectionHolder holder = poolableConnection.holder;
long currentTimeMillis = System.currentTimeMillis();
long lastActiveTimeMillis = holder.lastActiveTimeMillis;
long lastExecTimeMillis = holder.lastExecTimeMillis;
long lastKeepTimeMillis = holder.lastKeepTimeMillis;
// Use the most recent of the recorded timestamps as the "last active" reference.
if(this.checkExecuteTime && lastExecTimeMillis != lastActiveTimeMillis) {
lastActiveTimeMillis = lastExecTimeMillis;
}
if(lastKeepTimeMillis > lastActiveTimeMillis) {
lastActiveTimeMillis = lastKeepTimeMillis;
}
long idleMillis = currentTimeMillis - lastActiveTimeMillis;
long timeBetweenEvictionRunsMillis = this.timeBetweenEvictionRunsMillis;
if(timeBetweenEvictionRunsMillis <= 0L) {
// Fall back to 60000 ms (1 minute) when the setting is non-positive.
timeBetweenEvictionRunsMillis = 60000L;
}
/*
Validation is skipped when both hold:
1. idle time < timeBetweenEvictionRunsMillis
2. idle time >= 0
Consequently, when relying on testWhileIdle to detect dead connections,
timeBetweenEvictionRunsMillis must stay below MySQL's 8-hour idle timeout
(per the article); otherwise a server-closed connection can still be handed out.
*/
if(idleMillis < timeBetweenEvictionRunsMillis && idleMillis >= 0L) {
break;
}
// Run the validation check (per the article, this executes the configured validationQuery, e.g. 'select 1 from dual').
boolean validate = this.testConnectionInternal(poolableConnection.holder, poolableConnection.conn);
if(validate) {
break;
}
if(LOG.isDebugEnabled()) {
LOG.debug("skip not validate connection.");
}
this.discardConnection(poolableConnection.holder);
}
}
// When removeAbandoned is enabled, record the borrower's stack trace and borrow time
// and register the connection in activeConnections under activeConnectionLock.
if(this.removeAbandoned) {
StackTraceElement[] stackTrace = Thread.currentThread().getStackTrace();
poolableConnection.connectStackTrace = stackTrace;
poolableConnection.setConnectedTimeNano();
poolableConnection.traceEnable = true;
this.activeConnectionLock.lock();
try {
this.activeConnections.put(poolableConnection, PRESENT);
} finally {
this.activeConnectionLock.unlock();
}
}
// Apply the pool-level default when auto-commit is configured off.
if(!this.defaultAutoCommit) {
poolableConnection.setAutoCommit(false);
}
return poolableConnection;
}
// init() is the method that sets up the connection pool.
// This is an excerpt: the "..." lines mark code the article left out.
public void init() throws SQLException {
...
this.createAndLogThread();
this.createAndStartCreatorThread();
// Invoke the method that creates the destroy (eviction) thread.
this.createAndStartDestroyThread();
// Block until initedLatch is released.
this.initedLatch.await();
...
}
/**
 * Creates the destroy task and arranges for connection eviction to run.
 *
 * <p>If a {@code destroyScheduler} is configured, the task is scheduled at a fixed
 * rate of {@code timeBetweenEvictionRunsMillis} (1000 ms when that value is
 * non-positive) and {@code initedLatch} is counted down. Otherwise a dedicated
 * {@code DestroyConnectionThread} is created and started.
 */
protected void createAndStartDestroyThread() {
    destroyTask = new DruidDataSource.DestroyTask();

    // No shared scheduler: fall back to a dedicated destroy thread.
    if (destroyScheduler == null) {
        final String name = "Druid-ConnectionPool-Destroy-" + System.identityHashCode(this);
        destroyConnectionThread = new DruidDataSource.DestroyConnectionThread(name);
        destroyConnectionThread.start();
        return;
    }

    final long configured = timeBetweenEvictionRunsMillis;
    final long intervalMillis = configured > 0L ? configured : 1000L;

    // Start the periodic destroy task.
    destroySchedulerFuture = destroyScheduler.scheduleAtFixedRate(
            destroyTask, intervalMillis, intervalMillis, TimeUnit.MILLISECONDS);
    initedLatch.countDown();
}
/**
 * Runnable executed by the destroy scheduler/thread: shrinks the pool and, when
 * enabled, removes abandoned connections.
 */
public class DestroyTask implements Runnable {
    public DestroyTask() {
    }

    @Override
    public void run() {
        final DruidDataSource pool = DruidDataSource.this;

        // checkTime is passed as true so idle-time thresholds are evaluated.
        pool.shrink(true, pool.keepAlive);

        if (pool.isRemoveAbandoned()) {
            pool.removeAbandoned();
        }
    }
}
/**
 * The concrete eviction routine invoked by the destroy task.
 * This is an excerpt: the trailing "..." marks code the article left out.
 *
 * <p>While holding {@code lock}, walks the pooled connections from index 0 and sorts
 * them into {@code evictConnections} (to be closed) and {@code keepAliveConnections},
 * then removes the selected entries from the front of {@code connections}. After the
 * lock is released, the evicted connections are closed.
 *
 * @param checkTime when true, eviction is decided by the time-based thresholds
 *                  ({@code phyTimeoutMillis}, {@code minEvictableIdleTimeMillis},
 *                  {@code maxEvictableIdleTimeMillis}); when false, the first
 *                  {@code poolingCount - minIdle} connections are evicted
 * @param keepAlive when true, connections idle for at least
 *                  {@code keepAliveBetweenTimeMillis} are collected for keep-alive,
 *                  and {@code needFill} is set if the pool has fallen below {@code minIdle}
 */
public void shrink(boolean checkTime, boolean keepAlive) {
try {
this.lock.lockInterruptibly();
} catch (InterruptedException var49) {
// NOTE(review): returns without restoring the thread's interrupt status.
return;
}
boolean needFill = false;
int evictCount = 0;
int keepAliveCount = 0;
int fatalErrorIncrement = this.fatalErrorCount - this.fatalErrorCountLastShrink;
this.fatalErrorCountLastShrink = this.fatalErrorCount;
int checkCount;
label956: {
try {
if(this.inited) {
// Number of eviction candidates = connections currently pooled - configured minIdle.
checkCount = this.poolingCount - this.minIdle;
long currentTimeMillis = System.currentTimeMillis();
int i;
for(i = 0; i < this.poolingCount; ++i) {
DruidConnectionHolder connection = this.connections[i];
// After a fatal error, connections established before that error go to the keep-alive list.
if((this.onFatalError || fatalErrorIncrement > 0) && this.lastFatalErrorTimeMillis > connection.connectTimeMillis) {
this.keepAliveConnections[keepAliveCount++] = connection;
} else if(checkTime) {
long idleMillis;
// Physical lifetime limit: evict when time since connectTimeMillis exceeds phyTimeoutMillis.
if(this.phyTimeoutMillis > 0L) {
idleMillis = currentTimeMillis - connection.connectTimeMillis;
if(idleMillis > this.phyTimeoutMillis) {
this.evictConnections[evictCount++] = connection;
continue;
}
}
// Idle time of the connection handled in this iteration = current time - its last active time.
idleMillis = currentTimeMillis - connection.lastActiveTimeMillis;
// Neither threshold reached: stop scanning. This relies on the array being ordered
// longest-idle first, as the article states — TODO confirm against the full source.
if(idleMillis < this.minEvictableIdleTimeMillis && idleMillis < this.keepAliveBetweenTimeMillis) {
break;
}
// Idle time >= the configured minimum evictable idle time (minEvictableIdleTimeMillis).
if(idleMillis >= this.minEvictableIdleTimeMillis) {
/*
checkTime is the method argument (true when called from DestroyTask).
i: current loop index. Per the article, connections are borrowed from the highest
index and new connections are also placed at the highest index, so index 0 is
always the longest-unused one (not visible in this excerpt).
checkCount = this.poolingCount - this.minIdle is the number of eviction candidates.
Key point: minEvictableIdleTimeMillis only reclaims the idle connections in excess of minIdle.
*/
if(checkTime && i < checkCount) {
this.evictConnections[evictCount++] = connection;
continue;
}
// Idle time > the configured maximum idle time (maxEvictableIdleTimeMillis).
// Key point: maxEvictableIdleTimeMillis ignores the configured minIdle.
if(idleMillis > this.maxEvictableIdleTimeMillis) {
this.evictConnections[evictCount++] = connection;
continue;
}
}
if(keepAlive && idleMillis >= this.keepAliveBetweenTimeMillis) {
this.keepAliveConnections[keepAliveCount++] = connection;
}
} else {
// Not time-based: evict the first checkCount connections only.
if(i >= checkCount) {
break;
}
this.evictConnections[evictCount++] = connection;
}
}
i = evictCount + keepAliveCount;
if(i > 0) { // shift the remaining connections to the front of the pool array and clear the vacated tail
System.arraycopy(this.connections, i, this.connections, 0, this.poolingCount - i);
Arrays.fill(this.connections, this.poolingCount - i, this.poolingCount, (Object)null);
this.poolingCount -= i;
}
this.keepAliveCheckCount += keepAliveCount;
if(keepAlive && this.poolingCount + this.activeCount < this.minIdle) {
needFill = true;
}
break label956;
}
} finally {
this.lock.unlock();
}
// Reached only when the pool is not yet inited: nothing to do.
return;
}
Connection connection;
DruidConnectionHolder holer;
if(evictCount > 0) { // close the connections that were just collected in evictConnections
for(checkCount = 0; checkCount < evictCount; ++checkCount) {
holer = this.evictConnections[checkCount];
connection = holer.getConnection();
JdbcUtils.close(connection);
destroyCountUpdater.incrementAndGet(this);
}
Arrays.fill(this.evictConnections, (Object)null);
}
...
}
1: 通过配置testOnBorrow=true
每次在连接取出时判断, 效率较差
2: 通过配置testWhileIdle=true
每次在连接取出时, 如果取出的连接空闲时间超过timeBetweenEvictionRunsMillis则进行校验, 效率较高. 但要注意timeBetweenEvictionRunsMillis的时间一定不能超过8个小时(mysql 自动释放连接时间)
3: 通过配置 timeBetweenEvictionRunsMillis 和 minEvictableIdleTimeMillis
定时任务扫描空闲连接, 空闲时间超过minEvictableIdleTimeMillis的被回收. 缺点: 只能回收超出minIdle配置的那部分连接. 另外如果minIdle和maxActive相等的话, 此方法无效, 相当于没有配置
4: 通过配置timeBetweenEvictionRunsMillis 和 maxEvictableIdleTimeMillis
作为第3种方案的后补方案, 但注意 timeBetweenEvictionRunsMillis + maxEvictableIdleTimeMillis 一定不能大于8小时
[1]: 《亿级流量网站架构核心技术》 - 张开涛
[2]: druid1.0.21版本源码研究之连接回收(分析解决mysql8小时断线)
[3]: Druid配置参数详解-maxEvictableIdleTimeMillis,minEvictableIdleTimeMillis