大家好,我是入错行的bug猫。(http://blog.csdn.net/qq_41399429,谢绝转载)
这是一次非常非常非常脑蛋疼的debug
经历
上周五bug猫请假外出。在路上收到公司的紧急电话,说是系统访问慢,各种地方报错。
初步判断是连接池满了,无法获取到Jdbc链接。已经重启过几次了,让bug猫抓紧解决!
当时bug猫身边仅一部手机,赶紧去网吧连远程公司电脑,然后上服务器一看系统的监控,Jdbc的使用方式长这样:
刚开始最大连接数是100,后来改成250。图中Jdbc数量垂直下降原因是,系统重启…
每隔8个小时左右,Jdbc就满了,只能重启系统。电话都被业务部门打爆了…
放大点看,是一个波度平滑的直角三角形!等等,这不是积分电路么?!(暴露专业
毫无疑问,这是Jdbc外泄露!系统Jdbc采用的是阿里的德鲁伊,按照druid官方文档,设置强制回收超时未释放的链接
... ...
... ...
没卯月!
添加druid自带的监控,
DruidStatView
com.alibaba.druid.support.http.StatViewServlet
resetEnable
true
loginUsername
bugcat
loginPassword
bugcat972245132
DruidStatView
/druid/*
监控上显示一切正常!
猫了个咪 (╯‵□′)╯︵┻━┻
据说第一次挂的时候,系统已经一个星期没有更新和升级,就是突然发生这种情况。
检查了这段时间内所有提交的代码,有关操作数据库的地方,貌似都没有问题。
观察Jdbc泄露的频率,近似是一条直线,猜测是定时任务造成的,把所有的定时任务全部停止,还是泄露!
系统庞大,到处都是诗山。可能是以前写的功能,之前没人用,现在又用起来了,或者第三方突然发疯猛调接口,或者是以前埋下的暗雷,现在起爆了,全盘检查代码肯定不现实!
QAQ
然后把druid换成了据说目前最牛皮的追光者HikariCP!
依旧是泄露!
根据Jdbc每小时泄露的次数,反向推算一天的访问量,再看访问日志,确定大概是哪个接口造成。等等一系列方法,各种手段都试过,就是不行!!
于是只能手写一个监控了
CatHikariDataSource,自定义的一个dataSource
,继承HikariCP
,getConnection()
返回增强后的CatConnection
import com.zaxxer.hikari.HikariDataSource;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.sql.Connection;
import java.sql.SQLException;
import java.util.Iterator;
import java.util.Map;
import java.util.Timer;
import java.util.TimerTask;
public class CatHikariDataSource extends HikariDataSource {
private static Logger log = LoggerFactory.getLogger(CatHikariDataSource.class);
private static final long max_time = 1200000;
/**
* 获取 jdbc 链接
* @return 增强后的Connection
* */
@Override
public Connection getConnection() throws SQLException {
Connection conn = super.getConnection();
CatConnection catConn = new CatConnection(conn);
return catConn;
}
/**
* 每隔20分钟检查一次未关闭的链接,记录到日志中
* */
public void init(){
Map connMap = CatConnection.getConnMap();
Timer timer = new Timer();
timer.schedule(new TimerTask(){
@Override
public void run() {
long now = System.currentTimeMillis();
Iterator iter = connMap.values().iterator();
while ( iter.hasNext() ) {
CatConnection conn = iter.next();
if( conn.getActiveTime() + max_time < now ){ //超过20分钟没有提交事务、回滚事务
log.warn(DateUtil.getStringDate() + ">>未释放的jdbc链接:lastSql=" + conn.getLastSql(), conn.getEx());
}
}
}
}, max_time , max_time);
}
}
CatConnection 使用装饰模式,增强Connection
import java.sql.*;
import java.util.Map;
import java.util.Properties;
import java.util.UUID;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.Executor;
public class CatConnection implements Connection {
/**
* 所有活跃的的jdbc链接
* */
private static Map connMap = new ConcurrentHashMap<>(300);
private String id;
private long activeTime; //最后活跃时间
private String lastSql; //该链接执行的最后一个sql
private Throwable ex; //调用的堆栈信息
private Connection conn; //被装饰的原始Connection对象
public CatConnection(Connection conn) {
this.conn = conn;
this.id = UUID.randomUUID().toString();
this.activeTime = System.currentTimeMillis();
ex = new Throwable(); //记录调用者堆栈信息,重要
connMap.put(this.id, this);
}
@Override
public Statement createStatement() throws SQLException {
return conn.createStatement();
}
@Override
public PreparedStatement prepareStatement(String sql) throws SQLException {
this.lastSql = sql;
return conn.prepareStatement(sql);
}
@Override
public CallableStatement prepareCall(String sql) throws SQLException {
this.lastSql = sql;
return conn.prepareCall(sql);
}
@Override
public String nativeSQL(String sql) throws SQLException {
this.lastSql = sql;
return conn.nativeSQL(sql);
}
@Override
public void setAutoCommit(boolean autoCommit) throws SQLException {
conn.setAutoCommit(autoCommit);
}
@Override
public boolean getAutoCommit() throws SQLException {
return conn.getAutoCommit();
}
@Override
public void commit() throws SQLException {
conn.commit();
this.activeTime = System.currentTimeMillis();
}
@Override
public void rollback() throws SQLException {
conn.rollback();
this.activeTime = System.currentTimeMillis();
}
@Override
public void close() throws SQLException {
conn.close();
connMap.remove(this.id);
}
@Override
public boolean isClosed() throws SQLException {
return conn.isClosed();
}
@Override
public DatabaseMetaData getMetaData() throws SQLException {
return conn.getMetaData();
}
@Override
public void setReadOnly(boolean readOnly) throws SQLException {
conn.setReadOnly(readOnly);
}
@Override
public boolean isReadOnly() throws SQLException {
return conn.isReadOnly();
}
@Override
public void setCatalog(String catalog) throws SQLException {
conn.setCatalog(catalog);
}
@Override
public String getCatalog() throws SQLException {
return conn.getCatalog();
}
@Override
public void setTransactionIsolation(int level) throws SQLException {
conn.setTransactionIsolation(level);
}
@Override
public int getTransactionIsolation() throws SQLException {
return conn.getTransactionIsolation();
}
@Override
public SQLWarning getWarnings() throws SQLException {
return conn.getWarnings();
}
@Override
public void clearWarnings() throws SQLException {
conn.clearWarnings();
}
@Override
public Statement createStatement(int resultSetType, int resultSetConcurrency) throws SQLException {
return conn.createStatement(resultSetType, resultSetConcurrency);
}
@Override
public PreparedStatement prepareStatement(String sql, int resultSetType, int resultSetConcurrency) throws SQLException {
return conn.prepareStatement(sql, resultSetType, resultSetConcurrency);
}
@Override
public CallableStatement prepareCall(String sql, int resultSetType, int resultSetConcurrency) throws SQLException {
return conn.prepareCall(sql, resultSetType, resultSetConcurrency);
}
@Override
public Map> getTypeMap() throws SQLException {
return conn.getTypeMap();
}
@Override
public void setTypeMap(Map> map) throws SQLException {
conn.setTypeMap(map);
}
@Override
public void setHoldability(int holdability) throws SQLException {
conn.setHoldability(holdability);
}
@Override
public int getHoldability() throws SQLException {
return conn.getHoldability();
}
@Override
public Savepoint setSavepoint() throws SQLException {
return conn.setSavepoint();
}
@Override
public Savepoint setSavepoint(String name) throws SQLException {
return conn.setSavepoint(name);
}
@Override
public void rollback(Savepoint savepoint) throws SQLException {
conn.rollback(savepoint);
}
@Override
public void releaseSavepoint(Savepoint savepoint) throws SQLException {
conn.releaseSavepoint(savepoint);
}
@Override
public Statement createStatement(int resultSetType, int resultSetConcurrency, int resultSetHoldability) throws SQLException {
return conn.createStatement(resultSetType, resultSetConcurrency, resultSetHoldability);
}
@Override
public PreparedStatement prepareStatement(String sql, int resultSetType, int resultSetConcurrency, int resultSetHoldability) throws SQLException {
return conn.prepareStatement(sql, resultSetType, resultSetConcurrency, resultSetHoldability);
}
@Override
public CallableStatement prepareCall(String sql, int resultSetType, int resultSetConcurrency, int resultSetHoldability) throws SQLException {
return conn.prepareCall(sql, resultSetType, resultSetConcurrency, resultSetHoldability);
}
@Override
public PreparedStatement prepareStatement(String sql, int autoGeneratedKeys) throws SQLException {
return conn.prepareStatement(sql, autoGeneratedKeys);
}
@Override
public PreparedStatement prepareStatement(String sql, int[] columnIndexes) throws SQLException {
return conn.prepareStatement(sql, columnIndexes);
}
@Override
public PreparedStatement prepareStatement(String sql, String[] columnNames) throws SQLException {
return conn.prepareStatement(sql, columnNames);
}
@Override
public Clob createClob() throws SQLException {
return conn.createClob();
}
@Override
public Blob createBlob() throws SQLException {
return conn.createBlob();
}
@Override
public NClob createNClob() throws SQLException {
return conn.createNClob();
}
@Override
public SQLXML createSQLXML() throws SQLException {
return conn.createSQLXML();
}
@Override
public boolean isValid(int timeout) throws SQLException {
return conn.isValid(timeout);
}
@Override
public void setClientInfo(String name, String value) throws SQLClientInfoException {
conn.setClientInfo(name, value);
}
@Override
public void setClientInfo(Properties properties) throws SQLClientInfoException {
conn.setClientInfo(properties);
}
@Override
public String getClientInfo(String name) throws SQLException {
return conn.getClientInfo(name);
}
@Override
public Properties getClientInfo() throws SQLException {
return conn.getClientInfo();
}
@Override
public Array createArrayOf(String typeName, Object[] elements) throws SQLException {
return conn.createArrayOf(typeName, elements);
}
@Override
public Struct createStruct(String typeName, Object[] attributes) throws SQLException {
return conn.createStruct(typeName, attributes);
}
@Override
public void setSchema(String schema) throws SQLException {
conn.setSchema(schema);
}
@Override
public String getSchema() throws SQLException {
return conn.getSchema();
}
@Override
public void abort(Executor executor) throws SQLException {
conn.abort(executor);
}
@Override
public void setNetworkTimeout(Executor executor, int milliseconds) throws SQLException {
conn.setNetworkTimeout(executor, milliseconds);
}
@Override
public int getNetworkTimeout() throws SQLException {
return conn.getNetworkTimeout();
}
@Override
public T unwrap(Class iface) throws SQLException {
return conn.unwrap(iface);
}
@Override
public boolean isWrapperFor(Class> iface) throws SQLException {
return conn.isWrapperFor(iface);
}
public static Map getConnMap() {
return connMap;
}
public String getLastSql() {
return lastSql;
}
public long getActiveTime() {
return activeTime;
}
public Throwable getEx() {
return ex;
}
}
然后配置DataSource
的地方,指向CatHikariDataSource,并且配置初始方法为init
...
然后坐等半个小时,看日志,就可知道是哪个坑爹的类、哪个sql出错了!
最终泄露原因是,有一个类手动开启了事务,后面紧跟着一个超级大的try/catch
代码块,在try
最后执行commit
,catch
块里面执行rollback
。
bug猫有几次都检查到这个地方,看似没问题是吧,哼哼~尼玛在try/catch
代码块中,竟然有一个return
!!!
心中有十万个草泥马扭着屁股甩着尾巴奔腾而过…
为什么说蛋疼呢,其实druid早就已经给出了问题所在,德鲁伊的监控显示一切正常,说明泄露的Jdbc不归它管!
全局一搜,就2处是手动开启事务…
最终手动开启事务并且直接return的同事被领导和bug猫叼了个爽