今天发现Jetty没有响应了,重启就好了,重启之前抓了一个dump分析了下里面的堆栈信息,发现Jetty的所有工作线程都被一把锁给hang住了:
at sun.misc.Unsafe.park(Native Method)
at java.util.concurrent.locks.LockSupport.park(LockSupport.java:158)
at java.util.concurrent.locks.AbstractQueuedSynchronizer.parkAndCheckInterrupt(AbstractQueuedSynchronizer.java:811)
at java.util.concurrent.locks.AbstractQueuedSynchronizer.acquireQueued(AbstractQueuedSynchronizer.java:842)
Local Variable: java.util.concurrent.locks.AbstractQueuedSynchronizer$Node#286
at java.util.concurrent.locks.AbstractQueuedSynchronizer.acquire(AbstractQueuedSynchronizer.java:1178)
at java.util.concurrent.locks.ReentrantLock$NonfairSync.lock(ReentrantLock.java:186)
at java.util.concurrent.locks.ReentrantLock.lock(ReentrantLock.java:262)
at org.apache.http.nio.pool.AbstractNIOConnPool.lease(AbstractNIOConnPool.java:271)
Local Variable: org.apache.http.impl.nio.conn.PoolingNHttpClientConnectionManager$InternalPoolEntryCallback#35
Local Variable: org.apache.http.concurrent.BasicFuture#187
at org.apache.http.impl.nio.conn.PoolingNHttpClientConnectionManager.requestConnection(PoolingNHttpClientConnectionManager.java:265)
Local Variable: org.apache.http.concurrent.BasicFuture#98
Local Variable: org.apache.http.impl.nio.client.AbstractClientExchangeHandler$1#34
Local Variable: org.apache.http.nio.conn.NoopIOSessionStrategy#1
at org.apache.http.impl.nio.client.AbstractClientExchangeHandler.requestConnection(AbstractClientExchangeHandler.java:358)
Local Variable: org.apache.http.client.config.RequestConfig#96
Local Variable: org.apache.http.conn.routing.HttpRoute#215
at org.apache.http.impl.nio.client.DefaultClientExchangeHandlerImpl.start(DefaultClientExchangeHandlerImpl.java:125)
at org.apache.http.impl.nio.client.InternalHttpAsyncClient.execute(InternalHttpAsyncClient.java:141)
Local Variable: org.apache.http.nio.client.methods.HttpAsyncMethods$RequestProducerImpl#51
Local Variable: org.apache.http.concurrent.BasicFuture#99
Local Variable: org.apache.http.nio.protocol.BasicAsyncResponseConsumer#40
Local Variable: org.apache.http.impl.nio.client.DefaultClientExchangeHandlerImpl#42
at org.apache.http.impl.nio.client.CloseableHttpAsyncClient.execute(CloseableHttpAsyncClient.java:74)
at org.apache.http.impl.nio.client.CloseableHttpAsyncClient.execute(CloseableHttpAsyncClient.java:107)
Local Variable: org.apache.http.HttpHost#127
Local Variable: org.apache.http.client.protocol.HttpClientContext#49
******
at org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1322)
at org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:473)
at org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:119)
at org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:479)
Local Variable: org.eclipse.jetty.security.ConstraintSecurityHandler#1
Local Variable: org.eclipse.jetty.security.authentication.BasicAuthenticator#1
Local Variable: org.eclipse.jetty.security.authentication.DeferredAuthentication#1
at org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:226)
at org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:929)
at org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:403)
Local Variable: org.eclipse.jetty.server.DispatcherType#1
at org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:184)
Local Variable: org.eclipse.jetty.server.session.SessionHandler#1
at org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:864)
Local Variable: java.lang.String#241755
Local Variable: sun.misc.Launcher$AppClassLoader#1
at org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:117)
at org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:247)
Local Variable: org.eclipse.jetty.util.SingletonList#3
Local Variable: org.eclipse.jetty.server.handler.ContextHandlerCollection#1
Local Variable: org.eclipse.jetty.webapp.WebAppContext#1
at org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:151)
Local Variable: org.eclipse.jetty.server.Response#610
Local Variable: org.eclipse.jetty.server.Request#498
Local Variable: org.eclipse.jetty.server.handler.HandlerCollection#1
at org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:114)
at org.eclipse.jetty.server.Server.handle(Server.java:352)
at org.eclipse.jetty.server.HttpConnection.handleRequest(HttpConnection.java:596)
Local Variable: org.eclipse.jetty.server.Server#1
Local Variable: java.lang.String#204075
at org.eclipse.jetty.server.HttpConnection$RequestHandler.headerComplete(HttpConnection.java:1051)
Local Variable: org.eclipse.jetty.server.HttpConnection$RequestHandler#416
at org.eclipse.jetty.http.HttpParser.parseNext(HttpParser.java:590)
at org.eclipse.jetty.http.HttpParser.parseAvailable(HttpParser.java:212)
Local Variable: org.eclipse.jetty.http.HttpParser#411
at org.eclipse.jetty.server.HttpConnection.handle(HttpConnection.java:426)
Local Variable: org.eclipse.jetty.server.nio.SelectChannelConnector$3#616
at org.eclipse.jetty.io.nio.SelectChannelEndPoint.handle(SelectChannelEndPoint.java:508)
Local Variable: org.eclipse.jetty.io.nio.SelectChannelEndPoint#585
at org.eclipse.jetty.io.nio.SelectChannelEndPoint.access$000(SelectChannelEndPoint.java:34)
at org.eclipse.jetty.io.nio.SelectChannelEndPoint$1.run(SelectChannelEndPoint.java:40)
at org.eclipse.jetty.util.thread.QueuedThreadPool$2.run(QueuedThreadPool.java:451)
Local Variable: org.eclipse.jetty.io.nio.SelectChannelEndPoint$1#414
at java.lang.Thread.run(Thread.java:662)
关键的堆栈信息是最上面的几帧(AbstractNIOConnPool.lease 调用 ReentrantLock.lock 之后线程被 park):Jetty的工作线程在向异步连接池PoolingNHttpClientConnectionManager申请连接的时候,拿不到锁,线程就hang在那儿了。
看了下Apache的NIO连接池的代码,发现里面有一把很重要的锁:
/**
 * Abstract non-blocking connection pool.
 *
 * @param <T> route
 * @param <C> connection object
 * @param <E> pool entry
 *
 * @since 4.2
 */
@ThreadSafe
public abstract class AbstractNIOConnPool<T, C, E extends PoolEntry<T, C>>
        implements ConnPool<T, E>, ConnPoolControl<T> {

    private final ConnectingIOReactor ioreactor;
    private final NIOConnFactory<T, C> connFactory;
    private final SocketAddressResolver<T> addressResolver;
    private final SessionRequestCallback sessionRequestCallback;
    private final Map<T, RouteSpecificPool<T, C, E>> routeToPool;
    private final LinkedList<LeaseRequest<T, C, E>> leasingRequests;
    private final Set<SessionRequest> pending;
    private final Set<E> leased;
    private final LinkedList<E> available;
    private final ConcurrentLinkedQueue<LeaseRequest<T, C, E>> completedRequests;
    private final Map<T, Integer> maxPerRoute;
    private final Lock lock;
    private final AtomicBoolean isShutDown;

    private volatile int defaultMaxPerRoute;
    private volatile int maxTotal;
    ...
这把锁无敌了,所有对连接池中连接状态的修改,都需要先获取这把锁,比如获取连接:
/**
 * @since 4.3
 */
public Future<E> lease(
        final T route, final Object state,
        final long connectTimeout, final long leaseTimeout, final TimeUnit tunit,
        final FutureCallback<E> callback) {
    Args.notNull(route, "Route");
    Args.notNull(tunit, "Time unit");
    Asserts.check(!this.isShutDown.get(), "Connection pool shut down");
    final BasicFuture<E> future = new BasicFuture<E>(callback);
    this.lock.lock();
    try {
        final long timeout = connectTimeout > 0 ? tunit.toMillis(connectTimeout) : 0;
        final LeaseRequest<T, C, E> request = new LeaseRequest<T, C, E>(route, state, timeout, leaseTimeout, future);
        final boolean completed = processPendingRequest(request);
        if (!request.isDone() && !completed) {
            this.leasingRequests.add(request);
        }
        if (request.isDone()) {
            this.completedRequests.add(request);
        }
    } finally {
        this.lock.unlock();
    }
    fireCallbacks();
    return future;
}
或者连接建立完成之后的回调(活干完了把连接还回来时调用的release方法,同样要先拿到这把锁):
protected void requestCompleted(final SessionRequest request) {
    if (this.isShutDown.get()) {
        return;
    }
    @SuppressWarnings("unchecked")
    final T route = (T) request.getAttachment();
    this.lock.lock();
    try {
        this.pending.remove(request);
        final RouteSpecificPool<T, C, E> pool = getPool(route);
        final IOSession session = request.getSession();
        try {
            final C conn = this.connFactory.create(route, session);
            final E entry = pool.createEntry(request, conn);
            this.leased.add(entry);
            pool.completed(request, entry);
            onLease(entry);
        } catch (final IOException ex) {
            pool.failed(request, ex);
        }
    } finally {
        this.lock.unlock();
    }
    fireCallbacks();
}
这样如果有一个线程在使用连接资源的过程中,在释放锁之前hang住了,整个连接池就废了:想取连接的工作线程会hang在这把锁上,想归还连接的工作线程也会hang在这把锁上,等所有线程都hang住之后,整个服务就不能工作了。
然后我看了整个线程堆栈信息,还真有一个拿着锁的线程在释放连接的时候被hang住了:
一个线程hang住了,就把所有线程都hang住了,并且没有任何保护机制,Apache的这个连接池是否需要改进一下?
至于线程为什么在关闭连接的时候被hang住了,查了很多资料,国内外的开发者一致表示可能是TCP连接的SO_LINGER这个属性设置不当触发的一个bug。SO_LINGER选项用来设置延迟关闭的时间,即在关闭时等待SOCKET发送缓冲区中的数据发送完成。没有设置该选项时,调用close()会立即返回,发送缓冲区中剩余的数据以及FIN由内核在后台继续发送。如果设置了SO_LINGER选项,并且等待时间为正值,则close()会阻塞等待一段时间,直到数据发送完成或者超时。我的程序把这个属性设置为100,可能在处理这个等待的过程中遇到了一个系统bug或者别的原因,就hang住了。这个属性如果不设置,反而不会出现hang的问题。