版本:opensearch-rest-high-level-client-2.3.0.jar,httpcore-nio-4.4.11.jar,httpasyncclient-4.1.4.jar
初始化es索引的逻辑是:监听大数据团队的消息,然后异步写入es(org.opensearch.client.RestHighLevelClient#bulkAsync)。上线后发现qps很低时服务cpu就接近打满。排查后确认原因是:消息消费很快,而es写入存在瓶颈;由于是异步写入,来不及发送的请求都积压在服务内存中,导致内存不足、频繁GC,进而导致cpu飙高。那么es客户端是如何初始化的?异步线程池用的是什么类型的队列,为什么请求积压时不会阻塞调用方?带着这些问题去学习,事半功倍
默认实现类:DefaultConnectingIOReactor
实现类:PoolingNHttpClientConnectionManager
实现类:InternalHttpAsyncClient
实现类:InternalHttpAsyncClient
org.apache.http.impl.nio.reactor.DefaultConnectingIOReactor#processEvents
方法
方法
结合问题背景,有朋友可能已经推断出原因了:如果消息不断产生,且生产速度大于异步写es的速度,那么积压的请求都会堆积在leasingRequests这个无界链表里;这些请求对象一直被链表引用,GC无法回收,于是GC越来越频繁,导致cpu飙高,降低服务的并发与吞吐量
代码很简单,并发100线程异步写es数据,因为是异步写入es,因此不会阻塞,会很快完成写入,在完成1W条数据写入时可以手工dump内存快照
// Reproduction driver: 100 worker threads issue 10,000 index requests in total.
// Per the surrounding article the index call is asynchronous, so each task is expected
// to return quickly while the real requests queue up inside the HTTP client.
int size = 100;
ExecutorService es = Executors.newFixedThreadPool(size);
// Pause 10 s before generating load (presumably to let the application finish starting).
Thread.sleep(10 * 1000);
for (int i = 0; i < 10000; i++) {
    // Lambdas can only capture effectively-final locals, so copy the loop counter.
    int finalI = i;
    // execute() instead of submit(): submit() stores a thrown exception in the returned
    // Future, and that Future was discarded here, so any failure vanished silently.
    // With execute() the exception reaches the worker thread's uncaught-exception
    // handler (stack trace on stderr by default) and the pool replaces the worker.
    es.execute(() -> {
        try {
            indexWriterService.indexDelete(docMessage);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
        // Timestamp and sequence number show how fast the tasks are handed off.
        System.out.println(System.currentTimeMillis() + ":" + finalI);
    });
}
// Keep the JVM alive so a heap dump can be taken manually once the tasks have been handed off.
Thread.sleep(Integer.MAX_VALUE);
dump命令
jmap -dump:live,format=b,file=dump.hprof 30652
使用ibm工具分析dump文件,工具jar可以在官网下载,执行命令如下(因为是jdk17所以有很多参数,如果不是17可以直接执行工具)
java
# jdk17导致不得不加的参数
--add-opens java.base/java.lang=ALL-UNNAMED --add-opens java.base/sun.net.util=ALL-UNNAMED --add-opens java.base/java.util=ALL-UNNAMED --add-opens java.base/java.lang.reflect=ALL-UNNAMED --add-opens java.base/java.text=ALL-UNNAMED --add-opens java.desktop/java.awt.font=ALL-UNNAMED --add-opens java.desktop/sun.swing=ALL-UNNAMED
# jdk8直接执行下面这个工具jar即可
-Xmx4G -jar ha457.jar
分析结果,可以看到72%的内存占用被列举为嫌疑对象了,而其中42%也正是与我们推断一致,蓝色部分为对象入口PoolingNHttpClientConnectionManager->CPool->LinkedList->Node(LeaseRequest)。至此破案
另外的30%呢,同样可以通过工具跟踪到其中23%的占用其实是我们的spring框架中加载的环境配置数据
压测时使用arthas对生产进行dump,也验证了我们的推断-.-
注1:
at org.opensearch.client.RestHighLevelClient$1.onFailure(RestHighLevelClient.java:1966) ~[opensearch-rest-high-level-client-2.3.0.jar:2.3.0]
at org.opensearch.client.RestClient$FailureTrackingResponseListener.onDefinitiveFailure(RestClient.java:707) ~[opensearch-rest-client-2.3.0.jar:2.3.0]
at org.opensearch.client.RestClient$1.failed(RestClient.java:450) ~[opensearch-rest-client-2.3.0.jar:2.3.0]
at org.apache.http.concurrent.BasicFuture.failed(BasicFuture.java:137) ~[httpcore-4.4.11.jar:4.4.11]
at org.apache.http.impl.nio.client.DefaultClientExchangeHandlerImpl.executionFailed(DefaultClientExchangeHandlerImpl.java:101) ~[httpasyncclient-4.1.4.jar:4.1.4]
at org.apache.http.impl.nio.client.AbstractClientExchangeHandler.failed(AbstractClientExchangeHandler.java:426) ~[httpasyncclient-4.1.4.jar:4.1.4]
at org.apache.http.impl.nio.client.AbstractClientExchangeHandler.connectionRequestFailed(AbstractClientExchangeHandler.java:348) ~[httpasyncclient-4.1.4.jar:4.1.4]
at org.apache.http.impl.nio.client.AbstractClientExchangeHandler.access$100(AbstractClientExchangeHandler.java:62) ~[httpasyncclient-4.1.4.jar:4.1.4]
at org.apache.http.impl.nio.client.AbstractClientExchangeHandler$1.failed(AbstractClientExchangeHandler.java:392) ~[httpasyncclient-4.1.4.jar:4.1.4]
at org.apache.http.concurrent.BasicFuture.failed(BasicFuture.java:137) ~[httpcore-4.4.11.jar:4.4.11]
at org.apache.http.impl.nio.conn.PoolingNHttpClientConnectionManager$1.failed(PoolingNHttpClientConnectionManager.java:316) ~[httpasyncclient-4.1.4.jar:4.1.4]
at org.apache.http.concurrent.BasicFuture.failed(BasicFuture.java:137) ~[httpcore-4.4.11.jar:4.4.11]
at org.apache.http.nio.pool.AbstractNIOConnPool.fireCallbacks(AbstractNIOConnPool.java:503) ~[httpcore-nio-4.4.11.jar:4.4.11]
at org.apache.http.nio.pool.AbstractNIOConnPool.requestTimeout(AbstractNIOConnPool.java:633) ~[httpcore-nio-4.4.11.jar:4.4.11]
at org.apache.http.nio.pool.AbstractNIOConnPool$InternalSessionRequestCallback.timeout(AbstractNIOConnPool.java:894) ~[httpcore-nio-4.4.11.jar:4.4.11]
at org.apache.http.impl.nio.reactor.SessionRequestImpl.timeout(SessionRequestImpl.java:183) ~[httpcore-nio-4.4.11.jar:4.4.11]
at org.apache.http.impl.nio.reactor.DefaultConnectingIOReactor.processTimeouts(DefaultConnectingIOReactor.java:210) ~[httpcore-nio-4.4.11.jar:4.4.11]
at org.apache.http.impl.nio.reactor.DefaultConnectingIOReactor.processEvents(DefaultConnectingIOReactor.java:155) ~[httpcore-nio-4.4.11.jar:4.4.11]
at org.apache.http.impl.nio.reactor.AbstractMultiworkerIOReactor.execute(AbstractMultiworkerIOReactor.java:351) ~[httpcore-nio-4.4.11.jar:4.4.11]
at org.apache.http.impl.nio.conn.PoolingNHttpClientConnectionManager.execute(PoolingNHttpClientConnectionManager.java:221) ~[httpasyncclient-4.1.4.jar:4.1.4]
at org.apache.http.impl.nio.client.CloseableHttpAsyncClientBase$1.run(CloseableHttpAsyncClientBase.java:64) ~[httpasyncclient-4.1.4.jar:4.1.4]
at java.lang.Thread.run(Thread.java:833) [?:?]
Caused by: java.util.concurrent.TimeoutException: Connection lease request time out
at org.apache.http.nio.pool.AbstractNIOConnPool.processPendingRequest(AbstractNIOConnPool.java:411) ~[httpcore-nio-4.4.11.jar:4.4.11]
at org.apache.http.nio.pool.AbstractNIOConnPool.processNextPendingRequest(AbstractNIOConnPool.java:391) ~[httpcore-nio-4.4.11.jar:4.4.11]
at org.apache.http.nio.pool.AbstractNIOConnPool.requestTimeout(AbstractNIOConnPool.java:629) ~[httpcore-nio-4.4.11.jar:4.4.11]
... 8 more
注2:
// Builds the low-level REST client with an explicitly constructed pooling connection
// manager, so that the manager instance is available to the application.
RestClient.builder(HttpHost.create("myServerCluster")).setHttpClientConfigCallback(httpClientBuilder -> {
    ConnectingIOReactor ioreactor = IOReactorUtils.create(
            defaultIOReactorConfig != null ? defaultIOReactorConfig : IOReactorConfig.DEFAULT, threadFactory);
    // Only "http" is registered. The original also registered "https" with a null
    // strategy, but RegistryBuilder.register rejects null items; leaving the scheme
    // unregistered keeps https unsupported without failing during construction.
    // NOTE(review): register a real SSL session strategy here if https is required.
    PoolingNHttpClientConnectionManager poolingmgr = new PoolingNHttpClientConnectionManager(
            ioreactor,
            RegistryBuilder.<SchemeIOSessionStrategy>create()
                    .register("http", NoopIOSessionStrategy.INSTANCE)
                    .build());
    // The builder's setMaxConnTotal/setMaxConnPerRoute are only applied when the builder
    // creates its own connection manager; once setConnectionManager is called they are
    // ignored. Apply the limits to the pool itself, skipping non-positive values the
    // same way the builder does.
    int maxTotal = apolloConfig.getEsMaxConnectTotal();
    if (maxTotal > 0) {
        poolingmgr.setMaxTotal(maxTotal);
    }
    int maxPerRoute = apolloConfig.getEsMaxConnectPerRoute();
    if (maxPerRoute > 0) {
        poolingmgr.setDefaultMaxPerRoute(maxPerRoute);
    }
    httpClientBuilder.setConnectionManager(poolingmgr);
    return httpClientBuilder;
});
注3:
// Print the connection-pool statistics of every route currently known to the manager.
PoolingNHttpClientConnectionManager poolingMgr = esClientFactory.getPoolingmgr();
if (poolingMgr != null) {
    // One "pool_status=..." line per route.
    poolingMgr.getRoutes().forEach(
            httpRoute -> System.out.println("pool_status=" + poolingMgr.getStats(httpRoute)));
}