core文件内容如下:
Missing separate debuginfo for /opt/java_default/jre/lib/amd64/server/libjvm.so
Try: yum --enablerepo='*debug*' install /usr/lib/debug/.build-id/df/e156b65f2033a068019910920c63a70457b58e.debug
[Thread debugging using libthread_db enabled]
Using host libthread_db library "/lib64/libthread_db.so.1".
Missing separate debuginfo for /opt/jdk1.8.0_91/jre/lib/amd64/libverify.so
Try: yum --enablerepo='*debug*' install /usr/lib/debug/.build-id/22/7d54c375f0dd0cc195a4a08178545fcc262447.debug
Missing separate debuginfo for /opt/jdk1.8.0_91/jre/lib/amd64/libmanagement.so
Try: yum --enablerepo='*debug*' install /usr/lib/debug/.build-id/c1/0632f81f87b3843d14a5a8888f10978ee361ea.debug
Core was generated by `testConsumer -a134.129.67.42:9876;134.129.67.43:9876 -gcid_repair_to_bill_topic'.
Program terminated with signal 6, Aborted.
#0 0x00007fcf105775f7 in raise () from /lib64/libc.so.6
Missing separate debuginfos, use: debuginfo-install glibc-2.17-105.el7.x86_64 libgcc-4.8.5-4.el7.x86_64 libstdc++-4.8.5-4.el7.x86_64 zlib-1.2.7-15.el7.x86_64
(gdb) bt
#0 0x00007fcf105775f7 in raise () from /lib64/libc.so.6
#1 0x00007fcf10578ce8 in abort () from /lib64/libc.so.6
#2 0x00007fcf1231faa5 in os::abort(bool) () from /opt/java_default/jre/lib/amd64/server/libjvm.so
#3 0x00007fcf124be593 in VMError::report_and_die() () from /opt/java_default/jre/lib/amd64/server/libjvm.so
#4 0x00007fcf123252cf in JVM_handle_linux_signal () from /opt/java_default/jre/lib/amd64/server/libjvm.so
#5 0x00007fcf1231ba63 in signalHandler(int, siginfo*, void*) () from /opt/java_default/jre/lib/amd64/server/libjvm.so
#6 <signal handler called>
#7 0x00007fcf1210dcb3 in JNIHandleBlock::allocate_handle(oopDesc*) () from /opt/java_default/jre/lib/amd64/server/libjvm.so
#8 0x00007fcf120d231f in jni_invoke_nonstatic(JNIEnv_*, JavaValue*, _jobject*, JNICallType, _jmethodID*, JNI_ArgumentPusher*, Thread*) ()
from /opt/java_default/jre/lib/amd64/server/libjvm.so
#9 0x00007fcf120d7ed1 in jni_CallObjectMethodV () from /opt/java_default/jre/lib/amd64/server/libjvm.so
#10 0x00007fcf1113336f in JNIEnv_::CallObjectMethod (this=0x146f9f8, obj=0xf8000e40, methodID=0x16a0898) at /opt/java_default/include/jni.h:901
#11 0x00007fcf111324f8 in Consumer::consumeMessagesByTopic (this=0x7ffe39c378a0, topic="repair_to_bill_topic_11", sub_expression=" ", max_num=1,
Python Exception list index out of range:
timeout_in_ms=3000, resultArray=empty std::list) at consumer.cpp:134
#12 0x0000000000402047 in main (argc=4, argv=0x7ffe39c37aa8) at testConsumer.cpp:86
(gdb)
jvm的crash文件hs_*部分内容如下:
#
# A fatal error has been detected by the Java Runtime Environment:
#
# SIGSEGV (0xb) at pc=0x00007fcf1210dcb3, pid=9201, tid=140527349421888
#
# JRE version: Java(TM) SE Runtime Environment (8.0_91-b14) (build 1.8.0_91-b14)
# Java VM: Java HotSpot(TM) 64-Bit Server VM (25.91-b14 interpreted mode linux-amd64 compressed oops)
# Problematic frame:
# V [libjvm.so+0x70bcb3] JNIHandleBlock::allocate_handle(oopDesc*)+0x253
#
# Core dump written. Default location: /home/coll/bin/core or core.9201
#
# If you would like to submit a bug report, please visit:
# http://bugreport.java.com/bugreport/crash.jsp
#
--------------- T H R E A D ---------------
Current thread (0x000000000146f800): JavaThread "main" [_thread_in_vm, id=9201, stack(0x00007ffe39b3c000,0x00007ffe39c3c000)]
siginfo: si_signo: 11 (SIGSEGV), si_code: 1 (SEGV_MAPERR), si_addr: 0x0000000000000011
Registers:
RAX=0x000000000000deab, RBX=0x0000000001470980, RCX=0x0000000001470980, RDX=0x0000000000000020
RSP=0x00007ffe39c37410, RBP=0x00007ffe39c37440, RSI=0x00000000fd5fad30, RDI=0x0000000001470980
R8 =0x0000000000000011, R9 =0x0000000000000002, R10=0x0000000000000001, R11=0x00007ffe39c37120
R12=0x00000000fd5f0060, R13=0x00007fcefea9ee60, R14=0x0000000000000020, R15=0x00000000fd5fad30
RIP=0x00007fcf1210dcb3, EFLAGS=0x0000000000010206, CSGSFS=0x0000000000000033, ERR=0x0000000000000004
TRAPNO=0x000000000000000e
Top of Stack: (sp=0x00007ffe39c37410)
0x00007ffe39c37410: 0000000d39c37440 000000000146eb90
0x00007ffe39c37420: 00000000fd5f0060 00007fcefea9ee60
0x00007ffe39c37430: 000000000146dbd0 000000000146f800
0x00007ffe39c37440: 00007ffe39c375d0 00007fcf120d231f
0x00007ffe39c37450: 00007fcefedd4e78 000000000146f800
0x00007ffe39c37460: 00007ffe39c374b0 00007ffe39c37530
0x00007ffe39c37470: 00007ffe39c37600 00007ffe39c37660
0x00007ffe39c37480: 000000000146f9f8 00007fcf120f1450
0x00007ffe39c37490: 000000000146d6e0 000000000146d6f0
0x00007ffe39c374a0: 000000000146dac8 00000000000003d8
0x00007ffe39c374b0: 00007fcf10e1ed44 00000000fd5f0060
0x00007ffe39c374c0: 00000000fd5ef7d8 00000000fd5ef830
0x00007ffe39c374d0: 0000000000000001 00007fcf00000bb8
0x00007ffe39c374e0: 0000000000000000 00007fcf12be8c38
0x00007ffe39c374f0: 00007fcf12bea000 0000000001010113
0x00007ffe39c37500: 00007fcf10e373b0 00007ffe39c374b8
0x00007ffe39c37510: 00007ffe39c374f9 0000000800000001
0x00007ffe39c37520: 00007fcf10e1d400 00007ffe39c376a8
0x00007ffe39c37530: 00007fcf12954490 00007fcefe97eac0
0x00007ffe39c37540: 0000000012be8c38 00007fcf12bea358
0x00007ffe39c37550: 0000000000000000 00007fcf129eee6f
0x00007ffe39c37560: 00007fcefea9ee60 000000000146f800
0x00007ffe39c37570: 00007fcefea9ee60 000000000146f800
0x00007ffe39c37580: 0000000500000001 00007fcefea9ee60
0x00007ffe39c37590: 00007fcefea9ee60 00007fcefea9ee60
0x00007ffe39c375a0: 00007ffe39c37600 0000000000000000
0x00007ffe39c375b0: 000000000146f800 00000000016a0898
0x00007ffe39c375c0: 00007ffe39c37640 000000000146f9f8
0x00007ffe39c375d0: 00007ffe39c376c0 00007fcf120d7ed1
0x00007ffe39c375e0: 000000000146f800 00000000fd5ef7d8
0x00007ffe39c375f0: 00007ffe39c376f0 00000000f8000e40
0x00007ffe39c37600: 00007fcf129550f0 00007fcefe97eac0
Instructions: (pc=0x00007fcf1210dcb3)
0x00007fcf1210dc93: 89 81 00 01 00 00 48 63 c2 4c 89 3c c1 48 83 c4
0x00007fcf1210dca3: 08 48 8d 04 c1 5b 41 5c 41 5d 41 5e 41 5f c9 c3
0x00007fcf1210dcb3: 49 8b 00 48 89 83 20 01 00 00 4d 89 38 48 83 c4
0x00007fcf1210dcc3: 08 5b 41 5c 41 5d 41 5e 4c 89 c0 41 5f c9 c3 31
Register to memory mapping:
RAX=0x000000000000deab is an unknown value
RBX=0x0000000001470980 is an unknown value
RCX=0x0000000001470980 is an unknown value
RDX=0x0000000000000020 is an unknown value
RSP=0x00007ffe39c37410 is pointing into the stack for thread: 0x000000000146f800
RBP=0x00007ffe39c37440 is pointing into the stack for thread: 0x000000000146f800
RSI=0x00000000fd5fad30 is an oop
[Ljava.lang.Object;
- klass: 'java/lang/Object'[]
- length: 0
RDI=0x0000000001470980 is an unknown value
R8 =0x0000000000000011 is an unknown value
R9 =0x0000000000000002 is an unknown value
R10=0x0000000000000001 is an unknown value
R11=0x00007ffe39c37120 is pointing into the stack for thread: 0x000000000146f800
R12=0x00000000fd5f0060 is an oop
java.util.ArrayList
- klass: 'java/util/ArrayList'
R13={method} {0x00007fcefea9ee60} 'toArray' '()[Ljava/lang/Object;' in 'java/util/ArrayList'
R14=0x0000000000000020 is an unknown value
R15=0x00000000fd5fad30 is an oop
[Ljava.lang.Object;
- klass: 'java/lang/Object'[]
- length: 0
Stack: [0x00007ffe39b3c000,0x00007ffe39c3c000], sp=0x00007ffe39c37410, free space=1005k
Native frames: (J=compiled Java code, j=interpreted, Vv=VM code, C=native code)
V [libjvm.so+0x70bcb3] JNIHandleBlock::allocate_handle(oopDesc*)+0x253
V [libjvm.so+0x6d031f] jni_invoke_nonstatic(JNIEnv_*, JavaValue*, _jobject*, JNICallType, _jmethodID*, JNI_ArgumentPusher*, Thread*)+0xa0f
V [libjvm.so+0x6d5ed1] jni_CallObjectMethodV+0x111
C [libMQJniApi.so+0x1036f] JNIEnv_::CallObjectMethod(_jobject*, _jmethodID*, ...)+0xb5
C [libMQJniApi.so+0xf4f8] Consumer::consumeMessagesByTopic(std::string const&, std::string const&, int, int, std::list<MQResult, std::allocator<MQResult> >&)+0x1e2
C [testConsumer+0x2047]
C [libc.so.6+0x21b15] __libc_start_main+0xf5
--------------- P R O C E S S ---------------
jmethodID mid;
jstring jstrTopic = Object::env->NewStringUTF(topic.c_str());
jstring jstrSubExpression = Object::env->NewStringUTF(sub_expression.c_str());
jobject jobjectMQResultList = Object::env->CallObjectMethod(jobjConsumer
,jmidMQConsumerImpl_consumeMessagesByTopic
,jstrTopic
,jstrSubExpression
,max_num
,timeout_in_ms
);
if (catchException())
{
return false;
}
jobjectArray jMQResultArray;
jMQResultArray = (jobjectArray)Object::env->CallObjectMethod(jobjectMQResultList,jmidList_toArray);
resultArray.clear();
int size = Object::env->GetArrayLength(jMQResultArray);
for(int i=0; i<size;i++){
jobject jobjResult = Object::env->GetObjectArrayElement(jMQResultArray,i);
MQResult result(jobjResult);
resultArray.push_back(result);
// 外面MQResult.release 释放
//Object::env->DeleteLocalRef(jobjResult);
}
Object::env->DeleteLocalRef(jclsList);
Object::env->DeleteLocalRef(jstrTopic);
Object::env->DeleteLocalRef(jstrSubExpression);
Object::env->DeleteLocalRef(jobjectMQResultList);
Object::env->DeleteLocalRef(jMQResultArray);
具体的问题代码位置是在jMQResultArray = (jobjectArray)Object::env->CallObjectMethod(jobjectMQResultList,jmidList_toArray);
这一行。一开始定位到这行问题代码时,想着这不过是调用java的List.toArray()函数而已;然后根据jvm的crash文件提示(其实也看不懂它是什么意思,所以只能猜;我想如果对jvm真正了解的话,应该立马就能定位到代码问题所在,由于对java不熟也只能这样瞎猜),猜测应该是要调用toArray函数的对象指向了空,导致转换失败。但是同样的代码在aix上跑了那么久都没问题,这不科学呀;后面还去看了java的代码,发现里面如果返回NULL的话,一定会被之前的异常捕获函数捕获才对呀。这么一看,java代码的问题好像可以排除了。后面又怀疑会不会是jdk的版本问题,毕竟aix上跑的是1.6版本而我用的是1.8,于是去找了个aix上1.8的jdk跑了段测试代码,发现也能正常运行;而linux这边的系统也尝试用1.6版本的jdk去跑,发现还是core了。这就又郁闷起来了,不过版本问题也可以排除了。没办法,只能抱着尝试的心态,将调用rocketmq接收消息的函数屏蔽掉,看看是否还会core,结果发现还是core了,这就又可以排除一个问题:java写的调用mq的接口是正常的。好,那么接下来只能一行行排查问题了(很多人会问怎么不一开始就一行行排查呢?这主要是因为core文件里core的地方是在CallObjectMethod函数,导致一开始不会优先考虑其它代码行),最终定位到问题代码是在
Object::env->DeleteLocalRef(jclsList);
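,一看恍然大悟:这地方怎么能释放它呢?这个可是成员变量,只会在进程启动时初始化一次,用来获取List.toArray函数的句柄,这里既然把它释放了,那岂不是会导致后面没法正常调用了。仔细思考了下,又为什么会跑一会儿然后才core呢?(当然,这个是通过jmap -heap排除过会不会是哪里没有释放导致泄露而crash的。)这个可能和jvm的gc内存回收机制有关系,gc不是立马回收已释放的资源的。(补充一种更可能的解释,有待确认:每次调用都对同一个早已失效的引用重复执行DeleteLocalRef,可能会破坏jvm内部本地引用句柄块的空闲链表,直到后面某次分配句柄时才暴露出来,这也与core在JNIHandleBlock::allocate_handle里相吻合。)当然这边还有的疑问是:为什么aix上是没问题的呢?这个问题目前没找到答案,也只能说是系统差异问题吧,导致回收机制可能也有所差异。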