场景1:
java.lang.OutOfMemoryError: pthread_create (1040KB stack) failed: Try again
java.lang.Thread.nativeCreate(Native Method)
java.lang.Thread.start(Thread.java:1063)
//......
手机内存情况:
内存是够的,这个就比较扯了! 虚拟机的内存明显是够的,但是用户手机上App却OOM了.
memInfo=Runtime memory( maxMemory = 512.0MB, totalMemory = 114.2MB, freeMemory = 7.8MB ) ;
MemoryInfo (总内存:2867.2MB, 总可用内存:851.2MB , lowMemory:false )
场景2:
---java.lang.OutOfMemoryError: Failed to allocate a 4002012 byte allocation with 2883922 free bytes and 2MB until OOM
com.bumptech.glide.gifdecoder.GifDecoder.setData(GifDecoder.java:380)
com.bumptech.glide.load.resource.gif.GifDrawable.<init>(GifDrawable.java:92)
//...
手机内存情况: 确实内存不足了!
memInfo=Runtime memory( maxMemory = 256.0MB, totalMemory = 256.0MB, freeMemory = 2.6MB ) ;//totalMemory 已经等于 maxMemory,达到当前虚拟机可用内存的阈值了
MemoryInfo (总内存:1716.4MB, 总可用内存:165.6MB , lowMemory:true )
场景1异常相关的源码:
art/runtime/thread.cc
// Creates and starts the native pthread that backs the Java Thread object `java_peer`.
//
// Parameters:
//   env        - JNI environment of the calling thread; `self` is read from it.
//   java_peer  - the java.lang.Thread object being started; must not be null (CHECKed).
//   stack_size - requested stack size; passed through FixStackSize() before it is used.
//   is_daemon  - forwarded to the Thread constructor.
//
// Outcomes visible in this function:
//   - The runtime is shutting down: java.lang.InternalError is thrown through `env` and the
//     function returns without creating anything.
//   - JNIEnvExt::Create() returns null, or pthread_create() returns non-zero: the bookkeeping
//     done so far is undone and ThrowOutOfMemoryError() is called on soa.Self(). Note that the
//     only test on the pthread_create() result is `== 0`, so every failure code ends up as an
//     OutOfMemoryError whose message carries strerror() of that code.
//   - pthread_create() returns 0: ownership of the JNIEnvExt is released to the child and the
//     function returns.
void Thread::CreateNativeThread(JNIEnv* env, jobject java_peer, size_t stack_size, bool is_daemon) {
CHECK(java_peer != nullptr);
Thread* self = static_cast<JNIEnvExt*>(env)->self;
// Verbose-logging path only: look up the Java thread's name and dump the calling thread.
if (VLOG_IS_ON(threads)) {
ScopedObjectAccess soa(env);
ArtField* f = soa.DecodeField(WellKnownClasses::java_lang_Thread_name);
mirror::String* java_name = reinterpret_cast<mirror::String*>(f->GetObject(
soa.Decode<mirror::Object*>(java_peer)));
std::string thread_name;
if (java_name != nullptr) {
thread_name = java_name->ToModifiedUtf8();
} else {
thread_name = "(Unnamed)";
}
VLOG(threads) << "Creating native thread for " << thread_name;
self->Dump(LOG(INFO));
}
Runtime* runtime = Runtime::Current();
// Atomically start the birth of the thread ensuring the runtime isn't shutting down.
bool thread_start_during_shutdown = false;
{
// The shutdown check and StartThreadBirth() happen under the same lock.
MutexLock mu(self, *Locks::runtime_shutdown_lock_);
if (runtime->IsShuttingDownLocked()) {
thread_start_during_shutdown = true;
} else {
runtime->StartThreadBirth();
}
}
// The exception is thrown only after the lock scope above has been left.
if (thread_start_during_shutdown) {
ScopedLocalRef<jclass> error_class(env, env->FindClass("java/lang/InternalError"));
env->ThrowNew(error_class.get(), "Thread starting during runtime shutdown"); // This exception message is also a headache, although it is unrelated to OOM.
return;
}
Thread* child_thread = new Thread(is_daemon);
// Use global JNI ref to hold peer live while child thread starts.
child_thread->tlsPtr_.jpeer = env->NewGlobalRef(java_peer);
stack_size = FixStackSize(stack_size);
// Thread.start is synchronized, so we know that nativePeer is 0, and know that we're not racing to
// assign it.
env->SetLongField(java_peer, WellKnownClasses::java_lang_Thread_nativePeer,
reinterpret_cast<jlong>(child_thread));
// Try to allocate a JNIEnvExt for the thread. We do this here as we might be out of memory and
// do not have a good way to report this on the child's side.
std::unique_ptr<JNIEnvExt> child_jni_env_ext(
JNIEnvExt::Create(child_thread, Runtime::Current()->GetJavaVM()));
// Stays 0 when JNIEnvExt::Create() failed and pthread_create() was therefore never called.
int pthread_create_result = 0;
if (child_jni_env_ext.get() != nullptr) {
pthread_t new_pthread;
pthread_attr_t attr;
child_thread->tlsPtr_.tmp_jni_env = child_jni_env_ext.get();
CHECK_PTHREAD_CALL(pthread_attr_init, (&attr), "new thread");
CHECK_PTHREAD_CALL(pthread_attr_setdetachstate, (&attr, PTHREAD_CREATE_DETACHED),
"PTHREAD_CREATE_DETACHED");
CHECK_PTHREAD_CALL(pthread_attr_setstacksize, (&attr, stack_size), stack_size);
// Unlike the attribute calls above, the pthread_create() result is not CHECKed; it is kept so
// the failure path below can report it.
pthread_create_result = pthread_create(&new_pthread,
&attr,
Thread::CreateCallback,
child_thread);
CHECK_PTHREAD_CALL(pthread_attr_destroy, (&attr), "new thread");
if (pthread_create_result == 0) {
// pthread_create started the new thread. The child is now responsible for managing the
// JNIEnvExt we created.
// Note: we can't check for tmp_jni_env == nullptr, as that would require synchronization
// between the threads.
child_jni_env_ext.release();
return;
}
}
// Either JNIEnvExt::Create or pthread_create(3) failed, so clean up.
{
// Balances the StartThreadBirth() call made earlier, under the same lock.
MutexLock mu(self, *Locks::runtime_shutdown_lock_);
runtime->EndThreadBirth();
}
// Manually delete the global reference since Thread::Init will not have been run.
env->DeleteGlobalRef(child_thread->tlsPtr_.jpeer);
child_thread->tlsPtr_.jpeer = nullptr;
delete child_thread;
child_thread = nullptr;
// TODO: remove from thread group?
// Reset nativePeer to 0 now that the Thread object it pointed to has been deleted.
env->SetLongField(java_peer, WellKnownClasses::java_lang_Thread_nativePeer, 0);
{
// Choose the message by which step failed: a null JNIEnvExt means JNIEnvExt::Create() failed,
// otherwise pthread_create() returned a non-zero code.
std::string msg(child_jni_env_ext.get() == nullptr ?
"Could not allocate JNI Env" :
StringPrintf("pthread_create (%s stack) failed: %s",
PrettySize(stack_size).c_str(), strerror(pthread_create_result)));// the exception message
ScopedObjectAccess soa(env);
soa.Self()->ThrowOutOfMemoryError(msg.c_str());
}
}
场景2异常相关的源码:
art/runtime/gc/heap.cc
当动态申请内存时,如果当前App虚拟机的可用内存不足,就会抛出这个异常.
// Builds the "Failed to allocate ..." message for a failed allocation of `byte_count` bytes and
// throws it as an OutOfMemoryError on `self`.
//
// Parameters:
//   self           - thread on which the exception is raised.
//   byte_count     - size of the allocation that failed.
//   allocator_type - allocator that was used; selects which space is asked to describe
//                    fragmentation.
void Heap::ThrowOutOfMemoryError(Thread* self, size_t byte_count, AllocatorType allocator_type) {
  // While a stack overflow is being handled we must not construct a new exception object (its
  // constructor would overflow again), so fall back to the pre-allocated error.
  if (self->IsHandlingStackOverflow()) {
    self->SetException(Runtime::Current()->GetPreAllocatedOutOfMemoryError());
    return;
  }

  const size_t free_bytes = GetFreeMemory();

  // This is where the exception message text is assembled.
  std::ostringstream message;
  message << "Failed to allocate a " << byte_count << " byte allocation with " << free_bytes
          << " free bytes and " << PrettySize(GetFreeMemoryUntilOOME()) << " until OOM";

  // Enough free bytes in total yet the allocation still failed: the cause is fragmentation, so
  // let the space that served this allocator append its largest contiguous-allocation details.
  if (free_bytes >= byte_count) {
    space::AllocSpace* candidate = nullptr;
    switch (allocator_type) {
      case kAllocatorTypeNonMoving:
        candidate = non_moving_space_;
        break;
      case kAllocatorTypeRosAlloc:
      case kAllocatorTypeDlMalloc:
        candidate = main_space_;
        break;
      case kAllocatorTypeBumpPointer:
      case kAllocatorTypeTLAB:
        candidate = bump_pointer_space_;
        break;
      case kAllocatorTypeRegion:
      case kAllocatorTypeRegionTLAB:
        candidate = region_space_;
        break;
      default:
        // Any other allocator type has no associated space here; nothing is appended.
        break;
    }
    if (candidate != nullptr) {
      candidate->LogFragmentationAllocFailure(message, byte_count);
    }
  }

  self->ThrowOutOfMemoryError(message.str().c_str());
}
从崩溃信息和捕获到用户的手机信息来看,不是内存不足引起的OOM. 但是有个线索:大量的崩溃都是创建线程的时候发生的!
会不会是创建的线程太多了? 无人客服作为插件和其它插件都是跑在主App的进程里面的.
查看进程信息:
shell@ja3gduosctc:/ $ ps |grep jingdong
u0_a379 28707 2541 1286232 234364 ffffffff 00000000 S com.jingdong.app.mall
u0_a379 28936 2541 949156 59348 ffffffff 00000000 S com.jingdong.app.mall:jdpush
u0_a379 28989 2541 950204 60860 ffffffff 00000000 S com.jingdong.app.mall:WatchDogService
当前手机内核的最大线程数量限制:
1|shell@ja3gduosctc:/ $ cat proc/sys/kernel/threads-max
29753
当前App中的线程数量信息:
shell@ja3gduosctc:/ $ cat proc/28707/status
Name: ngdong.app.mall
State: S (sleeping)
Tgid: 28707
Pid: 28707
PPid: 2541
TracerPid: 0
Uid: 10379 10379 10379 10379
Gid: 10379 10379 10379 10379
FDSize: 512
Groups: 1015 1028 3001 3002 3003 50379
VmPeak: 1539020 kB
VmSize: 1425064 kB
VmLck: 0 kB
VmPin: 0 kB
VmHWM: 298572 kB
VmRSS: 291340 kB
VmData: 349752 kB
VmStk: 136 kB
VmExe: 8 kB
VmLib: 93940 kB
VmPTE: 748 kB
VmSwap: 0 kB
Threads: 150
SigQ: 0/14876
SigPnd: 0000000000000000
ShdPnd: 0000000000000000
SigBlk: 0000000000001204
SigIgn: 0000000000001000
SigCgt: 00000002000084f8
CapInh: 0000000000000000
CapPrm: 0000000000000000
CapEff: 0000000000000000
CapBnd: ffffffe000000000
Cpus_allowed: f
Cpus_allowed_list: 0-3
voluntary_ctxt_switches: 215120
nonvoluntary_ctxt_switches: 83582
到这里实际上思路是对的,但是这个问题不是必现的,当时也没有进一步地去验证这个思路. 最近刚好看到一篇文章把这个问题分析得非常精准,思路非常清晰.
点击这个链接:
不可思议的OOM
这篇文章同时还分析了创建文件数量超限的OOM场景,同时做了代码验证!强烈推荐!!
首先应该尽量的把代码写规范,避免造成内存泄漏;其次就是在京东这样的大体量App中,这种场景是不可避免的,但是还是要保持一个严谨的态度对待问题,排查问题,解决问题,给用户更好的使用体验.