经常在分析 ANR 问题的时候,需要对线程状态了解得比较清楚,才能进一步分析问题。
线程状态
java 的线程状态定义在 Thread.State 中,如下:
/**
* The possible states of a thread, as returned by {@code Thread.getState()}.
*
* NOTE: the declaration order of the constants is significant. {@code getState()}
* selects its result with {@code State.values()[nativeGetStatus(started)]}, and the
* native side returns the hard-coded ordinals 0..5 (NEW, RUNNABLE, BLOCKED, WAITING,
* TIMED_WAITING, TERMINATED), so constants must not be reordered or inserted.
*/
public enum State {
/**
* Thread state for a thread which has not yet started.
*/
NEW,
/**
* Thread state for a runnable thread. A thread in the runnable
* state is executing in the Java virtual machine but it may
* be waiting for other resources from the operating system
* such as processor.
*/
RUNNABLE,
/**
* Thread state for a thread blocked waiting for a monitor lock.
* A thread in the blocked state is waiting for a monitor lock
* to enter a synchronized block/method or
* reenter a synchronized block/method after calling
* {@link Object#wait() Object.wait}.
*/
BLOCKED,
/**
* Thread state for a waiting thread.
* A thread is in the waiting state due to calling one of the
* following methods:
* <ul>
* <li>{@link Object#wait() Object.wait} with no timeout</li>
* <li>{@link #join() Thread.join} with no timeout</li>
* <li>{@link LockSupport#park() LockSupport.park}</li>
* </ul>
*
* <p>A thread in the waiting state is waiting for another thread to
* perform a particular action.
*
* <p>For example, a thread that has called {@code Object.wait()}
* on an object is waiting for another thread to call
* {@code Object.notify()} or {@code Object.notifyAll()} on
* that object. A thread that has called {@code Thread.join()}
* is waiting for a specified thread to terminate.
*/
WAITING,
/**
* Thread state for a waiting thread with a specified waiting time.
* A thread is in the timed waiting state due to calling one of
* the following methods with a specified positive waiting time:
* <ul>
* <li>{@link #sleep Thread.sleep}</li>
* <li>{@link Object#wait(long) Object.wait} with timeout</li>
* <li>{@link #join(long) Thread.join} with timeout</li>
* <li>{@link LockSupport#parkNanos LockSupport.parkNanos}</li>
* <li>{@link LockSupport#parkUntil LockSupport.parkUntil}</li>
* </ul>
*/
TIMED_WAITING,
/**
* Thread state for a terminated thread.
* The thread has completed execution.
*/
TERMINATED;
}
其中,相关状态如下:
状态 | 说明 |
---|---|
NEW | 线程创建,但是还没有start() |
RUNNABLE | 线程执行或者在等待系统资源,例如CPU资源 |
BLOCKED | 线程在等待其它线程释放一个锁,则会进入 Blocked 状态。例如在等待一个同步代码块的锁,获得锁则进入 Runnable 状态 |
WAITING | 线程在等待其它线程唤醒它。可能是调用了不带超时参数的 Object.wait(),Thread.join(),LockSupport.park() |
TIMED_WAITING | 同上,但等待带有超时时间。可能是调用了 Thread.sleep(millis),Object.wait(timeout),Thread.join(millis),LockSupport.parkNanos(nanos),LockSupport.parkUntil(deadline) |
TERMINATED | 线程销毁 |
java 的线程状态也是读取 native 线程的状态,如下:
//Thread.java
/**
 * Returns the current state of this thread.
 *
 * Android-changed: the unused {@code threadStatus} field is replaced by the
 * {@code started} field, and the Android-specific {@code nativeGetStatus()} is used
 * instead of the upstream {@code sun.misc.VM.toThreadState(threadStatus)}. See the
 * comment on the {@code started} field for more information.
 *
 * @return the {@code State} constant whose position in {@code State.values()} equals
 *         the integer reported by {@code nativeGetStatus(started)}
 */
public State getState() {
    final State[] allStates = State.values();
    // The native side answers with an index into the State constants.
    final int stateIndex = nativeGetStatus(started);
    return allStates[stateIndex];
}
/**
* Android-specific native hook used by {@code getState()}. The returned integer is used
* there as an index into {@code State.values()}.
*
* @param hasBeenStarted {@code getState()} passes the {@code started} field here
* @return index of the matching {@code State} constant
*/
private native int nativeGetStatus(boolean hasBeenStarted);
对应的 jni 实现,在 java_lang_Thread.cc 中:
// JNI implementation behind Thread.nativeGetStatus(): translates the runtime's internal
// ThreadState of `java_thread` into the ordinal of the matching java.lang.Thread.State.
//
// If no native Thread is found for `java_thread`, the result is derived from
// `has_been_started` alone: TERMINATED when it is set, NEW otherwise.
static jint Thread_nativeGetStatus(JNIEnv* env, jobject java_thread, jboolean has_been_started) {
  // Ordinals from Java's Thread.State.
  const jint kJavaNew = 0;
  const jint kJavaRunnable = 1;
  const jint kJavaBlocked = 2;
  const jint kJavaWaiting = 3;
  const jint kJavaTimedWaiting = 4;
  const jint kJavaTerminated = 5;

  ScopedObjectAccess soa(env);

  // Fallback state, used only when the lookup below yields no native Thread.
  ThreadState native_state = ThreadState::kStarting;
  if (has_been_started) {
    native_state = ThreadState::kTerminated;
  }

  // The thread list lock is held for the lookup and for the rest of the function.
  MutexLock list_guard(soa.Self(), *Locks::thread_list_lock_);
  Thread* const native_thread = Thread::FromManagedThread(soa, java_thread);
  if (native_thread != nullptr) {
    native_state = native_thread->GetState();
  }

  switch (native_state) {
    // NEW
    case ThreadState::kStarting:
      return kJavaNew;

    // RUNNABLE
    case ThreadState::kRunnable:
    case ThreadState::kNative:
    case ThreadState::kSuspended:
    case ThreadState::kWaitingWeakGcRootRead:
      return kJavaRunnable;

    // BLOCKED
    case ThreadState::kBlocked:
      return kJavaBlocked;

    // TIMED_WAITING
    case ThreadState::kTimedWaiting:
    case ThreadState::kSleeping:
      return kJavaTimedWaiting;

    // WAITING
    case ThreadState::kWaiting:
    case ThreadState::kWaitingForTaskProcessor:
    case ThreadState::kWaitingForLockInflation:
    case ThreadState::kWaitingForGcToComplete:
    case ThreadState::kWaitingPerformingGc:
    case ThreadState::kWaitingForCheckPointsToRun:
    case ThreadState::kWaitingForDebuggerSend:
    case ThreadState::kWaitingForDebuggerToAttach:
    case ThreadState::kWaitingInMainDebuggerLoop:
    case ThreadState::kWaitingForDebuggerSuspension:
    case ThreadState::kWaitingForDeoptimization:
    case ThreadState::kWaitingForGetObjectsAllocated:
    case ThreadState::kWaitingForJniOnLoad:
    case ThreadState::kWaitingForSignalCatcherOutput:
    case ThreadState::kWaitingInMainSignalCatcherLoop:
    case ThreadState::kWaitingForMethodTracingStart:
    case ThreadState::kWaitingForVisitObjects:
    case ThreadState::kWaitingForGcThreadFlip:
    case ThreadState::kNativeForAbort:
      return kJavaWaiting;

    // TERMINATED
    case ThreadState::kTerminated:
      return kJavaTerminated;

    // Obsolete value: falls out of the switch to the error path below.
    case ThreadState::kObsoleteRunnable:
      break;

    // Don't add a 'default' here so the compiler can spot incompatible enum changes.
  }

  LOG(ERROR) << "Unexpected thread state: " << native_state;
  return -1;  // Unreachable.
}
所以在 C++ 层中,对应的相关状态如下:
// Internal thread states of the runtime, stored in a single byte.
//
// `kRunnable` used to be 67; it is now 0 so that checking whether any flag is set while
// Runnable does not require extracting the flags from the thread's `state_and_flags`.
// Note: all atomic accesses for a location should use the same data size, so the
// incorrect old approach of reading just 16 bits has been rewritten.
//
// Every enumerator carries its numeric value explicitly. Each trailing comment lists:
//   Java Thread.State | JDWP state | description
enum class ThreadState : uint8_t {
  kTerminated = 66,                      // TERMINATED    | TS_ZOMBIE   | Thread.run has returned, but Thread* still around
  kRunnable = 0,                         // RUNNABLE      | TS_RUNNING  | runnable
  kObsoleteRunnable = 67,                // ---           | ---         | obsolete value
  kTimedWaiting = 68,                    // TIMED_WAITING | TS_WAIT     | in Object.wait() with a timeout
  kSleeping = 69,                        // TIMED_WAITING | TS_SLEEPING | in Thread.sleep()
  kBlocked = 70,                         // BLOCKED       | TS_MONITOR  | blocked on a monitor
  kWaiting = 71,                         // WAITING       | TS_WAIT     | in Object.wait()
  kWaitingForLockInflation = 72,         // WAITING       | TS_WAIT     | blocked inflating a thin-lock
  kWaitingForTaskProcessor = 73,         // WAITING       | TS_WAIT     | blocked waiting for taskProcessor
  kWaitingForGcToComplete = 74,          // WAITING       | TS_WAIT     | blocked waiting for GC
  kWaitingForCheckPointsToRun = 75,      // WAITING       | TS_WAIT     | GC waiting for checkpoints to run
  kWaitingPerformingGc = 76,             // WAITING       | TS_WAIT     | performing GC
  kWaitingForDebuggerSend = 77,          // WAITING       | TS_WAIT     | blocked waiting for events to be sent
  kWaitingForDebuggerToAttach = 78,      // WAITING       | TS_WAIT     | blocked waiting for debugger to attach
  kWaitingInMainDebuggerLoop = 79,       // WAITING       | TS_WAIT     | blocking/reading/processing debugger events
  kWaitingForDebuggerSuspension = 80,    // WAITING       | TS_WAIT     | waiting for debugger suspend all
  kWaitingForJniOnLoad = 81,             // WAITING       | TS_WAIT     | waiting for execution of dlopen and JNI on load code
  kWaitingForSignalCatcherOutput = 82,   // WAITING       | TS_WAIT     | waiting for signal catcher IO to complete
  kWaitingInMainSignalCatcherLoop = 83,  // WAITING       | TS_WAIT     | blocking/reading/processing signals
  kWaitingForDeoptimization = 84,        // WAITING       | TS_WAIT     | waiting for deoptimization suspend all
  kWaitingForMethodTracingStart = 85,    // WAITING       | TS_WAIT     | waiting for method tracing to start
  kWaitingForVisitObjects = 86,          // WAITING       | TS_WAIT     | waiting for visiting objects
  kWaitingForGetObjectsAllocated = 87,   // WAITING       | TS_WAIT     | waiting for getting the number of allocated objects
  // Listed as WAITING here, but Thread_nativeGetStatus reports RUNNABLE for this state.
  kWaitingWeakGcRootRead = 88,           // WAITING       | TS_WAIT     | waiting on the GC to read a weak root
  kWaitingForGcThreadFlip = 89,          // WAITING       | TS_WAIT     | waiting on the GC thread flip (CC collector) to finish
  kNativeForAbort = 90,                  // WAITING       | TS_WAIT     | checking other threads are not run on abort.
  kStarting = 91,                        // NEW           | TS_WAIT     | native thread started, not yet ready to run managed code
  kNative = 92,                          // RUNNABLE      | TS_RUNNING  | running in a JNI native method
  kSuspended = 93,                       // RUNNABLE      | TS_RUNNING  | suspended by GC or debugger
};
可以看到 C++ 里面的线程状态会有更多的细节。其中,包括 kNative 表示正在执行 native 方法。
ANR 日志中线程状态说明
类似的 ANR 日志如下:
"main" prio=5 tid=1 Native
| group="main" sCount=1 dsCount=0 flags=1 obj=0x75af8d38 self=0xefe71e00
| sysTid=5423 nice=-10 cgrp=default sched=0/0 handle=0xf0273dc0
| state=S schedstat=( 61207235540 31084517122 178273 ) utm=4704 stm=1416 core=1 HZ=100
| stack=0xff039000-0xff03b000 stackSize=8192KB
| held mutexes=
kernel: (couldn't read /proc/self/task/5423/stack)
native: #00 pc 00052188 /apex/com.android.runtime/lib/bionic/libc.so (syscall+28)
native: #01 pc 0005750f /apex/com.android.runtime/lib/bionic/libc.so (__futex_wait_ex(void volatile*, bool, int, bool, timespec const*)+90)
native: #02 pc 000a07f3 /apex/com.android.runtime/lib/bionic/libc.so (pthread_cond_wait+32)
native: #03 pc 0006949b /data/app/com.starify.ola.android-62F7MEKjVCxvF8JrKvQgEw==/lib/arm/libalivc_conan.so (std::__ndk1::condition_variable::wait(std::__ndk1::unique_lock&)+26)
native: #04 pc 00069c3d /data/app/com.starify.ola.android-62F7MEKjVCxvF8JrKvQgEw==/lib/arm/libalivc_conan.so (std::__ndk1::__assoc_sub_state::__sub_wait(std::__ndk1::unique_lock&)+32)
native: #05 pc 00069bb3 /data/app/com.starify.ola.android-62F7MEKjVCxvF8JrKvQgEw==/lib/arm/libalivc_conan.so (std::__ndk1::__assoc_sub_state::copy()+38)
native: #06 pc 00069dd7 /data/app/com.starify.ola.android-62F7MEKjVCxvF8JrKvQgEw==/lib/arm/libalivc_conan.so (std::__ndk1::future::get()+12)
native: #07 pc 0029499b /data/app/com.starify.ola.android-62F7MEKjVCxvF8JrKvQgEw==/lib/arm/libugsv.so (???)
native: #08 pc 0009786b /data/app/com.starify.ola.android-62F7MEKjVCxvF8JrKvQgEw==/lib/arm/libugsv.so (???)
native: #09 pc 0008f6bd /data/app/com.starify.ola.android-62F7MEKjVCxvF8JrKvQgEw==/lib/arm/libugsv.so (???)
native: #10 pc 0009122b /data/app/com.starify.ola.android-62F7MEKjVCxvF8JrKvQgEw==/lib/arm/libugsv.so (???)
其中,第一行 "main" prio=5 tid=1 Native,表示当前线程的状态是 Native。那么在 ANR 日志中,线程的状态细分如下:
java Thread 的状态 | C++ 层Thread 状态 | 说明 |
---|---|---|
NEW | INITIALIZING | 新建,正在分配资源 |
NEW | STARTING | 新建,正在启动 |
RUNNABLE | RUNNING/RUNNABLE | 线程可运行或者正在运行 |
RUNNABLE | NATIVE | 正在执行 JNI 方法 |
RUNNABLE | SUSPENDED | 线程被挂起,一般是 GC 或者调试器导致 |
BLOCKED | MONITOR | 线程阻塞,等待其它线程释放对象锁 |
WAITING | WAIT | 执行了无超时参数的 wait() 等方法 |
WAITING | VMWAIT | 正在等待 VM 资源 |
TERMINATED | ZOMBIE | 线程销毁 |
其中,我们主要关心的状态是 MONITOR 和 WAIT(对应上面 ThreadState 中的 kBlocked 和 kWaiting)。如果主线程处于这两种状态,那么 ANR 的原因很可能是主线程被阻塞:要么在等待其它线程释放锁,要么在等待其它线程唤醒它。