Android 系统中 SystemServer Watchdog 的主要作用是监控 SystemServer(system_server)进程的运行状态,及时发现其关键线程卡住或者死锁的情况。
具体来说,Watchdog 线程会定期检查 SystemServer 中各个被监控线程的运行情况。如果发现某个线程超过一定时间没有响应,Watchdog 会认为 SystemServer 进程发生了问题,这时它会采取以下行动:
1. 打印出SystemServer线程当前的堆栈信息,以帮助定位问题。
日志中对阻塞位置的描述有两种前缀:当前 monitor 不为空时为 "Blocked in monitor",当前 monitor 为空时为 "Blocked in handler on"。
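2. 重启SystemServer进程。Watchdog 线程运行在 SystemServer 进程内部,它会通过 Process.killProcess(Process.myPid()) 杀死已卡住的 SystemServer 进程(也就是它自身所在的进程);新的 SystemServer 进程并不是由 Watchdog 线程 fork 的,而是在进程被杀后由系统重新拉起(由 Zygote 重新 fork)。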
通过这种机制,Watchdog 线程可以像一只“看门狗”一样时刻监视 SystemServer 的状态,一旦发现 SystemServer 发生故障,就可以及时采取行动重启它,从而提高系统的健壮性和稳定性。Watchdog 线程并不是由 init 进程 fork 出来的独立进程,而是在 SystemServer 启动过程中、在 SystemServer 进程内部创建并启动的线程;它需要持续运行,以避免 SystemServer 发生故障时无人处理。
其实现原理是:Watchdog 向 system_server 进程中各个时间敏感线程的 Handler 投递检测任务,根据这些任务能否被及时执行来判断对应线程当前是否卡顿,或者是否有锁被长时间持有。
// Excerpt: registers one HandlerChecker per monitored system_server thread.
// Every checker is wrapped with withDefaultTimeout(...) and appended to mHandlerCheckers.
//
// Checker for the system_server foreground thread (FgThread). It is kept in
// mMonitorChecker, which is the checker that addMonitor() attaches monitors to.
mMonitorChecker = new HandlerChecker(FgThread.getHandler(),
"foreground thread");
mHandlerCheckers.add(withDefaultTimeout(mMonitorChecker));
// Add checker for main thread. We only do a quick check since there
// can be UI running on the thread.
// This is the system_server main thread (the main Looper).
mHandlerCheckers.add(withDefaultTimeout(
new HandlerChecker(new Handler(Looper.getMainLooper()), "main thread")));
// Add checker for shared UI thread (the system_server UI thread).
mHandlerCheckers.add(withDefaultTimeout(
new HandlerChecker(UiThread.getHandler(), "ui thread")));
// And also check IO thread.
mHandlerCheckers.add(withDefaultTimeout(
new HandlerChecker(IoThread.getHandler(), "i/o thread")));
// And the display thread.
mHandlerCheckers.add(withDefaultTimeout(
new HandlerChecker(DisplayThread.getHandler(), "display thread")));
// And the animation thread (the thread system_server runs animations on).
mHandlerCheckers.add(withDefaultTimeout(
new HandlerChecker(AnimationThread.getHandler(), "animation thread")));
// And the surface animation thread.
mHandlerCheckers.add(withDefaultTimeout(
new HandlerChecker(SurfaceAnimationThread.getHandler(),
"surface animation thread")));
// Register a monitor meant to detect binder thread pool exhaustion
// (per the original note; BinderThreadMonitor itself is not shown in this excerpt).
addMonitor(new BinderThreadMonitor());
/**
 * Abridged excerpt of the system_server watchdog.
 *
 * <p>Only the main loop ({@code run()}) and the nested {@code HandlerChecker} are shown.
 * Declarations of the fields and locals used below (for example {@code mLock},
 * {@code timeout}, {@code blockedCheckers}, {@code subject}, {@code pids}) are omitted
 * from this excerpt.
 */
public class Watchdog implements Dumpable {
/**
 * Watchdog main loop: waits, evaluates the state of the checkers, and either keeps
 * waiting, records a half-timeout dump, or treats the blocked checkers as overdue.
 */
private void run() {
// True once the WAITED_HALF state has already been reported for the current wait.
boolean waitedHalf = false;
while (true) {
// NOTE(review): the next line appears truncated by text extraction (the text between
// a '<' and a '>' seems to be missing). Presumably it iterated over mHandlerCheckers to
// schedule each checker and then entered a `while (timeout > 0)` wait loop.
// TODO confirm against the upstream Watchdog.java.
for (int i=0; i 0) {
// Sleep for half a check period (per the original note), then check whether the
// posted check messages were handled in time.
mLock.wait(timeout);
}
final int waitState = evaluateCheckerCompletionLocked();
if (waitState == COMPLETED) {
// The monitors have returned; reset
waitedHalf = false;
continue;
} else if (waitState == WAITING) {
// Still waiting and not yet at the half-way state: go around the loop again.
continue;
} else if (waitState == WAITED_HALF) {
if (!waitedHalf) {
// First time the half-way state is seen: log it and collect what is needed for
// a half-timeout dump.
Slog.i(TAG, "WAITED_HALF");
waitedHalf = true;
blockedCheckers = getCheckersWithStateLocked(WAITED_HALF);
subject = describeCheckersLocked(blockedCheckers);
pids = new ArrayList<>(mInterestingJavaPids);
doWaitedHalfDump = true;
} else {
// The half-way state was already reported; keep waiting.
continue;
}
} else {
// Any other state: collect every handler checker that is overdue.
blockedCheckers = getCheckersWithStateLocked(OVERDUE);
subject = describeCheckersLocked(blockedCheckers);
allowRestart = mAllowRestart;
pids = new ArrayList<>(mInterestingJavaPids);
}
}
// Log the watchdog report for the blocked handlers.
logWatchog(doWaitedHalfDump, subject, pids);
// Kill the system_server process (this process).
// NOTE(review): as excerpted, this kill is also reached after a WAITED_HALF dump, and
// allowRestart is never consulted. Presumably the upstream code skips the kill in those
// cases — confirm against the upstream Watchdog.java.
Process.killProcess(Process.myPid());
System.exit(10);
}
/**
 * Checks one handler: posts itself to that handler and, when run there, calls each
 * registered monitor in turn.
 */
public final class HandlerChecker implements Runnable {
/**
 * Starts a new check on this checker's handler unless one is already in flight or
 * none is needed.
 *
 * @param handlerCheckerTimeoutMillis timeout for this check; stored in mWaitMax
 */
// NOTE(review): the "Locked" suffix suggests the caller holds mLock — confirm.
public void scheduleCheckLocked(long handlerCheckerTimeoutMillis) {
mWaitMax = handlerCheckerTimeoutMillis;
if (mCompleted) {
// Safe to update monitors in queue, Handler is not in the middle of work
mMonitors.addAll(mMonitorQueue);
mMonitorQueue.clear();
}
// Nothing to check if there are no monitors and the handler's message queue is
// polling (idle, per the original note), or if this checker is paused
// (mPauseCount > 0): mark the check as completed.
if ((mMonitors.size() == 0 && mHandler.getLooper().getQueue().isPolling())
|| (mPauseCount > 0)) {
mCompleted = true;
return;
}
if (!mCompleted) {
// we already have a check in flight, so no need
return;
}
// Begin a new check: clear the completion flag and record the start time.
mCompleted = false;
mCurrentMonitor = null;
mStartTime = SystemClock.uptimeMillis();
// Post this checker at the front of the handler's queue. When it runs it calls every
// monitor in mMonitors; with no monitors it only shows whether the handler is stuck
// behind a blocking message. Most monitors check that a lock is released in time
// (per the original note).
mHandler.postAtFrontOfQueue(this);
}
/**
 * Calls every monitor in mMonitors in order, recording the one in progress in
 * mCurrentMonitor, then marks the check as completed.
 */
@Override
public void run() {
// The monitor count is read once, before the loop starts.
final int size = mMonitors.size();
for (int i = 0 ; i < size ; i++) {
synchronized (mLock) {
mCurrentMonitor = mMonitors.get(i);
}
// Called outside mLock; if this call blocks, mCompleted stays false.
mCurrentMonitor.monitor();
}
synchronized (mLock) {
mCompleted = true;
mCurrentMonitor = null;
}
}
}
}
//frameworks/base/services/core/java/com/android/server/Watchdog.java
/**
 * Registers a monitor with the watchdog by adding it, under mLock, to mMonitorChecker.
 *
 * @param monitor the monitor to add
 */
public void addMonitor(Monitor monitor) {
synchronized (mLock) {
mMonitorChecker.addMonitorLocked(monitor);
}
}
//frameworks/base/services/core/java/com/android/server/Watchdog$HandlerChecker.java
/**
 * Appends the monitor to mMonitorQueue (a pending queue, not the active monitor list).
 *
 * @param monitor the monitor to enqueue
 */
// NOTE(review): the "Locked" suffix suggests the caller holds the watchdog lock — confirm.
void addMonitorLocked(Monitor monitor) {
mMonitorQueue.add(monitor);
}
//frameworks/base/services/core/java/com/android/server/am/ActivityManagerService.java
/**
 * Lock probe: acquires and immediately releases this object's own lock.
 * The call blocks for as long as some other thread holds that lock, so it simply
 * detects whether any method has been holding the lock for a long time.
 */
public void monitor() {
synchronized (this) { }
}
//frameworks/base/services/core/java/com/android/server/input/InputManagerService.java
/**
 * Lock probe for the input manager service: acquires and immediately releases each of
 * a series of locks in turn, then calls monitor() on the battery controller and on the
 * native input manager. The call blocks at the first lock that another thread is holding.
 */
public void monitor() {
synchronized (mInputFilterLock) { }
synchronized (mAssociationsLock) { /* Test if blocked by associations lock. */}
synchronized (mLidSwitchLock) { /* Test if blocked by lid switch lock. */ }
synchronized (mInputMonitors) { /* Test if blocked by input monitor lock. */ }
synchronized (mAdditionalDisplayInputPropertiesLock) { /* Test if blocked by props lock */ }
mBatteryController.monitor();
mNativeInputManger.monitor();
}
//frameworks/base/services/core/jni/com_android_server_input_InputManagerService.cpp
// Looks up the NativeInputManager for nativeImplObj and calls monitor() first on the
// input reader and then on the input dispatcher, so a blocked reader or dispatcher
// blocks this call.
static void nativeMonitor(JNIEnv* env, jobject nativeImplObj) {
NativeInputManager* im = getNativeInputManager(env, nativeImplObj);
im->getInputManager()->getReader().monitor();
im->getInputManager()->getDispatcher().monitor();
}
//frameworks/native/services/inputflinger/reader/InputReader.cpp
// Liveness check for the input reader: takes mLock, wakes the event hub, and waits on
// mReaderIsAliveCondition before also checking the event hub.
// NOTE(review): presumably the reader thread signals mReaderIsAliveCondition from its
// loop, so a stalled reader thread keeps this call blocked — confirm against the loop code.
void InputReader::monitor() {
// Blocks here if another thread is holding mLock.
std::unique_lock lock(mLock);
mEventHub->wake();
// Wait with no timeout until the condition is notified.
mReaderIsAliveCondition.wait(lock);
// Check the EventHub
mEventHub->monitor();
}