android system_server WatchDog简介

简介

android系统中SystemServer WatchDog的主要作用是监控SystemServer进程的运行状态,防止其卡住或者死锁。

具体来说,watchDog线程会定期去检查SystemServer线程的运行情况。如果发现SystemServer线程超过一定时间未有响应,watchDog会认为SystemServer进程发生了问题,这时它会采取以下行动:

1. 打印出SystemServer线程当前的堆栈信息,以帮助定位问题。

日志格式如下:当monitor不为空时为"Blocked in monitor ...",当monitor为空时为"Blocked in handler on ..."。

2. 重启SystemServer进程。watchDog线程只负责杀死已卡住的SystemServer进程(Process.killProcess 加 System.exit),它本身并不会fork新进程;system_server退出后,Zygote会随之退出并由init重新拉起,再由新的Zygote fork出一个新的SystemServer进程。

通过这种机制,watchDog线程可以像一只“看门狗”一样时刻监视SystemServer的状态,一旦发现SystemServer发生故障,就可以及时采取行动重启它,从而提高系统的健壮性和稳定性。需要注意的是,watchDog并不是由Init进程fork出的独立进程,而是system_server进程内部的一个线程,在SystemServer启动阶段通过Watchdog.getInstance().start()创建;它需要持续运行,以避免SystemServer发生故障时无人管控的情况。

实现方式:

通过监听system_server进程中时间敏感线程的调度时间来判断进程当前是否卡顿,或者长时间持锁

WatchDog初始化:

       // Excerpt of the Watchdog initialisation: registers one HandlerChecker per
       // time-sensitive system_server thread, each wrapped with the default timeout.
       // Checker for the system_server foreground thread (FgThread); it is kept in
       // mMonitorChecker, which is also where addMonitor() registers monitors.
        mMonitorChecker = new HandlerChecker(FgThread.getHandler(),
                "foreground thread");
        mHandlerCheckers.add(withDefaultTimeout(mMonitorChecker));
        // Add checker for main thread.  We only do a quick check since there
        // can be UI running on the thread.
        // This is the system_server main thread (main looper).
        mHandlerCheckers.add(withDefaultTimeout(
                new HandlerChecker(new Handler(Looper.getMainLooper()), "main thread")));
        // Add checker for shared UI thread (the system_server UI thread).
        mHandlerCheckers.add(withDefaultTimeout(
                new HandlerChecker(UiThread.getHandler(), "ui thread")));
        // And also check IO thread.
        mHandlerCheckers.add(withDefaultTimeout(
                new HandlerChecker(IoThread.getHandler(), "i/o thread")));
        // And the display thread.
        mHandlerCheckers.add(withDefaultTimeout(
                new HandlerChecker(DisplayThread.getHandler(), "display thread")));
        // And the animation thread (the thread that runs system_server animations).
        mHandlerCheckers.add(withDefaultTimeout(
                 new HandlerChecker(AnimationThread.getHandler(), "animation thread")));
        // And the surface animation thread.
        mHandlerCheckers.add(withDefaultTimeout(
                new HandlerChecker(SurfaceAnimationThread.getHandler(),
                    "surface animation thread")));

         // Monitor that detects whether the binder thread pool is exhausted.
        addMonitor(new BinderThreadMonitor());

  WatchDog中循环:

    public class Watchdog implements Dumpable {

      private void run() {
        boolean waitedHalf = false;

        while (true) {
            for (int i=0; i 0) {
                   //睡眠半个检测周期,后检测消息是否得到及时处理
                   mLock.wait(timeout);

                }

                final int waitState = evaluateCheckerCompletionLocked();
                if (waitState == COMPLETED) {
                    // The monitors have returned; reset
                    waitedHalf = false;
                    continue;
                } else if (waitState == WAITING) {
                    continue;
                } else if (waitState == WAITED_HALF) {
                    if (!waitedHalf) {
                        Slog.i(TAG, "WAITED_HALF");
                        waitedHalf = true;

                        blockedCheckers = getCheckersWithStateLocked(WAITED_HALF);
                        subject = describeCheckersLocked(blockedCheckers);
                        pids = new ArrayList<>(mInterestingJavaPids);
                        doWaitedHalfDump = true;
                    } else {
                        continue;
                    }
                } else {
                    //所有超时的handler
                    blockedCheckers = getCheckersWithStateLocked(OVERDUE);
                    subject = describeCheckersLocked(blockedCheckers);
                    allowRestart = mAllowRestart;
                    pids = new ArrayList<>(mInterestingJavaPids);
                }
            } 
            //打印handler消息
            logWatchog(doWaitedHalfDump, subject, pids);
            //杀掉system_server进程
            Process.killProcess(Process.myPid());
            System.exit(10);
             
        }


     /**
      * Checks one handler thread for the watchdog (abridged excerpt).
      *
      * <p>scheduleCheckLocked() posts this object to the front of the handler's queue;
      * run() then executes on that handler's thread, calls every registered monitor and
      * finally marks the check as completed.
      */
     public final class HandlerChecker implements Runnable {

        /**
         * Starts a new check unless one is already in flight or there is nothing to check.
         *
         * @param handlerCheckerTimeoutMillis timeout for this check, stored in mWaitMax
         */
        public void scheduleCheckLocked(long handlerCheckerTimeoutMillis) {
            mWaitMax = handlerCheckerTimeoutMillis;
            if (mCompleted) {
                // Safe to update monitors in queue, Handler is not in the middle of work
                mMonitors.addAll(mMonitorQueue);
                mMonitorQueue.clear();
            }
            // Nothing to check: there are no monitors and the looper's queue reports that it
            // is polling (idle), or checking is paused (mPauseCount > 0).
            if ((mMonitors.size() == 0 && mHandler.getLooper().getQueue().isPolling())
                    || (mPauseCount > 0)) {
                mCompleted = true;
                return;
            }
            if (!mCompleted) {
                // we already have a check in flight, so no need
                return;
            }

            mCompleted = false;
            mCurrentMonitor = null;
            mStartTime = SystemClock.uptimeMillis();
            // Post this checker to the front of the handler's queue; run() then calls each
            // entry of mMonitors. With no monitors this only checks that the handler is not
            // stuck on a blocking message. Most monitors just test that a lock is released
            // in time.
            mHandler.postAtFrontOfQueue(this);
        }

        /** Runs on the checked handler's thread: calls each monitor, then marks completion. */
        @Override
        public void run() {
            // Size is read once, so only monitors present when the check starts are run.
            final int size = mMonitors.size();
            for (int i = 0 ; i < size ; i++) {
                synchronized (mLock) {
                    // Record which monitor is about to run while holding mLock.
                    mCurrentMonitor = mMonitors.get(i);
                }
                // Called outside mLock; if this blocks, mCompleted stays false.
                mCurrentMonitor.monitor();
            }

            synchronized (mLock) {
                mCompleted = true;
                mCurrentMonitor = null;
            }
        }

     }


   }

 WatchDog addMonitor:

//frameworks/base/services/core/java/com/android/server/Watchdog.java

/**
 * Registers a monitor with the watchdog.
 *
 * <p>The monitor is handed to mMonitorChecker while holding mLock.
 *
 * @param monitor the monitor to register
 */
public void addMonitor(Monitor monitor) {
        synchronized (mLock) {
            mMonitorChecker.addMonitorLocked(monitor);
        }
}

//frameworks/base/services/core/java/com/android/server/Watchdog$HandlerChecker.java
 /**
  * Queues a monitor in mMonitorQueue rather than adding it to mMonitors directly;
  * scheduleCheckLocked() moves queued monitors into mMonitors when no check is in flight.
  * The only caller shown here (Watchdog.addMonitor) holds mLock.
  */
 void addMonitorLocked(Monitor monitor) {
            mMonitorQueue.add(monitor);
        }

//frameworks/base/services/core/java/com/android/server/am/ActivityManagerService.java
   // Watchdog probe: simply checks whether some method has been holding this object's
   // lock for too long. It acquires and immediately releases the lock, so it blocks for
   // as long as another thread holds it.
    public void monitor() {
        synchronized (this) { }
    }
//frameworks/base/services/core/java/com/android/server/input/InputManagerService.java
// Watchdog probe: acquires and releases a series of locks one after another to detect
// whether any of them is being held, then delegates to the battery controller's and the
// native input manager's own monitor() methods.
public void monitor() {
        synchronized (mInputFilterLock) { }
        synchronized (mAssociationsLock) { /* Test if blocked by associations lock. */}
        synchronized (mLidSwitchLock) { /* Test if blocked by lid switch lock. */ }
        synchronized (mInputMonitors) { /* Test if blocked by input monitor lock. */ }
        synchronized (mAdditionalDisplayInputPropertiesLock) { /* Test if blocked by props lock */ }
        mBatteryController.monitor();
        mNativeInputManger.monitor();
    }

//frameworks/base/services/core/jni/com_android_server_input_InputManagerService.cpp
// Native side of the monitor: runs the monitor of the InputReader and then of the
// InputDispatcher, to detect whether either of them is blocked.
static void nativeMonitor(JNIEnv* env, jobject nativeImplObj) {
    NativeInputManager* im = getNativeInputManager(env, nativeImplObj);

    im->getInputManager()->getReader().monitor();
    im->getInputManager()->getDispatcher().monitor();
}
//frameworks/native/services/inputflinger/reader/InputReader.cpp
// Checks that the InputReader thread is still making progress.
// Acquires mLock, wakes the EventHub and then blocks on mReaderIsAliveCondition --
// presumably signalled by the reader thread's loop once it runs again (confirm in
// InputReader::loopOnce). A stuck reader thread therefore keeps this call blocked.
void InputReader::monitor() {
    std::unique_lock lock(mLock);
    mEventHub->wake();
    mReaderIsAliveCondition.wait(lock);
    // Check the EventHub
    mEventHub->monitor();
}

你可能感兴趣的:(java,linux,开发语言)