Android之进程 crash监听

前言:
针对crash业务分析
代码
Android 10.0

详细:

一、日志分析

1.异常弹框日志

11-05 23:17:54.358  1066  1066 E AndroidRuntime: java.lang.NullPointerException: Attempt to invoke virtual method 'boolean android.os.Handler.post(java.lang.Runnable)' on a null object reference
    Line 4065: 11-05 23:17:54.358  1066  1066 E AndroidRuntime:     at com.example.myapplication.AutoCameraTestActivity.onClick(AutoCameraTestActivity.java:106)
    Line 4066: 11-05 23:17:54.358  1066  1066 E AndroidRuntime:     at android.view.View.performClick(View.java:5637)
    Line 4067: 11-05 23:17:54.358  1066  1066 E AndroidRuntime:     at android.view.View$PerformClick.run(View.java:22445)
    Line 4068: 11-05 23:17:54.358  1066  1066 E AndroidRuntime:     at android.os.Handler.handleCallback(Handler.java:755)
    Line 4069: 11-05 23:17:54.358  1066  1066 E AndroidRuntime:     at android.os.Handler.dispatchMessage(Handler.java:95)
    Line 4070: 11-05 23:17:54.358  1066  1066 E AndroidRuntime:     at android.os.Looper.loop(Looper.java:154)
    Line 4071: 11-05 23:17:54.358  1066  1066 E AndroidRuntime:     at android.app.ActivityThread.main(ActivityThread.java:6141)
    Line 4072: 11-05 23:17:54.358  1066  1066 E AndroidRuntime:     at java.lang.reflect.Method.invoke(Native Method)
    Line 4073: 11-05 23:17:54.358  1066  1066 E AndroidRuntime:     at com.android.internal.os.ZygoteInit$MethodAndArgsCaller.run(ZygoteInit.java:912)
    Line 4074: 11-05 23:17:54.358  1066  1066 E AndroidRuntime:     at com.android.internal.os.ZygoteInit.main(ZygoteInit.java:802)

2.进程启动加载的数据

11-05 23:17:47.687  1149  1149 W System.err: java.lang.Exception: RuntimeInit
11-05 23:17:47.687  1149  1149 W System.err:    at com.android.internal.os.RuntimeInit.commonInit(RuntimeInit.java:122)
11-05 23:17:47.687  1149  1149 W System.err:    at com.android.internal.os.RuntimeInit.zygoteInit(RuntimeInit.java:288)
11-05 23:17:47.687  1149  1149 W System.err:    at com.android.internal.os.ZygoteConnection.handleChildProc(ZygoteConnection.java:757)
11-05 23:17:47.687  1149  1149 W System.err:    at com.android.internal.os.ZygoteConnection.runOnce(ZygoteConnection.java:243)
11-05 23:17:47.687  1149  1149 W System.err:    at com.android.internal.os.ZygoteInit.runSelectLoop(ZygoteInit.java:876)
11-05 23:17:47.688  1149  1149 W System.err:    at com.android.internal.os.ZygoteInit.main(ZygoteInit.java:798)

二、代码分析
1.java层的异常弹框监听

com.android.internal.os.RuntimeInit.java

    /**
     * Installs the uncaught-exception handling shown in this excerpt: a logging
     * pre-handler plus a default handler that wraps the same logging handler.
     * The remainder of the method is elided in the article.
     */
    private static final void commonInit() {
        // 1. Register the uncaught-exception pre-handler; its job is to log the exception.
        //    The article notes that setUncaughtExceptionPreHandler is a hidden (@hide) API.
        // 2. Register the default uncaught-exception handler; its job is to kill the app
        //    and hand the crash over to AMS.
        // Keeping 1 and 2 separate means that an ordinary app replacing the default handler
        // cannot prevent the system from recording the crash log (per the article).
        LoggingHandler loggingHandler = new LoggingHandler();
        RuntimeHooks.setUncaughtExceptionPreHandler(loggingHandler);
        Thread.setDefaultUncaughtExceptionHandler(new KillApplicationHandler(loggingHandler));
        ···
    }

    /**
     * Pre-handler that writes the "FATAL EXCEPTION" log entry for an uncaught exception.
     *
     * <p>Two message formats are produced: one for the system process and one for
     * ordinary applications; both contain the text "FATAL EXCEPTION".
     *
     * <p>NOTE(review): the article states that Clog_e records the entry with the
     * LOG_ID_CRASH buffer via Log.printlns(Log.LOG_ID_CRASH, Log.ERROR, tag, msg, tr);
     * that helper is not visible in this excerpt.
     */
    private static class LoggingHandler implements Thread.UncaughtExceptionHandler {
        /** Set as soon as this handler has been invoked; read by KillApplicationHandler. */
        public volatile boolean mTriggered = false;

        @Override
        public void uncaughtException(Thread t, Throwable e) {
            mTriggered = true;

            // Don't re-enter if KillApplicationHandler has already run
            if (mCrashing) {
                return;
            }

            final boolean inSystemProcess =
                    mApplicationObject == null && (Process.SYSTEM_UID == Process.myUid());
            if (inSystemProcess) {
                // Crash inside system_server.
                Clog_e(TAG, "*** FATAL EXCEPTION IN SYSTEM PROCESS: " + t.getName(), e);
                return;
            }

            // Crash inside an ordinary application: thread name, optional process name, PID.
            String header = "FATAL EXCEPTION: " + t.getName() + "\n";
            final String currentProcess = ActivityThread.currentProcessName();
            if (currentProcess != null) {
                header += "Process: " + currentProcess + ", ";
            }
            header += "PID: " + Process.myPid();
            Clog_e(TAG, header, e);
        }
    }
    
    //通知ams处理异常业务
    private static class KillApplicationHandler implements Thread.UncaughtExceptionHandler {
        private final LoggingHandler mLoggingHandler;

        public KillApplicationHandler(LoggingHandler loggingHandler) {
            this.mLoggingHandler = Objects.requireNonNull(loggingHandler);
        }

        @Override
        public void uncaughtException(Thread t, Throwable e) {
            try {
                //如果日志在预处理之前没有抓取到,则再抓取一次
                ensureLogging(t, e);

                if (mCrashing) return;
                mCrashing = true;

                if (ActivityThread.currentActivityThread() != null) {
                    ActivityThread.currentActivityThread().stopProfiling();
                }
                //通知ams处理异常业务,例如通知dropbox记录异常信息、弹异常对话框等等
                ActivityManager.getService().handleApplicationCrash(
                        mApplicationObject, new ApplicationErrorReport.ParcelableCrashInfo(e));
            } catch (Throwable t2) {
                if (t2 instanceof DeadObjectException) {
                    // System process is dead; ignore
                } else {
                    try {
                        Clog_e(TAG, "Error reporting crash", t2);
                    } catch (Throwable t3) {
                        // Even Clog_e() fails!  Oh well.
                    }
                }
            } finally {
                //通过kill -9杀应用
                Process.killProcess(Process.myPid());
                System.exit(10);
            }
        }

        private void ensureLogging(Thread t, Throwable e) {
            if (!mLoggingHandler.mTriggered) {
                try {
                    mLoggingHandler.uncaughtException(t, e);
                } catch (Throwable loggingThrowable) {
                    // Ignored.
                }
            }
        }
    }

总结:
1)Zygote进程fork app进程时,调用RuntimeInit.zygoteInit,进而在commonInit中设置监听Thread.setDefaultUncaughtExceptionHandler(new KillApplicationHandler(loggingHandler))。这是在启动进程时,进程内部创建的java层异常监听
2)ActivityManagerService.handleApplicationCrash就是展示对话框的业务入口函数。
3)异常TAG搜索:FATAL EXCEPTION
4)这种方式无法监听jni异常
5)特别说明,
设置预处理异常业务,目的输出异常日志。此接口setUncaughtExceptionPreHandler为hide
设置默认异常处理业务,目的是kill应用和定制ams控制业务
将两者分开,可以避免普通应用把异常日志捕获而系统无法留档

2.Native层的异常弹框处理

com.android.server.SystemServer
/**
 * Excerpt of SystemServer.startOtherServices(): the only step shown is asking the
 * activity manager service to start observing native crashes. Everything else in
 * the method is elided in the article.
 */
private void startOtherServices() {
   ···
   // Start the native crash observer owned by ActivityManagerService.
   mActivityManagerService.startObservingNativeCrashes();
   ···
}

com.android.server.am.ActivityManagerService
/**
 * Creates a NativeCrashListener bound to this service and starts it.
 */
public void startObservingNativeCrashes() {
    new NativeCrashListener(this).start();
}


com.android.server.am.NativeCrashListener
/**
 * Thread that acts as the server side of a Unix domain socket and turns native
 * crash reports received on it into crash notifications for the activity manager.
 * Parts of the class are elided in the article (marked with ···).
 */
final class NativeCrashListener extends Thread {
    ···
    // Filesystem path of the Unix domain socket this listener binds to.
    static final String DEBUGGERD_SOCKET_PATH = "/data/system/ndebugsocket";
    ···
    /**
     * Binds the server socket and then loops forever accepting one connection at a
     * time; each accepted peer is acknowledged with a single byte and closed.
     */
    public void run() {
        // One-byte buffer written back to every accepted peer in the finally block.
        final byte[] ackSignal = new byte[1];
        ···
        try {
            // Create the server side of the socket: bind to the path, listen, and
            // open up the permissions on the socket file.
            FileDescriptor serverFd = Os.socket(AF_UNIX, SOCK_STREAM, 0);
            final UnixSocketAddress sockAddr = UnixSocketAddress.createFileSystem(
                    DEBUGGERD_SOCKET_PATH);
            Os.bind(serverFd, sockAddr);
            Os.listen(serverFd, 1);
            Os.chmod(DEBUGGERD_SOCKET_PATH, 0777);

            while (true) {
                FileDescriptor peerFd = null;
                try {
                    // Wait for a socket client to connect.
                    peerFd = Os.accept(serverFd, null /* peerAddress */);
                    if (peerFd != null) {
                        StructUcred credentials =
                                Os.getsockoptUcred(peerFd, SOL_SOCKET, SO_PEERCRED);
                        // Only a peer whose uid is 0 gets its crash data processed.
                        if (credentials.uid == 0) {
                            // Consume the native crash information.
                            consumeNativeCrashData(peerFd);
                        }
                    }
                } catch (Exception e) {
                    Slog.w(TAG, "Error handling connection", e);
                } finally {
                    if (peerFd != null) {
                        try {
                            // Send the one-byte acknowledgement to the peer.
                            Os.write(peerFd, ackSignal, 0, 1);
                        } catch (Exception e) {
                            // A failed acknowledgement is deliberately ignored.
                        }
                        try {
                            Os.close(peerFd);
                        } catch (ErrnoException e) {
                            // A failed close is deliberately ignored.
                        }
                    }
                }
            }
        } catch (Exception e) {
            Slog.e(TAG, "Unable to init native debug socket!", e);
        }
    }
    
    /**
     * Reads one crash report from the given descriptor: an 8-byte header holding the
     * pid and the signal, followed by the dump text. If the pid maps to a known,
     * non-persistent process, the process is marked as crashing and a
     * NativeCrashReporter thread is started with the collected text.
     */
    void consumeNativeCrashData(FileDescriptor fd) {
        final byte[] buf = new byte[4096];
        final ByteArrayOutputStream os = new ByteArrayOutputStream(4096);

        try {
            // Apply the same timeout to both receiving and sending on this socket.
            StructTimeval timeout = StructTimeval.fromMillis(SOCKET_TIMEOUT_MILLIS);
            Os.setsockoptTimeval(fd, SOL_SOCKET, SO_RCVTIMEO, timeout);
            Os.setsockoptTimeval(fd, SOL_SOCKET, SO_SNDTIMEO, timeout);

            // Read the 8-byte header from fd into buf; give up if it is incomplete.
            int headerBytes = readExactly(fd, buf, 0, 8);
            if (headerBytes != 8) {
                return;
            }

            // Extract pid (offset 0) and signal (offset 4) from buf.
            int pid = unpackInt(buf, 0);
            int signal = unpackInt(buf, 4);

            // now the text of the dump
            if (pid > 0) {
                final ProcessRecord pr;
                synchronized (mAm.mPidsSelfLocked) {
                    pr = mAm.mPidsSelfLocked.get(pid);
                }
                if (pr != null) {
                    // Persistent processes are not recorded.
                    if (pr.isPersistent()) {
                        return;
                    }

                    // Accumulate the dump text into os until a chunk ends with a 0 byte
                    // or a read returns no data.
                    int bytes;
                    do {
                        // get some data
                        bytes = Os.read(fd, buf, 0, buf.length);
                        if (bytes > 0) {
                            if (buf[bytes-1] == 0) {
                                os.write(buf, 0, bytes-1);  // exclude the EOD token
                                break;
                            }
                            // no EOD, so collect it and read more
                            os.write(buf, 0, bytes);
                        }
                    } while (bytes > 0);
                    

                    synchronized (mAm) {
                        pr.setCrashing(true);
                        pr.forceCrashReport = true;
                    }
                    // Decode the collected bytes into a String.
                    final String reportString = new String(os.toByteArray(), "UTF-8");
                    // Report the crash on a separate thread.
                    (new NativeCrashReporter(pr, signal, reportString)).start();
                } else {
                    Slog.w(TAG, "Couldn't find ProcessRecord for pid " + pid);
                }
            } else {
                Slog.e(TAG, "Bogus pid!");
            }
        } catch (Exception e) {
            Slog.e(TAG, "Exception dealing with report", e);
            // ugh, fail.
        }
    }
    
    /**
     * Thread that wraps one native crash (process, signal, dump text) in a CrashInfo
     * and passes it to the activity manager.
     */
    class NativeCrashReporter extends Thread {
        ProcessRecord mApp;
        int mSignal;
        String mCrashReport;

        NativeCrashReporter(ProcessRecord app, int signal, String report) {
            super("NativeCrashReport");
            mApp = app;
            mSignal = signal;
            mCrashReport = report;
        }

        @Override
        public void run() {
            try {
                // Native crashes have no Java throw site, so those fields are "unknown";
                // the signal description is the message and the dump is the stack trace.
                CrashInfo ci = new CrashInfo();
                ci.exceptionClassName = "Native crash";
                ci.exceptionMessage = Os.strsignal(mSignal);
                ci.throwFileName = "unknown";
                ci.throwClassName = "unknown";
                ci.throwMethodName = "unknown";
                ci.stackTrace = mCrashReport;
                // Notify AMS.
                mAm.handleApplicationCrashInner("native_crash", mApp, mApp.processName, ci);//1
            } catch (Exception e) {
                Slog.e(TAG, "Unable to report native crash", e);
            }
        }
    }
}

总结
1)native监听实现是在线程中开启了一个while循环
2)注意,对于persistent进程,不做crash report
3)ams设置的native监听,是作为socket服务端,而客户端来自debuggerd进程
4)ams接收异常后,调用handleApplicationCrashInner

3.Ams.handleApplicationCrashInner分析
不管是java层还是native层的crash,最终都会通知Ams.handleApplicationCrashInner
1)分析handleApplicationCrashInner

ActivityManagerService

    /**
     * Handles a reported crash (excerpt; part of the method is elided): writes an
     * event-log entry, adds the error to the drop box, and forwards the crash to
     * AppErrors.
     *
     * @param eventType   crash category passed through to the drop box entry
     * @param r           record of the crashed process; may be null (see the null check below)
     * @param processName name of the crashed process
     * @param crashInfo   details of the crash
     */
    void handleApplicationCrashInner(String eventType, ProcessRecord r, String processName,
            ApplicationErrorReport.CrashInfo crashInfo) {
        // Event log entry: EventLogTags.AM_CRASH --> am_crash
        EventLog.writeEvent(EventLogTags.AM_CRASH, Binder.getCallingPid(),
                UserHandle.getUserId(Binder.getCallingUid()), processName,
                r == null ? -1 : r.info.flags,
                crashInfo.exceptionClassName,
                crashInfo.exceptionMessage,
                crashInfo.throwFileName,
                crashInfo.throwLineNumber);
        ···
        // Record the crash information in the drop box.
        addErrorToDropBox(
                eventType, r, processName, null, null, null, null, null, null, crashInfo);
        // Let AppErrors handle the application crash.
        mAppErrors.crashApplication(r, crashInfo);
    }

a)分析crash异常时,也可以关注events日志tag:am_crash
b)决定是直接重启app,还是弹出对话框等待用户点击确认
c)dropbox也会记录异常信息,记录的前缀名称为system_server/system_app/data_app,文件保存在/data/system/dropbox目录下

    /**
     * Classifies a process for crash records.
     *
     * @param process the process record; may be null
     * @return "system_server" when the record is null or its pid equals MY_PID,
     *         "system_app" when the app has ApplicationInfo.FLAG_SYSTEM set,
     *         otherwise "data_app"
     */
    private static String processClass(ProcessRecord process) {
        // No record, or our own pid: this is the system_server process.
        if (process == null || process.pid == MY_PID) {
            return "system_server";
        }
        // FLAG_SYSTEM set => system_app (per the article: apps under system/app or
        // priv-app); everything else is treated as an installed data_app.
        final boolean systemApp = (process.info.flags & ApplicationInfo.FLAG_SYSTEM) != 0;
        return systemApp ? "system_app" : "data_app";
    }

2)mAppErrors.crashApplication(r, crashInfo);

a)针对persistent或apexmodule进程,进行营救记录
b)过滤不弹对话框业务条件
c)通知handler处理对话框业务
d)等待handler并处理相关结果
com.android.server.am.AppErrors
    /**
     * Excerpt of the crash handling in AppErrors (large parts are elided): notes the
     * crash with RescueParty and the package watchdog, posts a SHOW_ERROR_UI_MSG
     * message to the UI handler, then blocks until a result is available.
     * Names such as isApexModule, data, taskId and result are declared in the elided
     * parts and are not visible here.
     */
    void crashApplicationInner(ProcessRecord r, ApplicationErrorReport.CrashInfo crashInfo,
            int callingPid, int callingUid) {
        ···
        // For persistent or apex-module processes, record the crash with RescueParty.
        if (r != null) {
            ···
            if (r.isPersistent() || isApexModule) {
                RescueParty.noteAppCrash(mContext, r.uid);
            }

            mPackageWatchdog.onPackageFailure(r.getPackageListWithVersionCode());
        }

        ···
        synchronized (mService) {
            // Conditions under which no dialog is shown are filtered here (elided).
            // Per the article, implementing IActivityController and registering it via
            // ActivityTaskManagerService.setActivityController can suppress the dialog.
            ···
            // Ask the UI handler to deal with the crash dialog.
            final Message msg = Message.obtain();
            msg.what = ActivityManagerService.SHOW_ERROR_UI_MSG;

            taskId = data.taskId;
            msg.obj = data;
            mService.mUiHandler.sendMessage(msg);
        }

        // Wait for the handler and then process its result; this call blocks.
        int res = result.get();
        ···
        
    }

3)通知handler处理对话框业务
主要处理是否展示对话框业务

com.android.server.am.AppErrors
    /**
     * Excerpt of the handler for the crash-dialog message (parts are elided): decides
     * whether an AppErrorDialog is created and shown, or whether the pending result is
     * set to AppErrorDialog.CANT_SHOW instead.
     * Names such as proc, res, packageName and userId are declared in the elided parts.
     */
    void handleShowAppErrorUi(Message msg) {
        AppErrorDialog.Data data = (AppErrorDialog.Data) msg.obj;
        // Reads Settings.Secure.ANR_SHOW_BACKGROUND with a default of 0, so the flag is
        // false unless the setting is non-zero.
        boolean showBackground = Settings.Secure.getInt(mContext.getContentResolver(),
                Settings.Secure.ANR_SHOW_BACKGROUND, 0) != 0;

        AppErrorDialog dialogToShow = null;
        ···
        synchronized (mService) {
            ···
            final boolean showFirstCrashDevOption = Settings.Secure.getIntForUser(
                    mContext.getContentResolver(),
                    Settings.Secure.SHOW_FIRST_CRASH_DIALOG_DEV_OPTION,
                    0,
                    mService.mUserController.getCurrentUserId()) != 0;
            // Silent-crash whitelist. Per the article it comes from
            // config_appsNotReportingCrashes in framework-res.apk, with entries separated
            // by ",", e.g. com.android.settings,com.android.systemui
            final boolean crashSilenced = mAppsNotReportingCrashes != null &&
                    mAppsNotReportingCrashes.contains(proc.info.packageName);
            
            // The dialog is created only if error dialogs can be shown (or showBackground
            // is set), the package is not silenced, and either the first-crash developer
            // option is on or the crash is repeating.
            // NOTE(review): the article says Settings.Global.HIDE_ERROR_DIALOGS = 1 hides
            // the dialog; canShowErrorDialogs() is not visible here - confirm.
            if ((mService.mAtmInternal.canShowErrorDialogs() || showBackground)
                    && !crashSilenced
                    && (showFirstCrashDevOption || data.repeating)) {
                proc.crashDialog = dialogToShow = new AppErrorDialog(mContext, mService, data);
                ···
            } else {
                // The device is asleep, so just pretend that the user
                // saw a crash dialog and hit "force quit".
                if (res != null) {
                    res.set(AppErrorDialog.CANT_SHOW);
                }
            }
        }
        
        // Show the dialog after leaving the synchronized block.
        if (dialogToShow != null) {
            Slog.i(TAG, "Showing crash dialog for package " + packageName + " u" + userId);
            dialogToShow.show();
        }
    }

针对异常对话框展示问题
a)局部:在framework-res.apk的config_appsNotReportingCrashes中添加对应包名,进行加白处理
b)全局,设置Settings.Global.HIDE_ERROR_DIALOGS为1,Settings.Secure.ANR_SHOW_BACKGROUND为0

4)怎么实现等待handler处理?

com.android.server.am.AppErrorResult

/**
 * One-shot hand-off of an int result between threads, built on the object's
 * monitor: {@link #set(int)} publishes the value and wakes all waiters,
 * {@link #get()} blocks until a value has been published.
 */
final class AppErrorResult {
    /**
     * Publishes the result and wakes every thread blocked in {@link #get()}.
     *
     * @param res the result value to hand to waiting threads
     */
    public void set(int res) {
        synchronized (this) {
            mHasResult = true;
            mResult = res;
            notifyAll();
        }
    }

    /**
     * Blocks until {@link #set(int)} has been called and returns the published value.
     *
     * <p>An interrupt does not abort the wait, but it is no longer lost: if the
     * thread was interrupted while waiting, its interrupt status is restored before
     * this method returns so callers further up can still observe it.
     *
     * @return the value passed to {@link #set(int)}
     */
    public int get() {
        boolean interrupted = false;
        try {
            synchronized (this) {
                // Loop to guard against spurious wake-ups and interrupts.
                while (!mHasResult) {
                    try {
                        wait();
                    } catch (InterruptedException e) {
                        // Keep waiting for the result, but remember the interrupt.
                        interrupted = true;
                    }
                }
                // Read under the same lock that guards the write in set().
                return mResult;
            }
        } finally {
            if (interrupted) {
                Thread.currentThread().interrupt();
            }
        }
    }

    boolean mHasResult = false;
    int mResult;
}

参考学习

https://juejin.cn/post/6844904006041468935

你可能感兴趣的:(Android之进程 crash监听)