应用发布出去后总会面临崩溃的情况。有时候客户端的崩溃在程序员这边无法重现，自然也就无法定位到程序崩溃的位置。这时候，如果程序能够在崩溃的时候自动保存当时的堆栈信息，那么程序员就可以通过分析堆栈信息找到对应的崩溃代码位置了。经过百度后，发现有一种方法是在程序崩溃的时候自动生成程序的 dump 文件，然后通过 WinDbg 来分析。本文重点讲解如何生成 dump 文件；使用 WinDbg 分析 dump 文件的方法不在本文讨论范围内，请参考 WinDbg 的相关文档。

生成 dump 的方式有很多种。本文着重讲解以下三种：

使用 Windows 提供的函数 MiniDumpWriteDump 收集 dump
使用 Google Breakpad 收集 dump
使用 ProcDump.exe 收集 Crash Dump

MiniDumpWriteDump 方式

在这种方式中，所有的代码都是由自己来写。定制性很高，而且使用的是 Windows 系统 API ，没有 License 相关的纠纷。

注意： 使用 MiniDumpWriteDump 需要包含头文件 DbgHelp.h，并将导入库 DbgHelp.lib 添加到工程的链接依赖中。

在工程上点击右键->属性->链接器->输入->附加依赖项中添加 DbgHelp.lib 即可。如图：

add_library.gif

基本原理

Windows 提供了一套 SEH 机制(参考《Windows 核心编程》 23~25 章)来保证操作系统本身更加健壮。我们可以利用这个机制中的异常处理机制获取到我们程序崩溃的 dump 信息。

Windows 提供了一个函数 : SetUnhandledExceptionFilter，该方法提供给了程序处理异常的最后的机会。

// Prototype of the Win32 API, quoted here for reference only (it is not meant
// to be redeclared by the program). It takes the top-level exception filter to
// register (optional, per the _In_opt_ annotation) and returns a value of the
// same filter-pointer type.
WINBASEAPI LPTOP_LEVEL_EXCEPTION_FILTER WINAPI SetUnhandledExceptionFilter(_In_opt_ LPTOP_LEVEL_EXCEPTION_FILTER lpTopLevelExceptionFilter);

一般情况，该函数需要在程序开始的时候就被调用。那么当程序中任意线程崩溃的时候，都会导致我们指定的过滤函数(SetUnhandledExceptionFilter 注册的)执行。

注意： 在新版本的 VS 中，由于微软改变了 CRT 的实现方式，CRT 自身也会调用 SetUnhandledExceptionFilter，从而覆盖用户注册的过滤函数，导致程序不能在崩溃的时候调用到我们的过滤函数。这时候，我们需要实现以下函数，使得此后（包括 CRT 在内）对 SetUnhandledExceptionFilter 的调用不起任何作用：

// Overwrites the first bytes of SetUnhandledExceptionFilter inside the current
// process so that every later call (for example one made by the CRT) does
// nothing and returns NULL. The filter that is installed at the moment of the
// patch therefore stays in effect; call this only AFTER registering your own
// filter.
//
// The patch is raw machine code and therefore architecture specific:
//   x86: 33 C0 C2 04 00 -> xor eax, eax ; ret 4  (stdcall: callee pops the
//                                                  single 4-byte argument)
//   x64: 33 C0 C3       -> xor eax, eax ; ret    (the caller owns the stack,
//                                                  "ret 4" would unbalance it)
// On any other architecture the function is a no-op.
//
// Everything here is best effort: the Win32 calls report failure through their
// return values (they do not throw C++ exceptions), and a failed patch simply
// leaves SetUnhandledExceptionFilter working as usual.
void DisableSetUnhandledExceptionFilter() {
#if defined(_M_IX86) || defined(_M_X64)
#if defined(_M_IX86)
    static const unsigned char kPatch[] = { 0x33, 0xC0, 0xC2, 0x04, 0x00 };
#else
    static const unsigned char kPatch[] = { 0x33, 0xC0, 0xC3 };
#endif

    void* addr = reinterpret_cast<void*>(&SetUnhandledExceptionFilter);
    if (addr == nullptr) {
        return;
    }

    // Make the code page writable for the duration of the patch.
    DWORD oldProtect = 0;
    const BOOL unprotected =
        VirtualProtect(addr, sizeof(kPatch), PAGE_EXECUTE_READWRITE, &oldProtect);

    SIZE_T written = 0;
    const BOOL patched =
        WriteProcessMemory(GetCurrentProcess(), addr, kPatch, sizeof(kPatch), &written);

    // Only restore the protection if we actually changed it; oldProtect is
    // meaningless otherwise.
    if (unprotected) {
        DWORD ignored = 0;
        VirtualProtect(addr, sizeof(kPatch), oldProtect, &ignored);
    }

    // Make sure no CPU keeps executing stale instructions from the old prologue.
    if (patched && written == sizeof(kPatch)) {
        FlushInstructionCache(GetCurrentProcess(), addr, sizeof(kPatch));
    }
#endif
}

在调用 SetUnhandledExceptionFilter 注册好我们自己的过滤函数之后，再调用上述函数，程序崩溃的时候，就能够正确调用到我们注册的过滤函数了。

实现 dump 收集

在程序开始的时候，调用 SetUnhandledExceptionFilter 方法，这样我们就可以在进程崩溃的时候收集到自己崩溃的 dump 文件了。以下代码中，当程序出现崩溃的时候，会在 C:\dump 文件夹下生成 test.dmp 文件。通过分析该文件，你就可以找到对应的崩溃位置。

// Top-level exception filter: writes a minidump of the current (crashing)
// process to C:\Dump\Test.dmp.
//
// pexcp  Exception record and CPU context handed over by the system; it is
//        stored in the dump so the debugger can show the faulting instruction.
//
// Returns EXCEPTION_CONTINUE_SEARCH (the value 0 the filter has always
// returned), so the system's default unhandled-exception processing still runs
// afterwards.
static long __stdcall CrashInfocallback(_EXCEPTION_POINTERS *pexcp) {
    static const wchar_t kDumpDir[] = L"C:\\Dump";
    static const wchar_t dump_file_path[] = L"C:\\Dump\\Test.dmp";

    // CreateFileW does not create missing directories. Best effort: an
    // "already exists" failure is fine and any other failure surfaces below.
    CreateDirectoryW(kDumpDir, nullptr);

    BOOL writeDump = FALSE;
    HANDLE hDumpFile = CreateFileW(
        dump_file_path,
        GENERIC_WRITE,
        0,
        nullptr,
        CREATE_ALWAYS,
        FILE_ATTRIBUTE_NORMAL,
        nullptr
    );
    if (hDumpFile != INVALID_HANDLE_VALUE) {
        MINIDUMP_EXCEPTION_INFORMATION dumpInfo;
        dumpInfo.ThreadId = GetCurrentThreadId();
        dumpInfo.ExceptionPointers = pexcp;
        // The pointers live in THIS process (we are dumping ourselves), so
        // they must not be treated as addresses in a separate client process.
        dumpInfo.ClientPointers = FALSE;

        writeDump = MiniDumpWriteDump(
            GetCurrentProcess(),
            GetCurrentProcessId(),
            hDumpFile,
            MiniDumpNormal,
            pexcp ? &dumpInfo : nullptr,
            nullptr,
            nullptr
        );

        // Close the handle so the file is released before the process dies.
        CloseHandle(hDumpFile);
    }

    // %ls is the portable conversion for a wide string in wprintf.
    if (writeDump) {
        wprintf(L"Use my self collect dump file at %ls", dump_file_path);
    } else {
        wprintf(L"Failed to collect dump file at %ls", dump_file_path);
    }
    system("pause");
    return EXCEPTION_CONTINUE_SEARCH;
}

int initial_crash_collection() {
    SetUnhandledExceptionFilter((LPTOP_LEVEL_EXCEPTION_FILTER)CrashInfocallback);
    DisableSetUnhandledExceptionFilter();

    return 0;
}

收集其他进程的 dump

当崩溃的进程收集自身的 dump 时，由于 crash 发生时进程本身的堆栈可能已经损坏，如果在收集过程中再次发生崩溃，就会出现死循环，导致生成的 dump 为 0 KB。参考：《Windows平台下一个崩溃而导致的死锁分析》。这时候，我们需要使用其他的进程来获取已经 crash 的进程的堆栈信息。收集其他进程的 dump 代码参考如下：

// Writes a full-memory minidump of another process to "memory.dmp" in the
// current working directory.
//
// PID  Identifier of the process to dump.
//
// Returns 0 when the dump was written and a non-zero step code otherwise
// (1: Dbghelp.dll / MiniDumpWriteDump unavailable, 2: cannot open the process,
// 3: cannot create the file, 4: MiniDumpWriteDump failed). The tool is meant to
// be launched by the crashing program, which checks the exit code, so failures
// must NOT be reported as 0.
//
// MiniDumpWriteDumpFun is the function-pointer typedef for MiniDumpWriteDump;
// it is expected to be declared elsewhere in this file.
// No exception information is passed, so the dump holds the process state but
// no exception record.
int collect_dump(DWORD PID) {
    HMODULE hDbgHelp = LoadLibraryW(L"Dbghelp.dll");
    if (hDbgHelp == NULL) {
        // Capture the error code first: printf may overwrite it.
        const DWORD err = GetLastError();
        printf("Can't load Dbghelp.dll. Exiting (%lu)\n", (unsigned long)err);
        return 1;
    }

    MiniDumpWriteDumpFun pfnMiniDumpWriteDump =
        reinterpret_cast<MiniDumpWriteDumpFun>(GetProcAddress(hDbgHelp, "MiniDumpWriteDump"));
    if (pfnMiniDumpWriteDump == NULL) {
        const DWORD err = GetLastError();
        printf("Can't resolve MiniDumpWriteDump. Exiting (%lu)\n", (unsigned long)err);
        FreeLibrary(hDbgHelp);
        return 1;
    }
    printf("MiniDumpWriteDump found at 0x%p\n", reinterpret_cast<void*>(pfnMiniDumpWriteDump));

    printf("Trying to dump PID: %lu\n", (unsigned long)PID);
    HANDLE hProc = OpenProcess(PROCESS_QUERY_INFORMATION | PROCESS_VM_READ, FALSE, PID);
    if (hProc == NULL) {
        const DWORD err = GetLastError();
        printf("HANDLE is NULL. Exiting (%lu)\n", (unsigned long)err);
        FreeLibrary(hDbgHelp);
        return 2;
    }
    printf("Process HANDLE 0x%p\n", hProc);

    HANDLE hFile = CreateFileW(L"memory.dmp", GENERIC_WRITE, FILE_SHARE_WRITE, NULL,
                               CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL);
    if (hFile == INVALID_HANDLE_VALUE) {
        const DWORD err = GetLastError();
        printf("Can't create memory.dmp. Exiting (%lu)\n", (unsigned long)err);
        CloseHandle(hProc);
        FreeLibrary(hDbgHelp);
        return 3;
    }
    printf("memory.dmp HANDLE 0x%p\n", hFile);

    // MiniDumpWithFullMemory is the named form of the dump type value 2.
    const BOOL bSuccess =
        pfnMiniDumpWriteDump(hProc, PID, hFile, MiniDumpWithFullMemory, NULL, NULL, NULL);
    const DWORD dumpErr = bSuccess ? 0 : GetLastError();
    printf("Process Completed (%d)(%lu)", (int)bSuccess, (unsigned long)dumpErr);

    CloseHandle(hFile);
    CloseHandle(hProc);
    FreeLibrary(hDbgHelp);
    return bSuccess ? 0 : 4;
}

调用其他进程，并获取进程返回值

调用其他进程的方法有很多种。通过 system 函数调用的时候，我们没办法获取到被调用进程的返回值，只能够知道调用是成功还是失败。因此，参考 Windows API，我们使用 ShellExecuteExW 开启子进程，用 WaitForSingleObject 等待开启的进程结束，再用 GetExitCodeProcess 获取进程运行结束的返回值，根据返回值的状态判断是否成功收集到了 dump 文件。

static int call_to_collect(wchar_t *dump_collect_exe, wchar_t *dump_file_exe_params) {
    SHELLEXECUTEINFOW shExecInfo = { 0 };
    shExecInfo.cbSize = sizeof(SHELLEXECUTEINFOW);
    shExecInfo.fMask = SEE_MASK_NOCLOSEPROCESS;

    shExecInfo.hwnd = nullptr;
    shExecInfo.lpVerb = L"open";
    shExecInfo.lpFile = dump_collect_exe;
    shExecInfo.lpParameters = dump_file_exe_params;
    shExecInfo.lpDirectory = nullptr;
    shExecInfo.nShow = SW_HIDE;
    shExecInfo.hInstApp = nullptr;

    BOOL execSuccess = ShellExecuteExW(&shExecInfo);

    if (execSuccess) {
        WaitForSingleObject(shExecInfo.hProcess, INFINITE);
        DWORD exitCode = 0;
        GetExitCodeProcess(shExecInfo.hProcess, &exitCode);
        if (exitCode == 0) {
            return 0;
        }
    } else {
    }

    return -1;
}

至此，在 crash 的时候，获取 dump 文件的代码就已经基本完整了。

Demo

Demo CrashTest

Demo DumpGenerate

Google Breakpad

Breakpad 是 Google 开源的一个收集 dump 文件的一种解决方案。它支持 Windows，Linux，Android 等市面上主流操作系统。下面我们来学习一下如何使用它来收集 Crash 的信息。

下载并编译 breakpad

下载 breakpad 以及其依赖的 gyp googletest

git clone https://chromium.googlesource.com/breakpad/breakpad
git clone https://chromium.googlesource.com/external/gyp
git clone https://github.com/google/googletest.git

安装 Python 2.7

用 gyp 打包 breakpad 时需要 python 支持。Python 安装参考官方文档

注意： 不能安装 Python 3.x
构建工程，并编译源代码
1. 修改 breakpad\src\build\common.gypi 文件，将 WarnAsError 的值从 true 改成 false。这样在编译的时候，就不会把警告当成错误而导致编译失败。
2. 拷贝上面下载的 googletest 到 breakpad\src 目录下，并将 googletest 重命名为 testing
3. 拷贝上面下载的 gyp 到 breakpad\src\tools 目录下
4. 在 breakpad\src 目录下执行 tools\gyp\gyp.bat --no-circular-check client\windows\breakpad_client.gyp 命令，成功后在breakpad\src\client\windows\ 就可以找到生成的 breakpad_client.sln 工程文件。
至此，breakpad 工程就构建成功了。双击 breakpad_client.sln 打开工程，build all，然后就会发现在 Release 或者 Debug 目录中生成了 4 个文件: common.lib, crash_generation_client.lib, crash_generation_server.lib, exception_handler.lib。这样，我们需要的文件就都生成了，编译成功。

breakpad 的使用

在工程中，crash_generation_app 工程为测试工程。我们可以参考该处的代码了解 breakpad 是如何使用的。

breakpad 收集 dump 的思路为实现一个常驻进程，该进程用来收集 dump。此外，集成异常处理部分框架到我们需要监测的进程中。进程间通信使用的通信方式为命名管道。当存在收集 dump 的 server 进程时，使用 server 进程收集 dump；当不存在 server 的时候，crash 掉的进程尝试收集自身的 dump。

下面，我们参照 crash_generation_app 写一个简单使用 breakpad 的例子。

client 中集成 breakpad

参照编译步骤，分别编译出 x86 的 Debug 与 Release 版本的静态库与 x64 的 Debug 与 Release 版本的静态库

创建一个测试项目，命名为: crash_test。集成上面编译好的类库：common.lib, crash_generation_client.lib, exception_handler.lib。

同时，编译过程中我们需要 breakpad 工程中的头文件。故而需要将 breakpad 工程加入到头文件搜索路径中。

add_lib_for_debug_release.gif

我们需要在程序启动的时候，调用异常处理方法：

#include "google_breakpad_crash.h"
#include "google_breakpad/common/minidump_format.h"
#include "crash_generation/crash_generation_client.h"
#include "handler/exception_handler.h"
#include "common/ipc_protocol.h"

const wchar_t kPipeName[] = L"\\\\.\\pipe\\BreakpadCrashServices\\CrashServer";

static size_t kCustomInfoCount = 2;
static google_breakpad::CustomInfoEntry kCustomInfoEntries[] = {
    google_breakpad::CustomInfoEntry(L"prod", L"CrashTestApp"),
    google_breakpad::CustomInfoEntry(L"ver", L"1.0"),
};

// Callback handed to the Breakpad ExceptionHandler: reports whether the dump
// was written, waits for a key press, and passes `succeeded` back unchanged.
// Only `minidump_id` and `succeeded` are used; the remaining parameters are
// ignored.
bool ShowDumpResults(const wchar_t* dump_path,
    const wchar_t* minidump_id,
    void* context,
    EXCEPTION_POINTERS* exinfo,
    MDRawAssertionInfo* assertion,
    bool succeeded) {
    if (!succeeded) {
        printf("dump failed\n");
    } else {
        printf("dump guid is %ws\n", minidump_id);
    }

    // Pause so the message can be read before the program continues.
    system("pause");
    return succeeded;
}

int initial_google_crash_collection() {
    using namespace google_breakpad;

    CustomClientInfo custom_info = { kCustomInfoEntries, kCustomInfoCount };

    ExceptionHandler *handle = new ExceptionHandler(L"C:\\dumps\\",
        NULL,
        ShowDumpResults,
        NULL,
        ExceptionHandler::HANDLER_ALL,
        MiniDumpNormal,
        kPipeName,
        &custom_info);

    return 0;
}

// Entry point of the crash_test demo: installs the Breakpad handler and then
// crashes on purpose so that a dump gets generated.
int main() {
    // Set up crash collection before the crash is triggered.
    initial_google_crash_collection();

    printf("Ready to crash the app.\n");

    // Hold here until the user continues.
    system("pause");

    // Deliberate crash: write through a null pointer.
    int* x = 0;
    *x = 1;

    // Not expected to be reached when the write above faults.
    // NOTE(review): std::cout needs <iostream>, which is not among the
    // includes shown for this sample; confirm it is included.
    std::cout << "Hello World!\n";

    system("pause");

    return 0;
}

至此，测试程序就已经可以完成了。当不存在 server 的时候，该部分代码可以收集自身的 dump 信息。

收集 dump 的 server 的实现

创建一个新的项目，命名为 dump_generate。集成上面编译好的类库：common.lib, crash_generation_server.lib, exception_handler.lib。

同上面一样，需要将 breakpad 工程加入到头文件搜索路径中。

#include "google_breakpad/common/minidump_format.h"
#include "client/windows/crash_generation/crash_generation_server.h"
#include "client/windows/handler/exception_handler.h"
#include "client/windows/common/ipc_protocol.h"
#include "client/windows/crash_generation/client_info.h"

// Name of the pipe this server is constructed with (passed to
// CrashGenerationServer in main below).
const wchar_t kPipeName[] = L"\\\\.\\pipe\\BreakpadCrashServices\\TestServer";

using namespace google_breakpad;

// The single server instance; NULL until main creates it.
static CrashGenerationServer* crash_server = NULL;

// Connect callback: logs the process id reported by the client info object.
// `context` is not used.
static void ShowClientConnected(void* context, const ClientInfo* client_info) {
    const auto client_pid = client_info->pid();
    printf("Client connected:\t\t%d\r\n", client_pid);
}

static void ShowClientCrashed(void* context, const ClientInfo* client_info, const std::wstring* dump_path) {

    CustomClientInfo custom_info = client_info->GetCustomInfo();
    if (custom_info.count <= 0) {
        return;
    }

    std::wstring str_line;
    for (size_t i = 0; i < custom_info.count; ++i) {
        if (i > 0) {
            str_line += L", ";
        }
        str_line += custom_info.entries[i].name;
        str_line += L": ";
        str_line += custom_info.entries[i].value;
    }

    wprintf(str_line.c_str());
    printf("\n");
}

// Exit callback: logs the process id reported by the client info object.
// `context` is not used.
static void ShowClientExited(void* context, const ClientInfo* client_info) {
    const auto client_pid = client_info->pid();
    printf("Client exited:\t\t%d\r\n", client_pid);
}

int main() {
    if (crash_server) {
        return 1;
    }

    std::wstring dump_path = L"C:\\Dumps\\server";

    if (_wmkdir(dump_path.c_str()) && (errno != EEXIST)) {
        printf("Unable to create dump directory\n");
        return 1;
    }

    crash_server = new CrashGenerationServer(kPipeName,
        NULL,
        ShowClientConnected,
        NULL,
        ShowClientCrashed,
        NULL,
        ShowClientExited,
        NULL,
        NULL,
        NULL,
        true,
        &dump_path);

    if (!crash_server->Start()) {
        printf("Unable to start server\n");
        delete crash_server;
        crash_server = NULL;
    }

    MSG msg = {0};

    int index = 0;
    while (msg.message != WM_QUIT) { //while we do not close our application
        if (PeekMessage(&msg, NULL, 0, 0, PM_REMOVE)) {
            TranslateMessage(&msg);
            DispatchMessage(&msg);
        }
    }

    return 0;
}

至此，测试使用的集成 breakpad 的测试程序就已经能够正常工作了。

测试

运行 dump_generate.exe 后运行 crash_test.exe。会发现在 C:\Dumps\server 路径下生成了 dump

 ![crash_self_collection_other_process.gif](https://upload-images.jianshu.io/upload_images/2159939-a445524b5c7f31ab.gif?imageMogr2/auto-orient/strip)

直接运行 crash_test.exe。会发现在 C:\Dumps 路径下生成了 dump

crash_self_collection.gif

ProcDump 方式

ProcDump 是 Windows 平台下的命令行工具，可以在指定的条件下生成 dump 文件。可用于以下方面

高CPU占用率的性能分析优化
程序停止响应的调试
First chance异常捕获
监视内存使用
结合系统性能计数器使用
...

在本例子中，我们可以在程序崩溃的时候，直接调用它来生成 dump 文件。这与我们自己写代码创建进程dump 的方式类似。只不过将自己写的工具换成了Windows 提供的 ProcDump 而已。

例如：

procdump -ma <pid> <path>

执行以上命令就可以立即为进程 ID 为 <pid> 的进程生成一份包含完整内存的 dump，并保存到 <path>。

ProcDump 工具应用极其广泛，如有兴趣，可参考官网。

Windows 收集 C++ dll 崩溃信息