log分析

log分析用于定位一些无法断点调试的一些疑难杂症。所以需要熟悉bugreport到底有哪些内容,哪些模块都打印log。读完下文你会有不一样的认识。基于Android O源码

命令:

adb bugreport > bugreport.txt

源码:

#include 
#include 
#include 
#include 
#include 

#include 
#include 
int main() {

  fprintf(stderr, "=============================================================================\n");
  fprintf(stderr, "WARNING: flat bugreports are deprecated, use adb bugreport  instead\n");
  fprintf(stderr, "=============================================================================\n\n\n");

  // 启动dumpstate服务
  property_set("ctl.start", "dumpstate");

  //需要多次尝试,直到dumpstate服务启动完成,才能建立socket通信
  int s;
  for (int i = 0; i < 20; i++) {
    //创建socket client
    s = socket_local_client("dumpstate", ANDROID_SOCKET_NAMESPACE_RESERVED, SOCK_STREAM);
    if (s >= 0)
      break;
    // 休眠1s后再次尝试连接
    sleep(1);
  }

  if (s == -1) {
    printf("Failed to connect to dumpstate service: %s\n", strerror(errno));
    return 1;
  }

  //当3分钟没有任何数据可读,则超时停止读取并退出。
  //dumpstate服务中不存在大于1分钟的timetout,因而不可预见的超时的情况下留有很大的回旋余地。
  struct timeval tv;
  tv.tv_sec = 3 * 60;
  tv.tv_usec = 0;
  if (setsockopt(s, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)) == -1) {
    printf("WARNING: Cannot set socket timeout: %s\n", strerror(errno));
  }

  while (1) {
    char buffer[65536];
    ssize_t bytes_read = TEMP_FAILURE_RETRY(read(s, buffer, sizeof(buffer)));
    if (bytes_read == 0) {
      break;
    } else if (bytes_read == -1) {
      // EAGAIN意味着timeout,Bugreport读异常终止
      if (errno == EAGAIN) {
        errno = ETIMEDOUT;
      }
      printf("\nBugreport read terminated abnormally (%s).\n", strerror(errno));
      break;
    }

    ssize_t bytes_to_send = bytes_read;
    ssize_t bytes_written;
    do {
        //循环读取信息
      bytes_written = TEMP_FAILURE_RETRY(write(STDOUT_FILENO,
                                               buffer + bytes_read - bytes_to_send,
                                               bytes_to_send));
      if (bytes_written == -1) {
        printf("Failed to write data to stdout: read %zd, trying to send %zd (%s)\n",
               bytes_read, bytes_to_send, strerror(errno));
        return 1;//将数据无法写入stdout
      }
      bytes_to_send -= bytes_written;
    } while (bytes_written != 0 && bytes_to_send > 0);
  }

  close(s);
  return 0;
}

至于这里property_set(“ctl.start”, “dumpstate”)为什么会写着启动服务,查阅资料说是触发init进程fork进程/system/bin/dumpstate

dumpstate.cpp

int main(int argc, char *argv[]) {
    //设置优先级,防止被lmk,可以说很牛逼了,直接给文件中写oom_adj的值,root权限就是吊,直接不死进程
    /* set as high priority, and protect from OOM killer */
    setpriority(PRIO_PROCESS, 0, -20);

    FILE* oom_adj = fopen("/proc/self/oom_score_adj", "we");
    if (oom_adj) {
        fputs("-1000", oom_adj);
        fclose(oom_adj);
    } else {
        /* fallback to kernels <= 2.6.35 */
        oom_adj = fopen("/proc/self/oom_adj", "we");
        if (oom_adj) {
            fputs("-17", oom_adj);
            fclose(oom_adj);
        }
    }
    
    /* parse arguments */
    //解析各种参数
    int c;
    while ((c = getopt(argc, argv, "dho:svqzpPBRSV:")) != -1) {
        switch (c) {
            // clang-format off
            case 'd': do_add_date = 1;            break;
            case 'z': do_zip_file = 1;            break;
            case 'o': use_outfile = optarg;       break;
            case 's': use_socket = 1;             break;
            case 'S': use_control_socket = 1;     break;
            case 'v': show_header_only = true;    break;
            case 'q': do_vibrate = 0;             break;
            case 'p': do_fb = 1;                  break;
            case 'P': ds.update_progress_ = true; break;
            case 'R': is_remote_mode = 1;         break;
            case 'B': do_broadcast = 1;           break;
            case 'V':                             break; // compatibility no-op
            case 'h':
                ShowUsageAndExit(0);
                break;
            default:
                fprintf(stderr, "Invalid option: %c\n", c);
                ShowUsageAndExit();
                // clang-format on
        }
    }
    ...
    // If we are going to use a socket, do it as early as possible to avoid timeouts from bugreport.
    //如果我们要使用套接字,请尽早执行,以避免在bugreport出现超时
    if (use_socket) {
        redirect_to_socket(stdout, "dumpstate");
    }
    
    /* 读取/proc/cmdline */
    FILE *cmdline = fopen("/proc/cmdline", "re");
    if (cmdline) {
        fgets(cmdline_buf, sizeof(cmdline_buf), cmdline);
        fclose(cmdline);
    }
    ds.PrintHeader();//打印头部信息
    if (telephony_only) {
        //...不执行这
    } else {
        // Dumps systrace right away, otherwise it will be filled with unnecessary events.
        // First try to dump anrd trace if the daemon is running. Otherwise, dump
        // the raw trace.
        //在dump_traces()之前调用以下dumpsys调用保持系统统计信息尽可能接近其初始状态。
        if (!dump_anrd_trace()) {
            dump_systrace();
        }
         // TODO: Drop root user and move into dumpstate() once b/28633932 is fixed.
        dump_raft();
        //collect stack traces from Dalvik and native processes (needs root)
        dump_traces_path = dump_traces();
        dumpstate();//核心
    }
    ...
    //通过广播处理发送给ActivityManager一些东西
}
static void dumpstate() {
    DurationReporter duration_reporter("DUMPSTATE");

    dump_dev_files("TRUSTY VERSION", "/sys/bus/platform/drivers/trusty", "trusty_version");
    //记录系统运行时长和休眠时长
    RunCommand("UPTIME", {"uptime"});
    //输出mmcblk0设备信息
    dump_files("UPTIME MMC PERF", mmcblk0, skip_not_stat, dump_stat_from_fd);
    dump_emmc_ecsd("/d/mmc0/mmc0:0001/ext_csd");
    //内存信息
    DumpFile("MEMORY INFO", "/proc/meminfo");
    //Cpu信息
    RunCommand("CPU INFO", {"top", "-b", "-n", "1", "-H", "-s", "6", "-o",
                            "pid,tid,user,pr,ni,%cpu,s,virt,res,pcy,cmd,name"});
    ...
    //输出kernel log
    do_dmesg();
    //所有已打开文件
    RunCommand("LIST OF OPEN FILES", {"lsof"}, CommandOptions::AS_ROOT);
    ...
    //打印SYSTEM LOG,EVENT LOG,RADIO LOG,LOG STATISTICS,LAST LOGCAT
    DoLogcat();
    ...
    //Binder log
    /* Binder state is expensive to look at as it uses a lot of memory. */
    DumpFile("BINDER FAILED TRANSACTION LOG", "/sys/kernel/debug/binder/failed_transaction_log");
    DumpFile("BINDER TRANSACTION LOG", "/sys/kernel/debug/binder/transaction_log");
    DumpFile("BINDER TRANSACTIONS", "/sys/kernel/debug/binder/transactions");
    DumpFile("BINDER STATS", "/sys/kernel/debug/binder/stats");
    DumpFile("BINDER STATE", "/sys/kernel/debug/binder/state");

    ...

    printf("========================================================\n");
    printf("== Running Application Activities\n");
    printf("========================================================\n");
    //dumpsys activity 的log
    RunDumpsys("APP ACTIVITIES", {"activity", "-v", "all"});

    printf("========================================================\n");
    printf("== Running Application Services\n");
    printf("========================================================\n");

    RunDumpsys("APP SERVICES", {"activity", "service", "all"});

    printf("========================================================\n");
    printf("== Running Application Providers\n");
    printf("========================================================\n");

    RunDumpsys("APP PROVIDERS", {"activity", "provider", "all"});

    printf("========================================================\n");
    printf("== Dropbox crashes\n");
    printf("========================================================\n");

    RunDumpsys("DROPBOX SYSTEM SERVER CRASHES", {"dropbox", "-p", "system_server_crash"});
    RunDumpsys("DROPBOX SYSTEM APP CRASHES", {"dropbox", "-p", "system_app_crash"});

    // DumpModemLogs adds the modem logs if available to the bugreport.
    // Do this at the end to allow for sufficient time for the modem logs to be
    // collected.
    DumpModemLogs();

    printf("========================================================\n");
    printf("== Final progress (pid %d): %d/%d (estimated %d)\n", ds.pid_, ds.progress_->Get(),
           ds.progress_->GetMax(), ds.progress_->GetInitialMax());
    printf("========================================================\n");
    printf("== dumpstate: done (id %d)\n", ds.id_);
    printf("========================================================\n");
}

所以核心在

static void DoLogcat() {
    unsigned long timeout;
    // DumpFile("EVENT LOG TAGS", "/etc/event-log-tags");
    // calculate timeout
    timeout = logcat_timeout("main") + logcat_timeout("system") + logcat_timeout("crash");
    if (timeout < 20000) {
        timeout = 20000;
    }
    RunCommand("SYSTEM LOG",
               {"logcat", "-v", "threadtime", "-v", "printable", "-v", "uid",
                        "-d", "*:v"},
               CommandOptions::WithTimeout(timeout / 1000).Build());
    timeout = logcat_timeout("events");
    if (timeout < 20000) {
        timeout = 20000;
    }
    RunCommand("EVENT LOG",
               {"logcat", "-b", "events", "-v", "threadtime", "-v", "printable", "-v", "uid",
                        "-d", "*:v"},
               CommandOptions::WithTimeout(timeout / 1000).Build());
    timeout = logcat_timeout("radio");
    if (timeout < 20000) {
        timeout = 20000;
    }
    RunCommand("RADIO LOG",
               {"logcat", "-b", "radio", "-v", "threadtime", "-v", "printable", "-v", "uid",
                        "-d", "*:v"},
               CommandOptions::WithTimeout(timeout / 1000).Build());

    RunCommand("LOG STATISTICS", {"logcat", "-b", "all", "-S"});

    /* kernels must set CONFIG_PSTORE_PMSG, slice up pstore with device tree */
    RunCommand("LAST LOGCAT",
                {"logcat", "-L", "-b", "all", "-v", "threadtime", "-v", "printable", "-v", "uid",
                        "-d", "*:v"});
}

大概都有:

dumpstate
MEMORY INFO

获取该log:读取文件/proc/meminfo

系统内存使用状态

CPU INFO

获取该log:执行/system/bin/top -n 1 -d 1 -m 30 -t

系统CPU使用状态

PROCRANK

获取该log:执行/system/bin/procrank

执行/system/xbin/procrank后输出的结果,查看一些内存使用状态

VIRTUAL MEMORY STATS

获取该log:读取文件/proc/vmstat

虚拟内存分配情况

vmalloc申请的内存则位于vmalloc_start~vmalloc_end之间,与物理地址没有简单的转换关系,虽然在逻辑上它们也是连续的,但是在物理上它们不要求连续。

VMALLOC INFO

获取该log:读取文件/proc/vmallocinfo

虚拟内存分配情况

SLAB INFO

获取该log:读取文件/proc/slabinfo

SLAB是一种内存分配器.这里输出该分配器的一些信息

ZONEINFO

获取该log:读取文件/proc/zoneinfo

zone info

SYSTEM LOG(需要着重分析)

获取该log:执行/system/bin/logcat -v time -d *:v

会输出在程序中输出的Log,用于分析系统的当前状态

VM TRACES

获取该log:读取文件/data/anr/traces.txt

因为每个程序都是在各自的VM中运行的,这个Log是现实各自VM的一些traces

EVENT LOG TAGS

获取该log:读取文件/etc/event-log-tags

EVENT LOG

获取该log:执行/system/bin/logcat -b events -v time -d *:v

输出一些Event的log

RADIO LOG

获取该log:执行/system/bin/logcat -b radio -v time -d *:v

显示一些无线设备的链接状态,如GSM,PHONE,STK(Satellite Tool Kit)…

NETWORK STATE

获取该log:执行/system/bin/netcfg (得到网络链接状态)

获取该log:读取文件/proc/net/route (得到路由状态)

显示网络链接和路由

SYSTEM PROPERTIES

获取该log:参考代码实现

显示一些系统属性,如Version,Services,network…

KERNEL LOG

获取该log:执行/system/bin/dmesg

显示Android内核输出的Log

KERNEL WAKELOCKS

获取该log:读取文件/proc/wakelocks

内核对一些程式和服务唤醒和休眠的一些记录

KERNEL CPUFREQ

(Linux kernel CPUfreq subsystem) Clock scaling allows you to change the clock speed of the CPUs on the fly.

This is a nice method to save battery power, because the lower the clock speed is, the less power the CPU consumes.

PROCESSES

获取该log:执行ps -P

显示当前进程

PROCESSES AND THREADS

获取该log:执行ps -t -p -P

显示当前进程和线程

LIBRANK

获取该log:执行/system/xbin/librank

剔除不必要的library

BINDER FAILED TRANSACTION LOG

获取该log:读取文件/proc/binder/failed_transaction_log

BINDER TRANSACTION LOG

获取该log:读取文件/proc/binder/transaction_log

BINDER TRANSACTIONS

获取该log:读取文件/proc/binder/transactions

BINDER STATS

获取该log:读取文件/proc/binder/stats

BINDER PROCESS STATE

获取该log:读取文件/proc/binder/proc/*

bind相关的一些状态

FILESYSTEMS

获取该log:执行/system/bin/df

主要文件的一些容量使用状态(cache,sqlite,dev…)

PACKAGE SETTINGS

获取该log:读取文件/data/system/packages.xml

系统中package的一些状态(访问权限,路径…),类似Windows里面的一些lnk文件吧.

PACKAGE UID ERRORS

获取该log:读取文件/data/system/uiderrors.txt

错误信息

KERNEL LAST KMSG LOG

最新kernel message log

LAST RADIO LOG

最新radio log

KERNEL PANIC CONSOLE LOG

KERNEL PANIC THREADS LOG

控制台/线程的一些错误信息log

BACKLIGHTS

获取该log:获取LCD brightness读/sys/class/leds/lcd-backlight/brightness

获取该log:获取Button brightness读/sys/class/leds/button-backlight/brightness

获取该log:获取Keyboard brightness读/sys/class/leds/keyboard-backlight/brightness

获取该log:获取ALS mode读/sys/class/leds/lcd-backlight/als

获取该log:获取LCD driver registers读/sys/class/leds/lcd-backlight/registers

获取相关亮度的一些信息

(2)build.prop
VERSION INFO输出下列信息

当前时间
当前内核版本:可以读取文件(/proc/version)获得
显示当前命令:可以读取文件夹(/proc/cmdline)获得
显示系统build的一些属性:可以读取文件(/system/build.prop)获得
输出系统一些属性
gsm.version.ril-impl
gsm.version.baseband
gsm.imei
gsm.sim.operator.numeric
gsm.operator.alpha

(3)dumpsys
执行/system/bin/dumpsys后可以获得这个log.
经常会发现该log输出不完整,因为代码里面要求该工具最多只执行60ms,可能会导致log无法完全输出来.
可以通过修改时间参数来保证log完全输出.
信息:
Currently running services
DUMP OF SERVICE services-name(running)

你可能感兴趣的:(Android系统,Android基础)