之前在看android启动过程总是带着完成工作任务的目的去分析代码,但是对于一些代码的细节并不是很清楚,在这里就分析一下Init进程的执行过程。
以下框图简要描述系统进程间层次关系
Init进程是android系统起来之后启动的第一个进程,对于研究android系统的启动过程很重要。它是android系统中的始祖进程,USB守护进程(usbd),挂载守护进程(vold),无线接口守护进程(rild)等都是init进程的子进程。以下截图是我手机的运行进程情况,可以明显看出进程间的关系
还有一个PID=2的kthreadd进程,它是内核线程的父进程,用来创建内核空间的其它线程
直接根据代码来分析整个进程的执行过程。
int main(int argc, char **argv) { int fd_count = 0; struct pollfd ufds[4];//存放pollfd char *tmpdev; char* debuggable; char tmp[32]; int property_set_fd_init = 0; int signal_fd_init = 0; int keychord_fd_init = 0; if (!strcmp(basename(argv[0]), "ueventd")) return ueventd_main(argc, argv);//ueventd是init的软链接,执行这个进程的时候相当于执行init进程,然后根据进程名进入相应的执行流程 /* clear the umask */ umask(0); /* Get the basic filesystem setup we need put * together in the initramdisk on / and then we'll * let the rc file figure out the rest. */ mkdir("/dev", 0755);//创建一些必要的目录并分配权限 mkdir("/proc", 0755); mkdir("/sys", 0755); mount("tmpfs", "/dev", "tmpfs", 0, "mode=0755"); mkdir("/dev/pts", 0755); mkdir("/dev/socket", 0755); mount("devpts", "/dev/pts", "devpts", 0, NULL); mount("proc", "/proc", "proc", 0, NULL); mount("sysfs", "/sys", "sysfs", 0, NULL); /* We must have some place other than / to create the * device nodes for kmsg and null, otherwise we won't * be able to remount / read-only later on. * Now that tmpfs is mounted on /dev, we can actually * talk to the outside world. */
以上主要创建一些文件系统目录并挂载相应的文件系统,proc文件系统是重要的内核数据的接口,可以通过它读取一些系统信息还能操作内核参数
open_devnull_stdio();//重定向标准输入,输入,错误到/dev/__null__(dup2复制文件句柄,0,1,2分别代表标准输入 输出 错误) 屏蔽标准输入输出 log_init();//设置log信息输出设备/dev/__kmsg__,unlink之后其他进程无法访问,阅读源码定向到printk函数输出 初始化log系统 property_init();//初始化属性系统,这个可以以后分析 get_hardware_name(hardware, &revision); process_kernel_cmdline(); #ifdef HAVE_SELINUX INFO("loading selinux policy\n"); selinux_load_policy(); #endif is_charger = !strcmp(bootmode, "charger"); INFO("property init\n"); if (!is_charger) property_load_boot_defaults();
这里导入相应的处理函数,分析执行过程
/*
 * Read /proc/cmdline, split it on single spaces and hand every token to
 * import_kernel_nv(). Afterwards the file mode of /proc/cmdline is changed
 * to 0440 so unprivileged processes cannot read the raw command line.
 *
 * in_qemu is passed through unchanged to import_kernel_nv().
 */
static void import_kernel_cmdline(int in_qemu)
{
char cmdline[1024];
char *token;
int len = 0;
int fd = open("/proc/cmdline", O_RDONLY);

if (fd >= 0) {
len = read(fd, cmdline, 1023);
if (len < 0)
len = 0;
/* get rid of trailing newline, it happens */
if (len > 0 && cmdline[len - 1] == '\n')
len--;
close(fd);
}
/* Terminate the buffer; len stays 0 when the file could not be opened or read. */
cmdline[len] = 0;

token = cmdline;
while (token && *token) {
/* Cut the buffer at the next space and remember where the next token starts. */
char *next = strchr(token, ' ');
if (next != 0)
*next++ = 0;
import_kernel_nv(token, in_qemu);
token = next;
}

/* don't expose the raw commandline to nonpriv processes */
chmod("/proc/cmdline", 0440);
}
/*
 * Handle one "name=value" token from the kernel command line.
 *
 * A token without '=' is ignored unless it is exactly "calibration", which
 * sets the calibration flag. Otherwise the token is split in place at '='.
 *
 * On a real device (in_qemu == 0) only the whitelisted names below are
 * copied into their file-level buffers. In the emulator every option is
 * exported as the property "ro.kernel.<name>", provided the property name
 * fits into the local buffer.
 */
static void import_kernel_nv(char *name, int in_qemu)
{
/* Whitelisted option names and the buffers that receive their values. */
const struct {
const char *key;
char *dest;
size_t cap;
} allowed[] = {
{ "qemu", qemu, sizeof(qemu) },
{ "androidboot.console", console, sizeof(console) },
{ "androidboot.mode", bootmode, sizeof(bootmode) },
{ "androidboot.serialno", serialno, sizeof(serialno) },
{ "androidboot.baseband", baseband, sizeof(baseband) },
{ "androidboot.carrier", carrier, sizeof(carrier) },
{ "androidboot.bootloader", bootloader, sizeof(bootloader) },
{ "androidboot.hardware", hardware, sizeof(hardware) },
};
char *eq = strchr(name, '=');
char *value;
size_t i;

if (eq == 0) {
if (strcmp(name, "calibration") == 0)
calibration = 1;
return;
}

/* Split "name=value" in place. */
*eq = 0;
value = eq + 1;
if (*name == 0)
return;

if (in_qemu) {
/* in the emulator, export any kernel option with the
 * ro.kernel. prefix */
char buff[32];
int len = snprintf( buff, sizeof(buff), "ro.kernel.%s", name );
if (len < (int)sizeof(buff))
property_set( buff, value );
return;
}

/* on a real device, white-list the kernel options */
for (i = 0; i < sizeof(allowed) / sizeof(allowed[0]); i++) {
if (strcmp(name, allowed[i].key) == 0) {
strlcpy(allowed[i].dest, value, allowed[i].cap);
break;
}
}
}
get_hardware_name(hardware, &revision); snprintf(tmp, sizeof(tmp), "/init.%s.rc", hardware); init_parse_config_file(tmp);//分析相应硬件版本的rc文件
init.rc文件有自己相应的语法,分析rc文件也是根据对应的语法来分析,这里引入一篇简单介绍init.rc语法的文章
Android init.rc脚本解析
/*
 * Load the rc file `fn` into memory and parse it.
 *
 * Returns 0 on success and -1 when read_file() yields no data.
 *
 * NOTE(review): the buffer returned by read_file() is not released here;
 * presumably the parsed tokens keep pointing into it - confirm before
 * adding a free().
 */
int init_parse_config_file(const char *fn)
{
char *data = read_file(fn, 0);

if (data == 0)
return -1;

/* The actual interpretation of the script happens in parse_config(). */
parse_config(fn, data);
DUMP();
return 0;
}
解析过程会先将init.rc文件action与service进行解析,然后插入到链表中依次执行,查看源码中对链表的定义
#ifndef _CUTILS_LIST_H_
#define _CUTILS_LIST_H_

#include <stddef.h>

#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */

/*
 * Link node of an intrusive, circular, doubly linked list. The node carries
 * no payload; it is embedded as a member in the structure that should be
 * listed, and node_to_item() maps a node back to that structure.
 */
struct listnode
{
    struct listnode *next;
    struct listnode *prev;
};

/*
 * Given a pointer to the `member` listnode embedded in a `container`
 * structure, yield a pointer to the enclosing structure by subtracting the
 * member's offset. The whole expansion is parenthesized so the result can
 * be used directly, e.g. node_to_item(n, struct foo, link)->field.
 */
#define node_to_item(node, container, member) \
    ((container *) (((char*) (node)) - offsetof(container, member)))

/* Define a list head named `name` that initially points at itself (empty list). */
#define list_declare(name) \
    struct listnode name = { \
        .next = &name, \
        .prev = &name, \
    }

/* Walk the list from first to last node; `node` is the cursor variable. */
#define list_for_each(node, list) \
    for (node = (list)->next; node != (list); node = node->next)

/* Walk the list from last to first node. */
#define list_for_each_reverse(node, list) \
    for (node = (list)->prev; node != (list); node = node->prev)

/* Initialize a list head. */
void list_init(struct listnode *list);
/* Append `item` at the tail of `list`. */
void list_add_tail(struct listnode *list, struct listnode *item);
/* Unlink `item` from the list it is on. */
void list_remove(struct listnode *item);

/* True when the head points at itself, i.e. the list has no items. */
#define list_empty(list) ((list) == (list)->next)
#define list_head(list) ((list)->next)
#define list_tail(list) ((list)->prev)

#ifdef __cplusplus
};
#endif /* __cplusplus */

#endif
/*
 * File-level list heads, each declared empty (pointing at itself):
 * service_list collects parsed services, action_list collects parsed
 * actions, and action_queue holds the actions waiting to be executed.
 */
static list_declare(service_list); static list_declare(action_list); static list_declare(action_queue);
/*
 * Tokenize the rc file contents in `s` and dispatch every completed line.
 *
 * fn - file name, stored in the parse state and used in the import error message
 * s  - the file contents; handed to the tokenizer through state.ptr
 *
 * Text tokens are collected into args[]. At each newline the first token is
 * looked up as a keyword: a SECTION keyword (on / service / import) starts a
 * new section through parse_new_section(), which also installs the line
 * handler for that section; any other line goes to the current line handler.
 * After end of input, every file recorded on import_list is parsed in turn.
 */
static void parse_config(const char *fn, char *s)
{
struct parse_state state;
struct listnode *node; /* cursor for walking import_list below */
char *args[INIT_PARSER_MAXARGS]; /* tokens of the line currently being assembled */
int nargs;

nargs = 0;
state.filename = fn;
state.line = 0;
state.ptr = s;
state.nexttoken = 0;
state.parse_line = parse_line_no_op; /* no section is open yet */

for (;;) {
switch (next_token(&state)) {
case T_EOF:
/* Call the handler once more with zero arguments, then leave the loop. */
state.parse_line(&state, 0, 0);
goto parser_done;
case T_NEWLINE:
if (nargs) {
int kw = lookup_keyword(args[0]);
if (kw_is(kw, SECTION)) {
/* Notify the current handler with zero arguments before
 * parse_new_section() switches to the new section's handler. */
state.parse_line(&state, 0, 0);
parse_new_section(&state, kw, nargs, args);
} else {
/* Ordinary line inside a section: pass it to the section's handler. */
state.parse_line(&state, nargs, args);
}
nargs = 0;
}
break;
case T_TEXT:
/* Collect the token; tokens beyond INIT_PARSER_MAXARGS are dropped. */
if (nargs < INIT_PARSER_MAXARGS) {
args[nargs++] = state.text;
}
break;
}
}

parser_done:
/* NOTE(review): import_list is not declared in this function as shown;
 * it has to be provided elsewhere in the file - confirm. */
list_for_each(node, &import_list) {
struct import *import = node_to_item(node, struct import, list);
int ret;

INFO("importing '%s'", import->filename);
ret = init_parse_config_file(import->filename);
if (ret)
ERROR("could not import file '%s' from '%s'\n",
import->filename, fn);
}
}
首先查看一下keywords.h这个文件,对分析过程有帮助
#ifndef KEYWORD//防止重复定义 int do_chroot(int nargs, char **args); int do_chdir(int nargs, char **args); int do_class_start(int nargs, char **args); int do_class_stop(int nargs, char **args); int do_class_reset(int nargs, char **args); int do_domainname(int nargs, char **args); int do_exec(int nargs, char **args); int do_export(int nargs, char **args); int do_hostname(int nargs, char **args); int do_ifup(int nargs, char **args); int do_insmod(int nargs, char **args); int do_mkdir(int nargs, char **args); int do_mount_all(int nargs, char **args); int do_mount(int nargs, char **args); int do_restart(int nargs, char **args); int do_restorecon(int nargs, char **args); int do_rm(int nargs, char **args); int do_rmdir(int nargs, char **args); int do_setcon(int nargs, char **args); int do_setenforce(int nargs, char **args); int do_setkey(int nargs, char **args); int do_setprop(int nargs, char **args); int do_setrlimit(int nargs, char **args); int do_setsebool(int nargs, char **args); int do_start(int nargs, char **args); int do_stop(int nargs, char **args); int do_trigger(int nargs, char **args); int do_symlink(int nargs, char **args); int do_sysclktz(int nargs, char **args); int do_write(int nargs, char **args); int do_copy(int nargs, char **args); int do_chown(int nargs, char **args); int do_chmod(int nargs, char **args); int do_loglevel(int nargs, char **args); int do_load_persist_props(int nargs, char **args); int do_wait(int nargs, char **args); int do_ubiattach(int argc, char **args); int do_ubidetach(int argc, char **args); #define __MAKE_KEYWORD_ENUM__ #define KEYWORD(symbol, flags, nargs, func) K_##symbol,//#与##是宏定义的连接符 enum { K_UNKNOWN, #endif KEYWORD(capability, OPTION, 0, 0)//这里返回的相当于K_capabikity KEYWORD(chdir, COMMAND, 1, do_chdir)//K_chdir KEYWORD(chroot, COMMAND, 1, do_chroot)//K_chroot KEYWORD(class, OPTION, 0, 0) KEYWORD(class_start, COMMAND, 1, do_class_start) KEYWORD(class_stop, COMMAND, 1, do_class_stop) KEYWORD(class_reset, COMMAND, 1, do_class_reset) 
KEYWORD(console, OPTION, 0, 0) KEYWORD(critical, OPTION, 0, 0) KEYWORD(dalvik_recache, OPTION, 0, 0) KEYWORD(disabled, OPTION, 0, 0) KEYWORD(domainname, COMMAND, 1, do_domainname) KEYWORD(exec, COMMAND, 1, do_exec) KEYWORD(export, COMMAND, 2, do_export) KEYWORD(group, OPTION, 0, 0) KEYWORD(hostname, COMMAND, 1, do_hostname) KEYWORD(ifup, COMMAND, 1, do_ifup) KEYWORD(insmod, COMMAND, 1, do_insmod) KEYWORD(import, SECTION, 1, 0) KEYWORD(keycodes, OPTION, 0, 0) KEYWORD(mkdir, COMMAND, 1, do_mkdir) KEYWORD(mount_all, COMMAND, 1, do_mount_all) KEYWORD(mount, COMMAND, 3, do_mount) KEYWORD(on, SECTION, 0, 0) KEYWORD(oneshot, OPTION, 0, 0) KEYWORD(onrestart, OPTION, 0, 0) KEYWORD(restart, COMMAND, 1, do_restart) KEYWORD(restorecon, COMMAND, 1, do_restorecon) KEYWORD(rm, COMMAND, 1, do_rm) KEYWORD(rmdir, COMMAND, 1, do_rmdir) KEYWORD(seclabel, OPTION, 0, 0) KEYWORD(service, SECTION, 0, 0) KEYWORD(setcon, COMMAND, 1, do_setcon) KEYWORD(setenforce, COMMAND, 1, do_setenforce) KEYWORD(setenv, OPTION, 2, 0) KEYWORD(setkey, COMMAND, 0, do_setkey) KEYWORD(setprop, COMMAND, 2, do_setprop) KEYWORD(setrlimit, COMMAND, 3, do_setrlimit) KEYWORD(setsebool, COMMAND, 1, do_setsebool) KEYWORD(socket, OPTION, 0, 0) KEYWORD(start, COMMAND, 1, do_start) KEYWORD(stop, COMMAND, 1, do_stop) KEYWORD(trigger, COMMAND, 1, do_trigger) KEYWORD(symlink, COMMAND, 1, do_symlink) KEYWORD(sysclktz, COMMAND, 1, do_sysclktz) KEYWORD(user, OPTION, 0, 0) KEYWORD(wait, COMMAND, 1, do_wait) KEYWORD(write, COMMAND, 2, do_write) KEYWORD(copy, COMMAND, 2, do_copy) KEYWORD(chown, COMMAND, 2, do_chown) KEYWORD(chmod, COMMAND, 2, do_chmod) KEYWORD(loglevel, COMMAND, 1, do_loglevel) KEYWORD(load_persist_props, COMMAND, 0, do_load_persist_props) KEYWORD(ubiattach, COMMAND, 1, do_ubiattach) KEYWORD(ubidetach, COMMAND, 1, do_ubidetach) KEYWORD(ioprio, OPTION, 0, 0) #ifdef __MAKE_KEYWORD_ENUM__ KEYWORD_COUNT, }; #undef __MAKE_KEYWORD_ENUM__ #undef KEYWORD #endif以上通过枚举建立key-value映射,也就相当于map的功能,我们会通过查找key来执行对应的操作
再来查看init_parser.c这个文件,在其中两次include keywords.h这个文件
#include "keywords.h"//这是得到enum{K_UNKNOWN,K_capability,K_chdir,K_chroot......} #define KEYWORD(symbol, flags, nargs, func) \ [ K_##symbol ] = { #symbol, func, nargs + 1, flags, }, struct { const char *name; int (*func)(int nargs, char **args); unsigned char nargs; unsigned char flags; } keyword_info[KEYWORD_COUNT] = { [ K_UNKNOWN ] = { "unknown", 0, 0, 0 }, #include "keywords.h"//之前已经include得过,此时为[ K_capability ] = { "capability", 0, 1, OPTION } //[ K_chdir ] = { "chdir", do_chdir, 2, COMMAND } //[ K_chroot ] = { "chroot", do_chroot, 3, COMMAND} };此时keyword_info保存的关于键值对的结构体数组
实际上上面两次include的代码如下
int do_chroot(int nargs, char **args); … … enum { K_UNKNOWN, K_ capability, K_ chdir, … … } #define KEYWORD(symbol, flags, nargs, func) \ [ K_##symbol ] = { #symbol, func, nargs + 1, flags, }, struct { const char *name; int (*func)(int nargs, char **args); unsigned char nargs; unsigned char flags; } keyword_info[KEYWORD_COUNT] = { [ K_UNKNOWN ] = { "unknown", 0, 0, 0 }, [K_ capability] = {" capability ", 0, 1, OPTION }, [K_ chdir] = {"chdir", do_chdir ,2, COMMAND}, … … };
void parse_new_section(struct parse_state *state, int kw, int nargs, char **args) { printf("[ %s %s ]\n", args[0], nargs > 1 ? args[1] : ""); switch(kw) { case K_service: state->context = parse_service(state, nargs, args); if (state->context) { state->parse_line = parse_line_service; return; } break; case K_on: state->context = parse_action(state, nargs, args);//分析对应的on判断 其中nargs与args对应于命令的参数个数和参数列表,类似main函数 if (state->context) { state->parse_line = parse_line_action;//赋值给每个新行的parse_line return; } break; case K_import: parse_import(state, nargs, args); break; } state->parse_line = parse_line_no_op; }
static void *parse_action(struct parse_state *state, int nargs, char **args) { struct action *act;//这里查看下面引入的几个结构体 if (nargs < 2) { parse_error(state, "actions must have a trigger\n"); return 0; } if (nargs > 2) { parse_error(state, "actions may not have extra parameters\n"); return 0; }//限定nargs只能等于2 act = calloc(1, sizeof(*act)); act->name = args[1]; list_init(&act->commands);//初始化一个commands链表 list_add_tail(&action_list, &act->alist);//将当前act->alist结点添加到action_list链表尾部 /* XXX add to hash */ return act; } static void parse_line_action(struct parse_state* state, int nargs, char **args) { struct command *cmd; struct action *act = state->context; int (*func)(int nargs, char **args); int kw, n; if (nargs == 0) { return; } kw = lookup_keyword(args[0]); if (!kw_is(kw, COMMAND)) {//查找关键字是否为COMMAND,不是的就返回 parse_error(state, "invalid command '%s'\n", args[0]); return; } n = kw_nargs(kw); if (nargs < n) { parse_error(state, "%s requires %d %s\n", args[0], n - 1, n > 2 ? "arguments" : "argument"); return; } cmd = malloc(sizeof(*cmd) + sizeof(char*) * nargs); cmd->func = kw_func(kw);//这个时候就有对应的处理函数 cmd->nargs = nargs; memcpy(cmd->args, args, sizeof(char*) * nargs); list_add_tail(&act->commands, &cmd->clist);//将新建的cmd->clist节点添加到commands尾部 }对应的链表结构图如下
通常我们定义链表都是将结构体变量与指针放在一起,如下所示:
/*
 * Conventional doubly linked list node used for comparison in the article:
 * the payload and the two link pointers live in the same structure.
 */
typedef struct DulNode{
ElemType data;
struct DulNode *prev;
struct DulNode *next;
}DulNode, *DuLinkList;
源码中这种建立链表的方式有些特别,建立只有指针的链表,将链表中的结点放在结构体中,通过求偏移量来访问结构体变量,提高了效率,值得借鉴
offsetof与container_of可以自己查阅学习
这里还涉及到一些结构体Action及对应的Command,Service也是如此
/*
 * One command belonging to an action. Instances are allocated with extra
 * room after the struct for the argument pointers (see the
 * malloc(sizeof(*cmd) + sizeof(char*) * nargs) calls in the parser), so
 * args must remain the last member.
 */
struct command
{
/* list of commands in an action */
struct listnode clist;
/* handler invoked as func(nargs, args) when the command is executed */
int (*func)(int nargs, char **args);
/* number of entries in args */
int nargs;
/* first slot of the argument vector; further slots follow the struct */
char *args[1];
};
/*
 * An action: a named trigger plus the list of commands to run for it.
 * The struct embeds one list node per list it can be linked on.
 */
struct action {
/* node in list of all actions */
struct listnode alist;
/* node in the queue of pending actions */
struct listnode qlist;
/* node in list of actions for a trigger */
struct listnode tlist;
unsigned hash;
/* trigger name; compared against the requested trigger when queueing */
const char *name;
/* head of this action's list of struct command (linked through clist) */
struct listnode commands;
struct command *current;
};
/*
 * Description of one socket requested by a service's "socket" option.
 * Entries form a singly linked list hanging off struct service.
 */
struct socketinfo {
struct socketinfo *next; /* next socket of the same service, or NULL */
const char *name;
const char *type; /* "dgram", "stream" or "seqpacket" as accepted by the parser */
uid_t uid;
gid_t gid;
int perm; /* parsed from the option as an octal number */
};
/*
 * One environment variable requested by a service's "setenv" option.
 * Entries form a singly linked list hanging off struct service.
 */
struct svcenvinfo {
struct svcenvinfo *next; /* next variable of the same service, or NULL */
const char *name;
const char *value;
};
/*
 * A service declared in an rc file. The struct is allocated with extra room
 * after it for the argument pointers, so args must stay the last member.
 */
struct service {
/* list of all services */
struct listnode slist;
const char *name; /* service name from the "service" line */
const char *classname; /* class set by the "class" option; "default" if none given */
unsigned flags; /* SVC_* flag bits */
pid_t pid; /* pid of the forked child once started */
time_t time_started; /* time of last start */
time_t time_crashed; /* first crash within inspection window */
int nr_crashed; /* number of times crashed within window */
uid_t uid; /* user id the child switches to, if non-zero */
gid_t gid; /* group id the child switches to, if non-zero */
gid_t supp_gids[NR_SVC_SUPP_GIDS]; /* supplementary groups from the "group" option */
size_t nr_supp_gids; /* number of valid entries in supp_gids */
#ifdef HAVE_SELINUX
char *seclabel;
#endif
struct socketinfo *sockets; /* sockets to create for the service (singly linked) */
struct svcenvinfo *envvars; /* environment variables to add (singly linked) */
struct action onrestart; /* Actions to execute on restart. */
/* keycodes for triggering this service via /dev/keychord */
int *keycodes;
int nkeycodes;
int keychord_id;
int ioprio_class; /* IoSchedClass_* value from the "ioprio" option */
int ioprio_pri; /* priority from the "ioprio" option, range 0-7 */
int nargs; /* number of entries in args, excluding the terminating 0 */
/* "MUST BE AT THE END OF THE STRUCT" */
char *args[1];
}; /* ^-------'args' MUST be at the end of this struct! */
查看分析service的源码
static void *parse_service(struct parse_state *state, int nargs, char **args) { struct service *svc; if (nargs < 3) {//判断服务参数个数 parse_error(state, "services must have a name and a program\n"); return 0; } if (!valid_name(args[1])) {//判断服务名是否有效 parse_error(state, "invalid service name '%s'\n", args[1]); return 0; } svc = service_find_by_name(args[1]);//判断是否已经定义 if (svc) { parse_error(state, "ignored duplicate definition of service '%s'\n", args[1]); return 0; } nargs -= 2; svc = calloc(1, sizeof(*svc) + sizeof(char*) * nargs); if (!svc) { parse_error(state, "out of memory\n"); return 0; } svc->name = args[1]; svc->classname = "default"; memcpy(svc->args, args + 2, sizeof(char*) * nargs); svc->args[nargs] = 0; svc->nargs = nargs; svc->onrestart.name = "onrestart"; list_init(&svc->onrestart.commands);//初始化一个action onrestart的commands双向链表 list_add_tail(&service_list, &svc->slist);//将当前svc->slist结点添加至service_list链表 return svc; } static void parse_line_service(struct parse_state *state, int nargs, char **args) { struct service *svc = state->context; struct command *cmd; int i, kw, kw_nargs; if (nargs == 0) { return; } svc->ioprio_class = IoSchedClass_NONE; kw = lookup_keyword(args[0]); switch (kw) { case K_capability: break; case K_class: if (nargs != 2) { parse_error(state, "class option requires a classname\n"); } else { svc->classname = args[1]; } break; case K_console: svc->flags |= SVC_CONSOLE;//设置flags为SVC_CONSOLE break; case K_disabled: svc->flags |= SVC_DISABLED;//设置flags svc->flags |= SVC_RC_DISABLED; break; case K_ioprio: if (nargs != 3) { parse_error(state, "ioprio optin usage: ioprio <rt|be|idle> <ioprio 0-7>\n"); } else { svc->ioprio_pri = strtoul(args[2], 0, 8); if (svc->ioprio_pri < 0 || svc->ioprio_pri > 7) { parse_error(state, "priority value must be range 0 - 7\n"); break; } if (!strcmp(args[1], "rt")) { svc->ioprio_class = IoSchedClass_RT; } else if (!strcmp(args[1], "be")) { svc->ioprio_class = IoSchedClass_BE; } else if (!strcmp(args[1], "idle")) { 
svc->ioprio_class = IoSchedClass_IDLE; } else { parse_error(state, "ioprio option usage: ioprio <rt|be|idle> <0-7>\n"); } } break; case K_group: if (nargs < 2) { parse_error(state, "group option requires a group id\n"); } else if (nargs > NR_SVC_SUPP_GIDS + 2) { parse_error(state, "group option accepts at most %d supp. groups\n", NR_SVC_SUPP_GIDS); } else { int n; svc->gid = decode_uid(args[1]); for (n = 2; n < nargs; n++) { svc->supp_gids[n-2] = decode_uid(args[n]); } svc->nr_supp_gids = n - 2; } break; case K_keycodes: if (nargs < 2) { parse_error(state, "keycodes option requires atleast one keycode\n"); } else { svc->keycodes = malloc((nargs - 1) * sizeof(svc->keycodes[0])); if (!svc->keycodes) { parse_error(state, "could not allocate keycodes\n"); } else { svc->nkeycodes = nargs - 1; for (i = 1; i < nargs; i++) { svc->keycodes[i - 1] = atoi(args[i]); } } } break; case K_oneshot: svc->flags |= SVC_ONESHOT; break; case K_onrestart: nargs--; args++; kw = lookup_keyword(args[0]); if (!kw_is(kw, COMMAND)) { parse_error(state, "invalid command '%s'\n", args[0]); break; } kw_nargs = kw_nargs(kw); if (nargs < kw_nargs) { parse_error(state, "%s requires %d %s\n", args[0], kw_nargs - 1, kw_nargs > 2 ? 
"arguments" : "argument"); break; } cmd = malloc(sizeof(*cmd) + sizeof(char*) * nargs); cmd->func = kw_func(kw); cmd->nargs = nargs; memcpy(cmd->args, args, sizeof(char*) * nargs); list_add_tail(&svc->onrestart.commands, &cmd->clist); break; case K_critical: svc->flags |= SVC_CRITICAL; break; case K_dalvik_recache: svc->flags |= SVC_DALVIK_RECACHE; break; case K_setenv: { /* name value */ struct svcenvinfo *ei; if (nargs < 2) { parse_error(state, "setenv option requires name and value arguments\n"); break; } ei = calloc(1, sizeof(*ei)); if (!ei) { parse_error(state, "out of memory\n"); break; } ei->name = args[1]; ei->value = args[2]; ei->next = svc->envvars;//单链表操作 svc->envvars = ei;//单链表操作 break; } case K_socket: {/* name type perm [ uid gid ] */ struct socketinfo *si; if (nargs < 4) { parse_error(state, "socket option requires name, type, perm arguments\n"); break; } if (strcmp(args[2],"dgram") && strcmp(args[2],"stream") && strcmp(args[2],"seqpacket")) { parse_error(state, "socket type must be 'dgram', 'stream' or 'seqpacket'\n"); break; } si = calloc(1, sizeof(*si)); if (!si) { parse_error(state, "out of memory\n"); break; } si->name = args[1]; si->type = args[2]; si->perm = strtoul(args[3], 0, 8); if (nargs > 4) si->uid = decode_uid(args[4]); if (nargs > 5) si->gid = decode_uid(args[5]); si->next = svc->sockets;//这种插入方式是逆序插入 svc->sockets = si;//将新链表表头赋值给sockets break; } case K_user: if (nargs != 2) { parse_error(state, "user option requires a user id\n"); } else { svc->uid = decode_uid(args[1]); } break; case K_seclabel: #ifdef HAVE_SELINUX if (nargs != 2) { parse_error(state, "seclabel option requires a label string\n"); } else { svc->seclabel = args[1]; } #endif break; default: parse_error(state, "invalid option '%s'\n", args[0]); } }
从以上代码可以看出,parse_action主要解析一个Action的起始行(on <trigger>)并将其添加到action_list链表,parse_line_action则解析Action中的每条command并添加到该Action的commands链表
service的解析函数亦同理
最后分析import加入的新rc文件,init.rc文件便解析完成,并将所有的action和service分别添加到action_list和service_list链表
跟随代码,下面执行这些函数,这里可能有些疑惑,上面明显声明了三个链表,但是一直都没有涉及到action_queue这个链表。
action_for_each_trigger("early-init",action_add_queue_tail); queue_builtin_action(wait_for_coldboot_done_action, "wait_for_coldboot_done");
分析这两个函数看到底做了什么处理,其中wait_for_coldboot_done_action是一个执行函数
void action_for_each_trigger(const char *trigger, void (*func)(struct action *act)) { struct listnode *node; struct action *act; list_for_each(node, &action_list) {//遍历已经完整的action_list链表,查找early-init action act = node_to_item(node, struct action, alist); if (!strcmp(act->name, trigger)) { func(act);//执行action_add_queue_tail } } } void action_add_queue_tail(struct action *act) { list_add_tail(&action_queue, &act->qlist);//将early-init action中的qlist结点添加到action_queue链表中(这里开始涉及到action_queue链表) } void queue_builtin_action(int (*func)(int nargs, char **args), char *name) { struct action *act; struct command *cmd; act = calloc(1, sizeof(*act));//首先新建一个action act->name = name; list_init(&act->commands);//初始化commands链表 cmd = calloc(1, sizeof(*cmd));//新建一个command结构体 cmd->func = func; cmd->args[0] = name; list_add_tail(&act->commands, &cmd->clist);//将cmd->clist结点添加到commands链表尾部 list_add_tail(&action_list, &act->alist);//将act->alist结点添加到上面的action_list尾部 action_add_queue_tail(act);//将这个action添加到action_queue链表尾部 }
从以上代码分析可看出action_for_each_trigger函数实现查找action_list中的action,并将其添加到action_queue尾部
queue_builtin_action则是新建一个action,将其分别添加到action_list和action_queue链表尾部
/*
 * Build action_queue in boot order. action_for_each_trigger() queues every
 * parsed action whose name matches the given trigger; queue_builtin_action()
 * creates an action around the given function and queues it at the same
 * point of the sequence.
 */
action_for_each_trigger("early-init", action_add_queue_tail);
queue_builtin_action(wait_for_coldboot_done_action, "wait_for_coldboot_done");
queue_builtin_action(keychord_init_action, "keychord_init");
queue_builtin_action(console_init_action, "console_init");
/* execute all the boot actions to get us started */
action_for_each_trigger("init", action_add_queue_tail);
/* skip mounting filesystems in charger mode */
if (!is_charger) {
action_for_each_trigger("early-fs", action_add_queue_tail);
action_for_each_trigger("fs", action_add_queue_tail);
action_for_each_trigger("post-fs", action_add_queue_tail);
action_for_each_trigger("post-fs-data", action_add_queue_tail);
}
queue_builtin_action(property_service_init_action, "property_service_init");
queue_builtin_action(signal_init_action, "signal_init");
queue_builtin_action(check_startup_action, "check_startup");
/* charger mode queues only the "charger" actions instead of the normal boot stages */
if (is_charger) {
action_for_each_trigger("charger", action_add_queue_tail);
} else {
action_for_each_trigger("early-boot", action_add_queue_tail);
queue_builtin_action(ubootenv_init_action, "ubootenv_init");
action_for_each_trigger("boot", action_add_queue_tail);
}
/* run all property triggers based on current state of the properties */
queue_builtin_action(queue_property_triggers_action, "queue_property_triggers");
#if BOOTCHART
queue_builtin_action(bootchart_init_action, "bootchart_init");
#endif
以上代码实现的功能都是类似的:按启动阶段的先后顺序,依次把各个触发器对应的action以及内置action加入action_queue链表
接着阅读init.c后面的源码
for(;;) { int nr, i, timeout = -1; execute_one_command();//从链表中取出结点相应执行然后remove //分析过这个函数,在这里还有个疑问,该函数都是从action队列中去结点执行,但是系统的service是怎么执行的 //难道service链表不可能只注册不执行 //这里注意on boot section中最后一个command(class_start default),最终调用do_class_start
static struct command *get_first_command(struct action *act) { struct listnode *node; node = list_head(&act->commands); if (!node || list_empty(&act->commands)) return NULL; return node_to_item(node, struct command, clist); } static struct command *get_next_command(struct action *act, struct command *cmd) { struct listnode *node; node = cmd->clist.next; if (!node) return NULL; if (node == &act->commands) return NULL; return node_to_item(node, struct command, clist); } static int is_last_command(struct action *act, struct command *cmd) { return (list_tail(&act->commands) == &cmd->clist);//判断cmd->clist结点是否为act->commands链表最后一个 } void execute_one_command(void) { int ret; //第一次执行cur_action是action结构体指针,cur_command是command结构体指针,都为null if (!cur_action || !cur_command || is_last_command(cur_action, cur_command)) { cur_action = action_remove_queue_head();//从非空action_queue链表中取出头部结点并移除 cur_command = NULL; if (!cur_action)//cur_action为null时返回 return; INFO("processing action %p (%s)\n", cur_action, cur_action->name); cur_command = get_first_command(cur_action);//从cur_action中取出第一个command } else { cur_command = get_next_command(cur_action, cur_command);//依次取出后面的command } if (!cur_command)//cur_command为null时返回 return; ret = cur_command->func(cur_command->nargs, cur_command->args);//这里才开始执行command操作 INFO("command '%s' r=%d\n", cur_command->args[0], ret); }当执行完一个action中的所有command后,通过for(;;)再依次执行action_queue中的下一个action
因此action_queue链表中的顺序就是系统真正的执行顺序,如图所示
到这里,大体上弄清楚了init的执行过程,但是这里有个疑问,所有的action都已经执行完毕,根本没有涉及到service
查看init.rc文件我们可以看到在on boot这个action中对应的command为
class_start core
class_start main
根据之前的parse_line_action我们可以跟踪到do_class_start函数
/*
 * Built-in "class_start <class>" command: start every service of class
 * args[1] that is not disabled. Always returns 0.
 */
int do_class_start(int nargs, char **args)
{
        /* Starting a class does not start services
         * which are explicitly disabled.  They must
         * be started individually.
         */
    const char *classname = args[1];

    service_for_each_class(classname, service_start_if_not_disabled);
    return 0;
}

/*
 * Call func() on every service in service_list whose classname equals
 * `classname`.
 */
void service_for_each_class(const char *classname,
                            void (*func)(struct service *svc))
{
    struct listnode *node;

    list_for_each(node, &service_list) {
        struct service *svc = node_to_item(node, struct service, slist);

        if (strcmp(svc->classname, classname) != 0)
            continue;
        func(svc);
    }
}
/* Article note: the classname compared here is the one set by the "class"
 * option handled in parse_line_service. */
static void service_start_if_not_disabled(struct service *svc) { if (!(svc->flags & SVC_DISABLED)) {//判断svc的flags是否为DISABLED service_start(svc, NULL); } } void service_start(struct service *svc, const char *dynamic_args) { struct stat s; pid_t pid; int needs_console; int n; /* starting a service removes it from the disabled or reset * state and immediately takes it out of the restarting * state if it was in there */ svc->flags &= (~(SVC_DISABLED|SVC_RESTARTING|SVC_RESET)); svc->time_started = 0; /* running processes require no additional work -- if * they're in the process of exiting, we've ensured * that they will immediately restart on exit, unless * they are ONESHOT */ if (svc->flags & SVC_RUNNING) { return; } needs_console = (svc->flags & SVC_CONSOLE) ? 1 : 0; if (needs_console && (!have_console)) { ERROR("service '%s' requires console\n", svc->name); svc->flags |= SVC_DISABLED; return; } if (stat(svc->args[0], &s) != 0) { ERROR("cannot find '%s', disabling '%s'\n", svc->args[0], svc->name); svc->flags |= SVC_DISABLED; return; } if ((!(svc->flags & SVC_ONESHOT)) && dynamic_args) { ERROR("service '%s' must be one-shot to use dynamic args, disabling\n", svc->args[0]); svc->flags |= SVC_DISABLED; return; } NOTICE("starting '%s'\n", svc->name); //以上主要设置该服务的一些标志 pid = fork();//通过fork()创建子进程 if (pid == 0) {//此为子进程 struct socketinfo *si; struct svcenvinfo *ei; char tmp[32]; int fd, sz; if (properties_inited()) { get_property_workspace(&fd, &sz);//获取属性系统句柄 sprintf(tmp, "%d,%d", dup(fd), sz); add_environment("ANDROID_PROPERTY_WORKSPACE", tmp); } for (ei = svc->envvars; ei; ei = ei->next) add_environment(ei->name, ei->value);//为该服务添加环境变量 for (si = svc->sockets; si; si = si->next) { int socket_type = ( !strcmp(si->type, "stream") ? SOCK_STREAM : (!strcmp(si->type, "dgram") ? 
SOCK_DGRAM : SOCK_SEQPACKET)); int s = create_socket(si->name, socket_type,//创建通信socket,相当于每个service都创建socket,用来与其它进程通信 si->perm, si->uid, si->gid); if (s >= 0) { publish_socket(si->name, s);//将句柄s添加到环境变量中,该环境变量为ANDROID_SOCKET_XXX } } if (svc->ioprio_class != IoSchedClass_NONE) { if (android_set_ioprio(getpid(), svc->ioprio_class, svc->ioprio_pri)) {//设置pid ERROR("Failed to set pid %d ioprio = %d,%d: %s\n", getpid(), svc->ioprio_class, svc->ioprio_pri, strerror(errno)); } } if (needs_console) { setsid(); open_console();//打开控制台 } else { zap_stdio(); } #if 0 for (n = 0; svc->args[n]; n++) { INFO("args[%d] = '%s'\n", n, svc->args[n]); } for (n = 0; ENV[n]; n++) { INFO("env[%d] = '%s'\n", n, ENV[n]); } #endif //配置进程id和组 setpgid(0, getpid()); /* as requested, set our gid, supplemental gids, and uid */ if (svc->gid) { if (setgid(svc->gid) != 0) { ERROR("setgid failed: %s\n", strerror(errno)); _exit(127); } } if (svc->nr_supp_gids) { if (setgroups(svc->nr_supp_gids, svc->supp_gids) != 0) { ERROR("setgroups failed: %s\n", strerror(errno)); _exit(127); } } if (svc->uid) { if (setuid(svc->uid) != 0) { ERROR("setuid failed: %s\n", strerror(errno)); _exit(127); } } if (!dynamic_args) { if (execve(svc->args[0], (char**) svc->args, (char**) ENV) < 0) { ERROR("cannot execve('%s'): %s\n", svc->args[0], strerror(errno)); } } else { char *arg_ptrs[INIT_PARSER_MAXARGS+1]; int arg_idx = svc->nargs; char *tmp = strdup(dynamic_args); char *next = tmp; char *bword; /* Copy the static arguments */ memcpy(arg_ptrs, svc->args, (svc->nargs * sizeof(char *))); while((bword = strsep(&next, " "))) { arg_ptrs[arg_idx++] = bword; if (arg_idx == INIT_PARSER_MAXARGS) break; } arg_ptrs[arg_idx] = '\0'; execve(svc->args[0], (char**) arg_ptrs, (char**) ENV);//执行新进程调用的函数 } _exit(127); } if (pid < 0) {//fork()错误 ERROR("failed to start '%s'\n", svc->name); svc->pid = 0; return; } svc->time_started = gettime(); svc->pid = pid; svc->flags |= SVC_RUNNING; if (properties_inited()) 
notify_service_state(svc->name, "running");//设置服务运行状态 }
从上面可以看出service的运行过程,同理,service_list链表也得到执行
execute_one_command函数在for循环中被反复调用:只要action_queue链表不为空,就取出并移除队首的action,每次调用执行其中的一条command;链表为空且没有正在执行的action时直接返回
restart_processes();//判断是否有进程需要重启 if (!property_set_fd_init && get_property_set_fd() > 0) {//系统属性 ufds[fd_count].fd = get_property_set_fd();//获取property系统fd ufds[fd_count].events = POLLIN; ufds[fd_count].revents = 0; fd_count++; property_set_fd_init = 1;//设置标志,下一次循环不会执行 } if (!signal_fd_init && get_signal_fd() > 0) {//进程间通信 ufds[fd_count].fd = get_signal_fd();//获取子进程信号处理fd ufds[fd_count].events = POLLIN; ufds[fd_count].revents = 0; fd_count++; signal_fd_init = 1; } if (!keychord_fd_init && get_keychord_fd() > 0) {//组合键检测(系统刷机按键等) ufds[fd_count].fd = get_keychord_fd();//获取组合键fd ufds[fd_count].events = POLLIN; ufds[fd_count].revents = 0; fd_count++; keychord_fd_init = 1; } if (process_needs_restart) { timeout = (process_needs_restart - gettime()) * 1000; if (timeout < 0) timeout = 0; } if (!action_queue_empty() || cur_action) timeout = 0; #if BOOTCHART if (bootchart_count > 0) { if (timeout < 0 || timeout > BOOTCHART_POLLING_MS) timeout = BOOTCHART_POLLING_MS; if (bootchart_step() < 0 || --bootchart_count == 0) { bootchart_finish(); bootchart_count = 0; } } #endif nr = poll(ufds, fd_count, timeout); if (nr <= 0) continue; for (i = 0; i < fd_count; i++) { if (ufds[i].revents == POLLIN) { if (ufds[i].fd == get_property_set_fd()) handle_property_set_fd(); else if (ufds[i].fd == get_keychord_fd()) handle_keychord(); else if (ufds[i].fd == get_signal_fd()) handle_signal(); } } } return 0; }
static void restart_processes() { process_needs_restart = 0; service_for_each_flags(SVC_RESTARTING, restart_service_if_needed); } void service_for_each_flags(unsigned matchflags, void (*func)(struct service *svc)) { struct listnode *node; struct service *svc; list_for_each(node, &service_list) {//遍历service_list链表 svc = node_to_item(node, struct service, slist); if (svc->flags & matchflags) {//判断服务标志是否为RESTARTING,成立就回调函数执行 func(svc); } } } static void restart_service_if_needed(struct service *svc) { time_t next_start_time = svc->time_started + 5; if (next_start_time <= gettime()) {//当前时间不小于启动时间就重启该服务 svc->flags &= (~SVC_RESTARTING);//清除RESTARTING标志 service_start(svc, NULL); return; } if ((next_start_time < process_needs_restart) || (process_needs_restart == 0)) { process_needs_restart = next_start_time; } }
这样一个服务就会被重启,但是死亡的服务它的标志是怎么样被设置成RESTARTING,这里有疑惑
从后面的代码可以看出,init采用I/O多路复用来监听3个句柄的情况,当可读时做相应的处理
在多路复用中,如果timeout==0,poll就不阻塞,立即返回;如果timeout>0,poll只有当等待时间超时或有事件发生时才返回;如果timeout==-1,poll会一直阻塞,直到有事件发生才返回
if (process_needs_restart) { timeout = (process_needs_restart - gettime()) * 1000;//设置等待时间 if (timeout < 0) timeout = 0; } if (!action_queue_empty() || cur_action)//如果action_queue和cur_action都不为空,timeout设为0,此时不阻塞,相当于此时不执行poll操作 timeout = 0; #if BOOTCHART if (bootchart_count > 0) { if (timeout < 0 || timeout > BOOTCHART_POLLING_MS) timeout = BOOTCHART_POLLING_MS; if (bootchart_step() < 0 || --bootchart_count == 0) { bootchart_finish(); bootchart_count = 0; } } #endif nr = poll(ufds, fd_count, timeout); if (nr <= 0)//如果超时等待,就不执行后面的处理,直接跳到for循环开始处,执行action或者重启service continue; for (i = 0; i < fd_count; i++) { if (ufds[i].revents == POLLIN) { if (ufds[i].fd == get_property_set_fd()) handle_property_set_fd(); else if (ufds[i].fd == get_keychord_fd()) handle_keychord(); else if (ufds[i].fd == get_signal_fd()) handle_signal(); } } }这里主要处理三大事件:属性设置事件,按键组合事件,子进程信号事件。前两者这里不做讨论,按键组合事件也只有在调试模式下才处理
从上面的分析中我们知道service是init调用fork创建的子进程,在Linux进程间通信中,可以通过SIGCHLD信号来通知子进程的状态
在之前执行action_queue中的action时,已经调用signal_init完成了信号处理的初始化
void signal_init(void) { int s[2]; struct sigaction act; act.sa_handler = sigchld_handler;//信号处理函数 act.sa_flags = SA_NOCLDSTOP; act.sa_mask = 0; act.sa_restorer = NULL; sigaction(SIGCHLD, &act, 0);//安装SIGCHLD信号处理器 /* create a signalling mechanism for the sigchld handler */ if (socketpair(AF_UNIX, SOCK_STREAM, 0, s) == 0) {//用于init进程中双端之间通信 signal_fd = s[0];//发送端socket fd signal_recv_fd = s[1];//接收端socket fd,并且被注册到poll系统监听 fcntl(s[0], F_SETFD, FD_CLOEXEC);//设置fd的属性 fcntl(s[0], F_SETFL, O_NONBLOCK);//非阻塞 fcntl(s[1], F_SETFD, FD_CLOEXEC); fcntl(s[1], F_SETFL, O_NONBLOCK); } handle_signal(); } static void sigchld_handler(int s) { write(signal_fd, &s, 1); } void handle_signal(void) { char tmp[32]; /* we got a SIGCHLD - reap and restart as needed */ read(signal_recv_fd, tmp, sizeof(tmp));//接收发送过来的数据 while (!wait_for_one_process(0))//一直执行wait_for_one_process,直到返回非0 ; }套接字可用于网络通信,也可以用于本机内的进程通信。由于本机内进程的IP地址都相同,因此只需要进程号来确定通信的双方。非网络通信套接字在Android系统中应用很多。Linux环境下使用socketpair函数创造一对未命名的、相互连接的UNIX域套接字。
在init进程中初始化signal_init,在init子进程死亡后会向init进程发送SIGCHLD信号,在init进程中已经注册该信号处理器sigchld_handler,在该函数中会向signal_fd发送信号的编号,而在另一端则接收这个数据,由于signal_recv_fd已注册在poll中,因此会调用handle_signal进行处理
/*
 * Reap one exited child and update the state of the service that owned it.
 *
 * block: non-zero makes waitpid() wait until a child exits; zero polls
 *        with WNOHANG and returns immediately when no child has exited.
 *
 * Returns -1 when waitpid() produced no child (nothing to reap, or an
 * error other than EINTR) and 0 after a child has been reaped, whether or
 * not it belonged to a known service.
 */
static int wait_for_one_process(int block)
{
    pid_t pid;
    int status;
    struct service *svc;
    struct socketinfo *si;
    time_t now;
    struct listnode *node;
    struct command *cmd;

    /* Wait for any child (-1); retry only if interrupted by a signal. */
    while ( (pid = waitpid(-1, &status, block ? 0 : WNOHANG)) == -1 && errno == EINTR );
    /* 0: WNOHANG and no child has exited yet; -1: waitpid() failed. */
    if (pid <= 0) return -1;
    INFO("waitpid returned pid %d, status = %08x\n", pid, status);

    /* Map the pid back to its entry in the service list. */
    svc = service_find_by_pid(pid);
    if (!svc) {
        ERROR("untracked pid %d exited\n", pid);
        return 0;
    }

    NOTICE("process '%s', pid %d exited\n", svc->name, pid);

    /*
     * Unless the service is oneshot, kill whatever is left in its process
     * group (a negative pid addresses the whole group).
     */
    if (!(svc->flags & SVC_ONESHOT)) {
        kill(-pid, SIGKILL);
        NOTICE("process '%s' killing any children in process group\n", svc->name);
    }

    /* remove any sockets we may have created */
    for (si = svc->sockets; si; si = si->next) {
        char tmp[128];
        snprintf(tmp, sizeof(tmp), ANDROID_SOCKET_DIR"/%s", si->name);
        unlink(tmp);
    }

    /* The service no longer has a running process. */
    svc->pid = 0;
    svc->flags &= (~SVC_RUNNING);

    /* oneshot processes go into the disabled state on exit */
    if (svc->flags & SVC_ONESHOT) {
        svc->flags |= SVC_DISABLED;
    }

    /* disabled and reset processes do not get restarted automatically */
    if (svc->flags & (SVC_DISABLED | SVC_RESET) ) {
        notify_service_state(svc->name, "stopped");
        return 0;
    }

    now = gettime();
    if (svc->flags & SVC_CRITICAL) {
        /*
         * This exit falls inside the window that began at time_crashed:
         * count it, and reboot into recovery once the count exceeds
         * CRITICAL_CRASH_THRESHOLD.
         */
        if (svc->time_crashed + CRITICAL_CRASH_WINDOW >= now) {
            if (++svc->nr_crashed > CRITICAL_CRASH_THRESHOLD) {
                ERROR("critical process '%s' exited %d times in %d minutes; "
                      "rebooting into recovery mode\n", svc->name,
                      CRITICAL_CRASH_THRESHOLD, CRITICAL_CRASH_WINDOW / 60);
                android_reboot(ANDROID_RB_RESTART2, 0, "recovery");
                return 0;
            }
        } else {
            /* Outside the window: start a new one with this crash. */
            svc->time_crashed = now;
            svc->nr_crashed = 1;
        }
    } else if (svc->flags & SVC_DALVIK_RECACHE) {
        /* The service exited within RECACHE_ENABLE_PHASE of being started. */
        if (svc->time_started + RECACHE_ENABLE_PHASE >= now) {
            /*
             * time_t has no printf conversion of its own, so cast to long
             * and print with %ld; terminate the line like every other log
             * call in this function.
             */
            ERROR("recacheabl process '%s' exited at(%ld) ,start(%ld)\n",
                  svc->name, (long) now, (long) svc->time_started);
            /*
             * NOTE(review): system() runs a shell from init and blocks
             * until it finishes; the '*' depends on shell globbing.
             * Confirm this is intended before relying on it.
             */
            system("/system/xbin/busybox rm /data/dalvik-cache/*");
            //android_reboot(ANDROID_RB_RESTART, 0, 0);
        }
    }

    /* Mark the service so that restart_processes() starts it again. */
    svc->flags |= SVC_RESTARTING;

    /* Execute all onrestart commands for this service. */
    list_for_each(node, &svc->onrestart.commands) {
        cmd = node_to_item(node, struct command, clist);
        cmd->func(cmd->nargs, cmd->args);
    }
    notify_service_state(svc->name, "restarting");
    return 0;
}
init进程进入死循环,监听三大事件,并查询action_queue与service_list链表,是否有action需要执行,是否有service需要重启,并进行处理。
至此,分析完毕。
1.三大链表action_list,action_queue,service_list, action_queue才是真正用来查询执行的,因此它决定执行顺序
2.注意源码中node_to_item,该宏通过链表节点的地址得到包含该节点的结构体的指针
3.service_list中的service是何时才被执行,怎样执行的
4.init进程死循环中,execute_one_command()和restart_processes()函数是怎么执行action和重启service_list服务的,尤其是对service重启的处理