这段时间,断断续续地忙了一阵,因为父亲的去世也不情愿地休息了20来天,一点也没有办法。回来后重新开始学习android的启动流程。对android系统级别的学习,阅读代码成为了唯一的办法,不像应用程序开发来得那么明了快捷。之前花了好多时间才对android的binder驱动做了一定的了解,最近几天从android的启动画面,分析到了init这个由内核最先启动的用户空间进程。参考内容包括老罗的android之旅和邓凡平老师的《深入理解Android》卷I,本人只是对学习做一定的总结,帮助自己进一步理解。
一 . init.c中的main函数(路径:system/core/init/init.c)
先给出main的源码,然后对个别关键函数进行分析
/*
 * Entry point of the init binary.
 *
 * When the program is invoked under the name "ueventd" it hands control to
 * ueventd_main(). Otherwise it:
 *   1. creates and mounts the minimal /dev, /proc and /sys hierarchy,
 *   2. parses /init.rc and the board-specific /init.<hardware>.rc,
 *   3. queues the boot-time actions in their fixed order,
 *   4. loops forever: runs one queued command, restarts services that need
 *      it, and polls the property, signal and keychord descriptors.
 *
 * The loop never exits, so the final return is not reached.
 */
int main(int argc, char **argv)
{
    int fd_count = 0;
    struct pollfd ufds[4];
    char tmp[32];
    int property_set_fd_init = 0;
    int signal_fd_init = 0;
    int keychord_fd_init = 0;

    /* The same binary serves as ueventd; dispatch on the invoked name. */
    if (!strcmp(basename(argv[0]), "ueventd"))
        return ueventd_main(argc, argv);

    /* clear the umask */
    umask(0);

    /* Get the basic filesystem setup we need put
     * together in the initramdisk on / and then we'll
     * let the rc file figure out the rest.
     */
    mkdir("/dev", 0755);
    mkdir("/proc", 0755);
    mkdir("/sys", 0755);

    mount("tmpfs", "/dev", "tmpfs", 0, "mode=0755");
    mkdir("/dev/pts", 0755);
    mkdir("/dev/socket", 0755);
    mount("devpts", "/dev/pts", "devpts", 0, NULL);
    mount("proc", "/proc", "proc", 0, NULL);
    mount("sysfs", "/sys", "sysfs", 0, NULL);

    /* We must have some place other than / to create the
     * device nodes for kmsg and null, otherwise we won't
     * be able to remount / read-only later on.
     * Now that tmpfs is mounted on /dev, we can actually
     * talk to the outside world.
     */
    open_devnull_stdio();
    log_init();

    INFO("reading config file\n");
    init_parse_config_file("/init.rc");

    /* pull the kernel commandline and ramdisk properties file in */
    import_kernel_cmdline(0);

    /* Parse the board-specific rc file, named after the hardware string. */
    get_hardware_name(hardware, &revision);
    snprintf(tmp, sizeof(tmp), "/init.%s.rc", hardware);
    init_parse_config_file(tmp);

    action_for_each_trigger("early-init", action_add_queue_tail);

    queue_builtin_action(wait_for_coldboot_done_action, "wait_for_coldboot_done");
    queue_builtin_action(property_init_action, "property_init");
    queue_builtin_action(keychord_init_action, "keychord_init");
    /* NOTE(review): console_init is reportedly where the second boot
     * splash screen is drawn -- confirm against console_init_action. */
    queue_builtin_action(console_init_action, "console_init");
    queue_builtin_action(set_init_properties_action, "set_init_properties");

    /* execute all the boot actions to get us started */
    action_for_each_trigger("init", action_add_queue_tail);
    action_for_each_trigger("early-fs", action_add_queue_tail);
    action_for_each_trigger("fs", action_add_queue_tail);
    action_for_each_trigger("post-fs", action_add_queue_tail);

    queue_builtin_action(property_service_init_action, "property_service_init");
    queue_builtin_action(signal_init_action, "signal_init");
    queue_builtin_action(check_startup_action, "check_startup");

    /* execute all the boot actions to get us started */
    action_for_each_trigger("early-boot", action_add_queue_tail);
    /* Queue every action whose trigger name is "boot". */
    action_for_each_trigger("boot", action_add_queue_tail);

    /* run all property triggers based on current state of the properties */
    queue_builtin_action(queue_property_triggers_action, "queue_propety_triggers");

#if BOOTCHART
    queue_builtin_action(bootchart_init_action, "bootchart_init");
#endif

    for (;;) {
        int nr, i, timeout = -1;

        /* Run at most one queued command per iteration. */
        execute_one_command();
        /* Check whether any service needs to be restarted. */
        restart_processes();

        /* Each descriptor is registered once, as soon as it becomes valid. */
        if (!property_set_fd_init && get_property_set_fd() > 0) {
            ufds[fd_count].fd = get_property_set_fd();
            ufds[fd_count].events = POLLIN;
            ufds[fd_count].revents = 0;
            fd_count++;
            property_set_fd_init = 1;
        }
        if (!signal_fd_init && get_signal_fd() > 0) {
            ufds[fd_count].fd = get_signal_fd();
            ufds[fd_count].events = POLLIN;
            ufds[fd_count].revents = 0;
            fd_count++;
            signal_fd_init = 1;
        }
        if (!keychord_fd_init && get_keychord_fd() > 0) {
            ufds[fd_count].fd = get_keychord_fd();
            ufds[fd_count].events = POLLIN;
            ufds[fd_count].revents = 0;
            fd_count++;
            keychord_fd_init = 1;
        }

        /* Bound the wait by the next pending restart, if there is one.
         * NOTE(review): presumably both values are in seconds -- confirm. */
        if (process_needs_restart) {
            timeout = (process_needs_restart - gettime()) * 1000;
            if (timeout < 0)
                timeout = 0;
        }

        /* Do not block while commands are still waiting to be executed. */
        if (!action_queue_empty() || cur_action)
            timeout = 0;

#if BOOTCHART
        if (bootchart_count > 0) {
            if (timeout < 0 || timeout > BOOTCHART_POLLING_MS)
                timeout = BOOTCHART_POLLING_MS;
            if (bootchart_step() < 0 || --bootchart_count == 0) {
                bootchart_finish();
                bootchart_count = 0;
            }
        }
#endif

        nr = poll(ufds, fd_count, timeout);
        if (nr <= 0)
            continue;

        for (i = 0; i < fd_count; i++) {
            /* revents is a bitmask: test the POLLIN bit rather than comparing
             * for equality, so readable data is still handled when another
             * flag is reported together with it. */
            if (ufds[i].revents & POLLIN) {
                if (ufds[i].fd == get_property_set_fd())
                    handle_property_set_fd();
                else if (ufds[i].fd == get_keychord_fd())
                    handle_keychord();
                else if (ufds[i].fd == get_signal_fd())
                    handle_signal();
            }
        }
    }

    return 0;
}
init作为用户空间第一个启动的进程,需要完成很多的任务。分以下部分内容来分析
1. ueventd进程
if (!strcmp(basename(argv[0]), "ueventd"))
return ueventd_main(argc, argv);
这段代码取出argv[0]中的文件名部分,比如/sbin/ueventd,则basename为ueventd。android系统第一次启动的进程名是init,所以此时ueventd_main不会执行。该函数真正执行是在init启动service ueventd /sbin/ueventd之后:init fork出一个子进程,并用execve启动/sbin/ueventd,而/sbin/ueventd实际上是指向init的符号链接,也就是说ueventd进程运行起来后执行的代码还是init.c中的main,因此不同的进程名执行的是同一个main函数。ueventd_main函数的主要功能:在Linux系统中现在都使用uevent机制来管理设备的热插拔事件,给用户空间权利来完成一些设备文件节点的创建。这种机制是建立在socket的通信机制上,用户空间和内核驱动进行交互,详细的机制没有去了解过。是linux2.6的版本中常用的机制。比如驱动出现device_create等时,会向用户空间报告一个uevent事件,用户空间由ueventd进程解析后去创建设备节点。
2.init.rc的解析
INFO("reading config file\n");
init_parse_config_file("/init.rc");
init.rc是一个配置文件,内部有许多的语言规则,所有语言会在init_parse_config_file中进行解析。调用流程如下:init_parse_config_file先调用read_file读入文件内容,再调用parse_config进行解析。
parse_config源码如下:
/*
 * Tokenize the text of an rc file and dispatch it line by line.
 *
 *   fn - file name, recorded in the parse state
 *   s  - contents of the file; parsing starts at this pointer
 *
 * Text tokens are collected into args[] until a newline arrives. If the
 * first word of the line is a SECTION keyword, the current section is
 * closed (parse_line called with no arguments) and a new one is opened via
 * parse_new_section(); otherwise the line goes to the current section's
 * parse_line handler. Words beyond INIT_PARSER_MAXARGS are dropped.
 */
static void parse_config(const char *fn, char *s)
{
    struct parse_state state;
    char *args[INIT_PARSER_MAXARGS];
    int nargs = 0;
    int token;

    state.filename = fn;
    state.line = 1;
    state.ptr = s;
    state.nexttoken = 0;
    state.parse_line = parse_line_no_op;

    while ((token = next_token(&state)) != T_EOF) {
        if (token == T_TEXT) {
            /* Collect the word unless the argument table is already full. */
            if (nargs < INIT_PARSER_MAXARGS)
                args[nargs++] = state.text;
            continue;
        }

        /* Only a newline that ends a non-empty line needs any work. */
        if (token != T_NEWLINE || nargs == 0)
            continue;

        int kw = lookup_keyword(args[0]);
        if (kw_is(kw, SECTION)) {
            /* Close the section in progress, then open the new one. */
            state.parse_line(&state, 0, 0);
            parse_new_section(&state, kw, nargs, args);
        } else {
            /* An ordinary line inside the current section. */
            state.parse_line(&state, nargs, args);
        }
        nargs = 0;
    }

    /* End of file: close whichever section is still open. */
    state.parse_line(&state, 0, 0);
}
/*
 * Read the rc file `fn` and parse it.
 *
 * Returns 0 on success, or -1 when read_file() yields no data.
 *
 * NOTE(review): the buffer returned by read_file() is not released here;
 * presumably the parsed structures keep pointers into it -- confirm before
 * adding a free().
 */
int init_parse_config_file(const char *fn)
{
    char *contents = read_file(fn, 0);

    if (contents == NULL)
        return -1;

    parse_config(fn, contents);
    DUMP();
    return 0;
}
这个函数中可以看到在for的无限循环中,主要对init.rc的内容进行解析,以一行一行进行读取,每读取完一行内容、换到下一行时,使用lookup_keyword分析已经读取的这一行的第一个参数,部分代码如下:
case 's':
if (!strcmp(s, "ervice")) return K_service;
if (!strcmp(s, "etenv")) return K_setenv;
if (!strcmp(s, "etkey")) return K_setkey;
if (!strcmp(s, "etprop")) return K_setprop;
if (!strcmp(s, "etrlimit")) return K_setrlimit;
if (!strcmp(s, "ocket")) return K_socket;
if (!strcmp(s, "tart")) return K_start;
if (!strcmp(s, "top")) return K_stop;
if (!strcmp(s, "ymlink")) return K_symlink;
if (!strcmp(s, "ysclktz")) return K_sysclktz;
该函数主要对每一行的第一个字符做case,然后再用strcmp比较第一个命令的剩余部分,这些命令都是按init.rc的格式要求来进行的。比如常用的service和on等经过lookup_keyword后返回K_service和K_on。随后使用kw_is(kw, SECTION)判断返回的kw是不是属于SECTION类型,在init.rc中只有service和on满足该类型,这样就会对on和service所在的段进行解析,我们这里首先分析service,以init.rc中的service zygote为例
service zygote /system/bin/app_process -Xzygote /system/bin --zygote --start-system-server
class main
socket zygote stream 666
onrestart write /sys/android_power/request_state wake
onrestart write /sys/power/state on
onrestart restart media
onrestart restart netd
当解析到这段代码时,执行parse_service
/*
 * Handle the header line of a "service" section.
 *
 *   args[1]   - service name
 *   args[2..] - program path followed by its arguments
 *
 * Allocates a zeroed struct service with room for the argument vector,
 * fills in the name, the default class and the arguments, and appends it to
 * service_list.
 *
 * Returns the new service, or 0 after reporting a parse error (too few
 * words, invalid name, duplicate definition, or allocation failure).
 */
static void *parse_service(struct parse_state *state, int nargs, char **args)
{
    struct service *svc;
    int prog_argc;

    if (nargs < 3) {
        parse_error(state, "services must have a name and a program\n");
        return 0;
    }
    if (!valid_name(args[1])) {
        parse_error(state, "invalid service name '%s'\n", args[1]);
        return 0;
    }

    /* A name that is already registered is rejected. */
    if (service_find_by_name(args[1]) != 0) {
        parse_error(state, "ignored duplicate definition of service '%s'\n", args[1]);
        return 0;
    }

    /* Everything after the keyword and the name is the command line. */
    prog_argc = nargs - 2;

    svc = calloc(1, sizeof(*svc) + sizeof(char*) * prog_argc);
    if (svc == 0) {
        parse_error(state, "out of memory\n");
        return 0;
    }

    svc->name = args[1];
    svc->classname = "default";    /* used unless a "class" option overrides it */

    /* args[0] of the service is the executable itself. */
    memcpy(svc->args, args + 2, sizeof(char*) * prog_argc);
    svc->args[prog_argc] = 0;
    svc->nargs = prog_argc;

    svc->onrestart.name = "onrestart";
    list_init(&svc->onrestart.commands);

    list_add_tail(&service_list, &svc->slist);
    return svc;
}
在这里args[1]就是zygote,系统会先查找是否已经存在该服务,然后构建一个service svc,进行相关的填充,包括服务名,服务所属的类别名字,以及服务启动带入的参数个数(要减去service和服务名zygote),最后将这个svc加入到service_list全局链表中。随后所做的是对Service的下面几行Option进行解析,比如class,socket,onrestart等等。使用的是parse_line_service函数,如下:
static void parse_line_service(struct parse_state *state, int nargs, char **args)
{
struct service *svc = state->context;
struct command *cmd;
int i, kw, kw_nargs;
if (nargs == 0) {
return;
}
svc->ioprio_class = IoSchedClass_NONE;
kw = lookup_keyword(args[0]);
switch (kw) {
case K_capability:
break;
case K_class:
if (nargs != 2) {
parse_error(state, "class option requires a classname\n");
} else {
svc->classname = args[1];//比如main,core类
}
break;
case K_console:
svc->flags |= SVC_CONSOLE;
break;
case K_disabled:
svc->flags |= SVC_DISABLED;
......
case K_onrestart:
nargs--;
args++;
kw = lookup_keyword(args[0]);
if (!kw_is(kw, COMMAND)) {
parse_error(state, "invalid command '%s'\n", args[0]);
break;
}
kw_nargs = kw_nargs(kw);
if (nargs < kw_nargs) {
parse_error(state, "%s requires %d %s\n", args[0], kw_nargs - 1,
kw_nargs > 2 ? "arguments" : "argument");
break;
}
cmd = malloc(sizeof(*cmd) + sizeof(char*) * nargs);
cmd->func = kw_func(kw);
cmd->nargs = nargs;
memcpy(cmd->args, args, sizeof(char*) * nargs);
list_add_tail(&svc->onrestart.commands, &cmd->clist);
break;
.......
}
这里以class这个keyword为例,会把当前svc的类名由默认的default改为main;socket和onrestart等option的处理方式类似。
到此为止整个service都解析完成 ,开始下一个section的内容。但是zygote这个服务进程的启动还没有开始,将在下面分析。
下面分析on字段的内容,以on boot这个section作为例子进行分析
on boot
ifup lo
hostname localhost
domainname localdomain
....
# Set this property so surfaceflinger is not started by system_init
setprop system_init.startsurfaceflinger 0
class_start core
class_start main
和前面的分析类似,case中进入K_on选项执行函数parse_action
/*
 * Handle the header line of an "on <trigger>" section.
 *
 *   args[1] - trigger name, stored as the action's name (e.g. boot, init)
 *
 * Allocates a zeroed struct action with an empty command list and appends
 * it to action_list.
 *
 * Returns the new action, or 0 after reporting a parse error (missing
 * trigger, extra parameters, or allocation failure).
 */
static void *parse_action(struct parse_state *state, int nargs, char **args)
{
    struct action *act;

    if (nargs < 2) {
        parse_error(state, "actions must have a trigger\n");
        return 0;
    }
    if (nargs > 2) {
        parse_error(state, "actions may not have extra parameters\n");
        return 0;
    }

    act = calloc(1, sizeof(*act));
    if (!act) {
        /* Report the failure the same way parse_service does rather than
         * dereferencing a NULL pointer below. */
        parse_error(state, "out of memory\n");
        return 0;
    }

    act->name = args[1];
    list_init(&act->commands);
    list_add_tail(&action_list, &act->alist);
    /* XXX add to hash */
    return act;
}
在这里可以看到一个action结构体类似于service,这个action的名字为boot,最后会将这个action加入到全局链表action_list中。
随后执行parse_line_action函数,对on字段所在的option进行解析,代码如下:
static void parse_line_action(struct parse_state* state, int nargs, char **args) //action所在的行
{
struct command *cmd;
struct action *act = state->context;//on boot启动
int (*func)(int nargs, char **args);
int kw, n;
if (nargs == 0) {
return;
}
kw = lookup_keyword(args[0]);//命令的参数个数
if (!kw_is(kw, COMMAND)) {
parse_error(state, "invalid command '%s'\n", args[0]);
return;
}
n = kw_nargs(kw);
if (nargs < n) {
parse_error(state, "%s requires %d %s\n", args[0], n - 1,
n > 2 ? "arguments" : "argument");
return;
}
cmd = malloc(sizeof(*cmd) + sizeof(char*) * nargs);
cmd->func = kw_func(kw);
cmd->nargs = nargs;
memcpy(cmd->args, args, sizeof(char*) * nargs);
list_add_tail(&act->commands, &cmd->clist); //
这里以class_start main为例,该关键字的定义为 KEYWORD(class_start, COMMAND, 1, do_class_start)。解析时会填充一个command结构体,包括这个cmd的执行函数(class_start对应的func为do_class_start)以及参数(关键字定义中要求1个参数,而cmd->nargs保存的是包含命令名在内的这一行的参数个数)。同时将这个cmd添加到该action自己的commands链表中。本文on boot的例子中有class_start core和class_start main这2个class_start的cmd。
至此,on和service两个section已经举例子分析完成。
3 下面继续分析main函数中的queue_builtin_action和action_for_each_trigger。
queue_builtin_action(console_init_action, "console_init");//第二个开机画面显示函数
该函数实现将console_init这个action添加到action_queue全局链表中。
action_for_each_trigger("boot", action_add_queue_tail);//把boot这个action添加到action_queue链表中
/*
 * Invoke `func` on every action in action_list whose name equals `trigger`.
 *
 *   trigger - trigger name to match (compared with strcmp)
 *   func    - callback applied to each matching action
 */
void action_for_each_trigger(const char *trigger,
                             void (*func)(struct action *act))
{
    struct listnode *node;

    list_for_each(node, &action_list) {
        struct action *candidate = node_to_item(node, struct action, alist);

        /* Skip actions registered for a different trigger. */
        if (strcmp(candidate->name, trigger) != 0)
            continue;

        func(candidate);
    }
}
在该函数中,首先遍历action_list链表,找到action,看是否有名字叫boot的trigger存在,我们知道刚才在解析init.rc中的on boot时,将boot这个作为action的name加入到了action_list中去,所以可以找到这个boot的action。成功匹配后调用action_add_queue_tail,将这个action加入到action_queue中,等待着执行。
4 for(;;)循环中执行execute_one_command
/*
 * Execute exactly one command from the queued actions.
 *
 * When no action is in progress, or the current one has just run its last
 * command, the next action is removed from the head of the queue and its
 * first command is selected; otherwise the next command of the current
 * action is selected. The selected command's function is called with its
 * stored arguments and the result is logged.
 *
 * Returns without doing anything when the queue is empty or no command is
 * available.
 */
void execute_one_command(void)
{
    int ret;
    int need_next_action = !cur_action || !cur_command ||
                           is_last_command(cur_action, cur_command);

    if (need_next_action) {
        cur_action = action_remove_queue_head();
        cur_command = NULL;
        if (cur_action == NULL)
            return;    /* nothing is queued */

        INFO("processing action %p (%s)\n", cur_action, cur_action->name);
        cur_command = get_first_command(cur_action);
    } else {
        cur_command = get_next_command(cur_action, cur_command);
    }

    if (cur_command == NULL)
        return;

    /* Dispatch to the handler stored when the command was parsed. */
    ret = cur_command->func(cur_command->nargs, cur_command->args);
    INFO("command '%s' r=%d\n", cur_command->args[0], ret);
}
使用action_remove_queue_head获取action_queue链表中的action后,移除该节点,使用get_first_command获得在action中的命令,比如这里出现的boot和console_init这两个action。针对console_init启动console_init_action这个函数。如果是boot则会对boot这个action所具有的commands链表进行cmd的获取,class_start的func指针函数为do_class_start:
/*
 * Handler for the "class_start <classname>" command.
 *
 *   args[1] - name of the class to start
 *
 * Always returns 0.
 */
int do_class_start(int nargs, char **args)
{
    char *classname = args[1];

    /* Starting a class does not start services
     * which are explicitly disabled.  They must
     * be started individually.
     */
    service_for_each_class(classname, service_start_if_not_disabled);

    return 0;
}
可以看到提取了命令行的第二个参数,如main,core等。在service_for_each_class中遍历service_list查找属于该类的service,如我们前面提到的zygote,查找到后执行service_start_if_not_disabled——>service_start。至此我们进入了启动service的代码
/*
 * Start the service `svc` in a newly forked child process.
 *
 *   svc          - service to start
 *   dynamic_args - optional space-separated extra arguments appended to the
 *                  service's own argument vector; only accepted for
 *                  SVC_ONESHOT services. NULL when there are none.
 *
 * The call clears SVC_DISABLED and SVC_RESTARTING, and returns immediately
 * if the service is already running. The service is marked SVC_DISABLED and
 * not started when it needs a console that is not available, when its
 * executable cannot be stat()ed, or when dynamic args are passed to a
 * service that is not one-shot.
 *
 * In the child: the property workspace and the service's environment
 * variables are exported, its sockets are created and published, the I/O
 * priority is applied, stdio is set up, credentials are dropped, and the
 * program is exec'd. The child exits with status 127 if a credential change
 * fails or the exec does not happen.
 *
 * In the parent: on success the pid and start time are recorded, the
 * service is flagged SVC_RUNNING, and the state change is published when
 * properties are initialised.
 */
void service_start(struct service *svc, const char *dynamic_args)
{
    struct stat s;
    pid_t pid;
    int needs_console;
    int n;

        /* starting a service removes it from the disabled
         * state and immediately takes it out of the restarting
         * state if it was in there
         */
    svc->flags &= (~(SVC_DISABLED|SVC_RESTARTING));
    svc->time_started = 0;

        /* running processes require no additional work -- if
         * they're in the process of exiting, we've ensured
         * that they will immediately restart on exit, unless
         * they are ONESHOT
         */
    if (svc->flags & SVC_RUNNING) {
        return;
    }

    needs_console = (svc->flags & SVC_CONSOLE) ? 1 : 0;
    if (needs_console && (!have_console)) {
        ERROR("service '%s' requires console\n", svc->name);
        svc->flags |= SVC_DISABLED;
        return;
    }

    /* The executable must exist before we bother forking. */
    if (stat(svc->args[0], &s) != 0) {
        ERROR("cannot find '%s', disabling '%s'\n", svc->args[0], svc->name);
        svc->flags |= SVC_DISABLED;
        return;
    }

    if ((!(svc->flags & SVC_ONESHOT)) && dynamic_args) {
        ERROR("service '%s' must be one-shot to use dynamic args, disabling\n",
               svc->args[0]);
        svc->flags |= SVC_DISABLED;
        return;
    }

    NOTICE("starting '%s'\n", svc->name);

    pid = fork();

    if (pid == 0) {
        /* Child process. */
        struct socketinfo *si;
        struct svcenvinfo *ei;
        char tmp[32];
        int fd, sz;

        if (properties_inited()) {
            /* Hand the property workspace to the child via the environment. */
            get_property_workspace(&fd, &sz);
            snprintf(tmp, sizeof(tmp), "%d,%d", dup(fd), sz);
            add_environment("ANDROID_PROPERTY_WORKSPACE", tmp);
        }

        for (ei = svc->envvars; ei; ei = ei->next)
            add_environment(ei->name, ei->value);

        for (si = svc->sockets; si; si = si->next) {
            int socket_type = (
                    !strcmp(si->type, "stream") ? SOCK_STREAM :
                        (!strcmp(si->type, "dgram") ? SOCK_DGRAM : SOCK_SEQPACKET));
            int sock = create_socket(si->name, socket_type,
                                     si->perm, si->uid, si->gid);
            if (sock >= 0) {
                publish_socket(si->name, sock);
            }
        }

        if (svc->ioprio_class != IoSchedClass_NONE) {
            if (android_set_ioprio(getpid(), svc->ioprio_class, svc->ioprio_pri)) {
                ERROR("Failed to set pid %d ioprio = %d,%d: %s\n",
                      getpid(), svc->ioprio_class, svc->ioprio_pri, strerror(errno));
            }
        }

        if (needs_console) {
            setsid();
            open_console();
        } else {
            zap_stdio();
        }

#if 0
        for (n = 0; svc->args[n]; n++) {
            INFO("args[%d] = '%s'\n", n, svc->args[n]);
        }
        for (n = 0; ENV[n]; n++) {
            INFO("env[%d] = '%s'\n", n, ENV[n]);
        }
#endif

        setpgid(0, getpid());

        /* as requested, set our gid, supplemental gids, and uid.
         * A failure here must be fatal: continuing would exec the service
         * with the credentials of init instead of the requested ones. */
        if (svc->gid) {
            if (setgid(svc->gid) != 0) {
                ERROR("setgid failed: %s\n", strerror(errno));
                _exit(127);
            }
        }
        if (svc->nr_supp_gids) {
            if (setgroups(svc->nr_supp_gids, svc->supp_gids) != 0) {
                ERROR("setgroups failed: %s\n", strerror(errno));
                _exit(127);
            }
        }
        if (svc->uid) {
            if (setuid(svc->uid) != 0) {
                ERROR("setuid failed: %s\n", strerror(errno));
                _exit(127);
            }
        }

        if (!dynamic_args) {
            /* Replace the child image with the service executable. */
            if (execve(svc->args[0], (char**) svc->args, (char**) ENV) < 0) {
                ERROR("cannot execve('%s'): %s\n", svc->args[0], strerror(errno));
            }
        } else {
            char *arg_ptrs[INIT_PARSER_MAXARGS+1];
            int arg_idx = svc->nargs;
            char *args_copy = strdup(dynamic_args);
            char *next = args_copy;
            char *bword;

            /* Copy the static arguments */
            memcpy(arg_ptrs, svc->args, (svc->nargs * sizeof(char *)));

            /* Append the space-separated dynamic arguments, up to the limit. */
            while((bword = strsep(&next, " "))) {
                arg_ptrs[arg_idx++] = bword;
                if (arg_idx == INIT_PARSER_MAXARGS)
                    break;
            }
            arg_ptrs[arg_idx] = NULL;
            execve(svc->args[0], (char**) arg_ptrs, (char**) ENV);
        }
        /* Only reached when execve failed. */
        _exit(127);
    }

    if (pid < 0) {
        ERROR("failed to start '%s'\n", svc->name);
        svc->pid = 0;
        return;
    }

    /* Parent process: record the running child. */
    svc->time_started = gettime();
    svc->pid = pid;
    svc->flags |= SVC_RUNNING;

    if (properties_inited())
        notify_service_state(svc->name, "running");
}
分析这段代码,主要内容:
a.检查当前service如zygote的flag即SVC_RUNNING(服务运行中),SVC_DISABLED等
b.fork一个子进程,子进程中会建立一个socket用于通信,同时使用if (execve(svc->args[0], (char**) svc->args, (char**) ENV) < 0)执行zygote对应的可执行文件,至此service zygote真正的启动。
到这里为止,对android系统的init启动有了清晰的了解,对init如何启动adbd,zygote等service有了一定的了解,以及对init.rc有了清晰的认识。init中还有部分内容等着后续几天做一定的学习。
补充:service进程的重启在restart_processes中进行,它会重启flag为SVC_RESTARTING的服务。这部分进程的重启其实在init中由handle_signal来管理,一旦出现service崩溃,poll函数会接收到相关文件描述符上的事件,执行handle_signal中的wait_for_one_process
static int wait_for_one_process(int block)
{
pid_t pid;
int status;
struct service *svc;
struct socketinfo *si;
time_t now;
struct listnode *node;
struct command *cmd;
while ( (pid = waitpid(-1, &status, block ? 0 : WNOHANG)) == -1 && errno == EINTR );
if (pid <= 0) return -1;
INFO("waitpid returned pid %d, status = %08x\n", pid, status);
svc = service_find_by_pid(pid);
if (!svc) {
ERROR("untracked pid %d exited\n", pid);
return 0;
}
.....
svc->flags |= SVC_RESTARTING;
/* Execute all onrestart commands for this service. */
list_for_each(node, &svc->onrestart.commands) {
cmd = node_to_item(node, struct command, clist);
cmd->func(cmd->nargs, cmd->args);
}
notify_service_state(svc->name, "restarting");
return 0;
}
该函数使用waitpid,找到退出的子进程的进程号pid,然后查找到对应的service,执行该service中onrestart的commands,如restart media等。同时将service的flag设置为SVC_RESTARTING,这样就结合前面讲到的restart_processes重新启动该服务进程。