在init进程启动之前,Android系统的启动主要执行了如下三个部分操作。
接通电源并启动时,引导芯片代码从预定义的地方(固化在ROM)开始执行,加载引导程序 BootLoader 到RAM中。
Android系统运行前会先运行 BootLoader,它的作用是拉起并运行系统OS。
在内核的启动过程中,会设置缓存、加载驱动等。当内核完成系统设置后,它会在系统文件中寻找 init.rc 文件,解析并启动 init 进程。
下面讲解init进程启动过程。
文中所涉及的源码版本为 Android 14
/system/core/init/main.cpp
int main(int argc, char** argv) {
#if __has_feature(address_sanitizer)
__asan_set_error_report_callback(AsanReportCallback);
#elif __has_feature(hwaddress_sanitizer)
__hwasan_set_error_report_callback(AsanReportCallback);
#endif
// Boost prio which will be restored later
setpriority(PRIO_PROCESS, 0, -20);
if (!strcmp(basename(argv[0]), "ueventd")) {
return ueventd_main(argc, argv);
}
if (argc > 1) {
if (!strcmp(argv[1], "subcontext")) {
android::base::InitLogging(argv, &android::base::KernelLogger);
const BuiltinFunctionMap& function_map = GetBuiltinFunctionMap();
return SubcontextMain(argc, argv, &function_map);
}
if (!strcmp(argv[1], "selinux_setup")) {
// SELinux 初始化
return SetupSelinux(argv);
}
if (!strcmp(argv[1], "second_stage")) {
// 第二阶段
return SecondStageMain(argc, argv);
}
}
// 第一阶段
return FirstStageMain(argc, argv);
}
init进程的创建是以该main函数作为入口函数开始的,在main函数中,根据参数的不同,初始化的内容有所不同。在kernel启动进来的时候是不带其他 argv 参数的,就直接走到了FirstStageMain。
/system/core/init/first_stage_init.cpp
int FirstStageMain(int argc, char** argv) {
if (REBOOT_BOOTLOADER_ON_PANIC) {
InstallRebootSignalHandlers();
}
boot_clock::time_point start_time = boot_clock::now();
std::vector> errors;
#define CHECKCALL(x) \
if ((x) != 0) errors.emplace_back(#x " failed", errno);
// Clear the umask.
// 清空文件权限,设置 0777
umask(0);
CHECKCALL(clearenv());
CHECKCALL(setenv("PATH", _PATH_DEFPATH, 1));
// Get the basic filesystem setup we need put together in the initramdisk
// on / and then we'll let the rc file figure out the rest.
// 挂载 dev 目录
CHECKCALL(mount("tmpfs", "/dev", "tmpfs", MS_NOSUID, "mode=0755"));
// 远程登录后创建的控制台设备文件所在的目录
CHECKCALL(mkdir("/dev/pts", 0755));
// socket节点所在目录
CHECKCALL(mkdir("/dev/socket", 0755));
CHECKCALL(mkdir("/dev/dm-user", 0755));
CHECKCALL(mount("devpts", "/dev/pts", "devpts", 0, NULL));
#define MAKE_STR(x) __STRING(x)
// 挂载 proc 目录
CHECKCALL(mount("proc", "/proc", "proc", 0, "hidepid=2,gid=" MAKE_STR(AID_READPROC)));
#undef MAKE_STR
// Don't expose the raw commandline to unprivileged processes.
CHECKCALL(chmod("/proc/cmdline", 0440));
std::string cmdline;
android::base::ReadFileToString("/proc/cmdline", &cmdline);
// Don't expose the raw bootconfig to unprivileged processes.
chmod("/proc/bootconfig", 0440);
std::string bootconfig;
android::base::ReadFileToString("/proc/bootconfig", &bootconfig);
gid_t groups[] = {AID_READPROC};
CHECKCALL(setgroups(arraysize(groups), groups));
// 创建和挂载启动所需要的文件目录
// 挂载 sys 目录,用来访问内核信息
CHECKCALL(mount("sysfs", "/sys", "sysfs", 0, NULL));
// 挂载 /sys/fs/selinux 目录,selinux相关节点所在目录
CHECKCALL(mount("selinuxfs", "/sys/fs/selinux", "selinuxfs", 0, NULL));
// 创建 kmsg 节点,用于保存 kenel log
CHECKCALL(mknod("/dev/kmsg", S_IFCHR | 0600, makedev(1, 11)));
if constexpr (WORLD_WRITABLE_KMSG) {
CHECKCALL(mknod("/dev/kmsg_debug", S_IFCHR | 0622, makedev(1, 11)));
}
CHECKCALL(mknod("/dev/random", S_IFCHR | 0666, makedev(1, 8)));
CHECKCALL(mknod("/dev/urandom", S_IFCHR | 0666, makedev(1, 9)));
// This is needed for log wrapper, which gets called before ueventd runs.
CHECKCALL(mknod("/dev/ptmx", S_IFCHR | 0666, makedev(5, 2)));
CHECKCALL(mknod("/dev/null", S_IFCHR | 0666, makedev(1, 3)));
// 挂载 mnt 目录,用于挂载光驱和 usb 设备
CHECKCALL(mount("tmpfs", "/mnt", "tmpfs", MS_NOEXEC | MS_NOSUID | MS_NODEV,
"mode=0755,uid=0,gid=1000"));
CHECKCALL(mkdir("/mnt/vendor", 0755));
CHECKCALL(mkdir("/mnt/product", 0755));
CHECKCALL(mount("tmpfs", "/debug_ramdisk", "tmpfs", MS_NOEXEC | MS_NOSUID | MS_NODEV,
"mode=0755,uid=0,gid=0"));
// stage init
CHECKCALL(mount("tmpfs", kSecondStageRes, "tmpfs", MS_NOEXEC | MS_NOSUID | MS_NODEV,
"mode=0755,uid=0,gid=0"))
#undef CHECKCALL
SetStdioToDevNull(argv);
// 初始化 kernel 的log,输出定向到 /dev/kmsg,这样就可以从外界获取 kernel 的日志
InitKernelLogging(argv);
if (!errors.empty()) {
for (const auto& [error_string, error_errno] : errors) {
LOG(ERROR) << error_string << " " << strerror(error_errno);
}
LOG(FATAL) << "Init encountered errors starting first stage, aborting";
}
LOG(INFO) << "init first stage started!";
auto old_root_dir = std::unique_ptr{opendir("/"), closedir};
if (!old_root_dir) {
PLOG(ERROR) << "Could not opendir(\"/\"), not freeing ramdisk";
}
struct stat old_root_info;
if (stat("/", &old_root_info) != 0) {
PLOG(ERROR) << "Could not stat(\"/\"), not freeing ramdisk";
old_root_dir.reset();
}
auto want_console = ALLOW_FIRST_STAGE_CONSOLE ? FirstStageConsole(cmdline, bootconfig) : 0;
auto want_parallel =
bootconfig.find("androidboot.load_modules_parallel = \"true\"") != std::string::npos;
boot_clock::time_point module_start_time = boot_clock::now();
int module_count = 0;
if (!LoadKernelModules(IsRecoveryMode() && !ForceNormalBoot(cmdline, bootconfig), want_console,
want_parallel, module_count)) {
if (want_console != FirstStageConsoleParam::DISABLED) {
LOG(ERROR) << "Failed to load kernel modules, starting console";
} else {
LOG(FATAL) << "Failed to load kernel modules";
}
}
if (module_count > 0) {
auto module_elapse_time = std::chrono::duration_cast(
boot_clock::now() - module_start_time);
setenv(kEnvInitModuleDurationMs, std::to_string(module_elapse_time.count()).c_str(), 1);
LOG(INFO) << "Loaded " << module_count << " kernel modules took "
<< module_elapse_time.count() << " ms";
}
bool created_devices = false;
if (want_console == FirstStageConsoleParam::CONSOLE_ON_FAILURE) {
if (!IsRecoveryMode()) {
created_devices = DoCreateDevices();
if (!created_devices) {
LOG(ERROR) << "Failed to create device nodes early";
}
}
StartConsole(cmdline);
}
if (access(kBootImageRamdiskProp, F_OK) == 0) {
std::string dest = GetRamdiskPropForSecondStage();
std::string dir = android::base::Dirname(dest);
std::error_code ec;
if (!fs::create_directories(dir, ec) && !!ec) {
LOG(FATAL) << "Can't mkdir " << dir << ": " << ec.message();
}
if (!fs::copy_file(kBootImageRamdiskProp, dest, ec)) {
LOG(FATAL) << "Can't copy " << kBootImageRamdiskProp << " to " << dest << ": "
<< ec.message();
}
LOG(INFO) << "Copied ramdisk prop to " << dest;
}
// If "/force_debuggable" is present, the second-stage init will use a userdebug
// sepolicy and load adb_debug.prop to allow adb root, if the device is unlocked.
if (access("/force_debuggable", F_OK) == 0) {
constexpr const char adb_debug_prop_src[] = "/adb_debug.prop";
constexpr const char userdebug_plat_sepolicy_cil_src[] = "/userdebug_plat_sepolicy.cil";
std::error_code ec; // to invoke the overloaded copy_file() that won't throw.
if (access(adb_debug_prop_src, F_OK) == 0 &&
!fs::copy_file(adb_debug_prop_src, kDebugRamdiskProp, ec)) {
LOG(WARNING) << "Can't copy " << adb_debug_prop_src << " to " << kDebugRamdiskProp
<< ": " << ec.message();
}
if (access(userdebug_plat_sepolicy_cil_src, F_OK) == 0 &&
!fs::copy_file(userdebug_plat_sepolicy_cil_src, kDebugRamdiskSEPolicy, ec)) {
LOG(WARNING) << "Can't copy " << userdebug_plat_sepolicy_cil_src << " to "
<< kDebugRamdiskSEPolicy << ": " << ec.message();
}
// setenv for second-stage init to read above kDebugRamdisk* files.
setenv("INIT_FORCE_DEBUGGABLE", "true", 1);
}
if (ForceNormalBoot(cmdline, bootconfig)) {
mkdir("/first_stage_ramdisk", 0755);
PrepareSwitchRoot();
// SwitchRoot() must be called with a mount point as the target, so we bind mount the
// target directory to itself here.
if (mount("/first_stage_ramdisk", "/first_stage_ramdisk", nullptr, MS_BIND, nullptr) != 0) {
PLOG(FATAL) << "Could not bind mount /first_stage_ramdisk to itself";
}
SwitchRoot("/first_stage_ramdisk");
}
if (!DoFirstStageMount(!created_devices)) {
LOG(FATAL) << "Failed to mount required partitions early ...";
}
struct stat new_root_info;
if (stat("/", &new_root_info) != 0) {
PLOG(ERROR) << "Could not stat(\"/\"), not freeing ramdisk";
old_root_dir.reset();
}
if (old_root_dir && old_root_info.st_dev != new_root_info.st_dev) {
FreeRamdisk(old_root_dir.get(), old_root_info.st_dev);
}
SetInitAvbVersionInRecovery();
setenv(kEnvFirstStageStartedAt, std::to_string(start_time.time_since_epoch().count()).c_str(),
1);
// 找到 system 分区下的 init 的二进制文件目录
const char* path = "/system/bin/init";
// 重启 init 进程,进行 selinux 初始化工作
const char* args[] = {path, "selinux_setup", nullptr};
auto fd = open("/dev/kmsg", O_WRONLY | O_CLOEXEC);
dup2(fd, STDOUT_FILENO);
dup2(fd, STDERR_FILENO);
close(fd);
// 通过 execv 来重新启动 init 进程
execv(path, const_cast(args));
// execv() only returns if an error happened, in which case we
// panic and never fall through this conditional.
PLOG(FATAL) << "execv(\"" << path << "\") failed";
return 1;
}
在第一阶段中,会创建并挂载一些基本的目录,然后初始化 kernel log 等。在第一阶段 init 完成后,会使用 selinux_setup 参数执行 /system/bin/init。流程就会回到 init 进程的入口 main 函数,参数中含有 selinux_setup,此时会执行 SetupSelinux 函数。
/system/core/init/selinux.cpp
int SetupSelinux(char** argv) {
SetStdioToDevNull(argv);
InitKernelLogging(argv);
if (REBOOT_BOOTLOADER_ON_PANIC) {
InstallRebootSignalHandlers();
}
boot_clock::time_point start_time = boot_clock::now();
MountMissingSystemPartitions();
SelinuxSetupKernelLogging();
LOG(INFO) << "Opening SELinux policy";
PrepareApexSepolicy();
// Read the policy before potentially killing snapuserd.
std::string policy;
ReadPolicy(&policy);
CleanupApexSepolicy();
auto snapuserd_helper = SnapuserdSelinuxHelper::CreateIfNeeded();
if (snapuserd_helper) {
// Kill the old snapused to avoid audit messages. After this we cannot
// read from /system (or other dynamic partitions) until we call
// FinishTransition().
snapuserd_helper->StartTransition();
}
LoadSelinuxPolicy(policy);
if (snapuserd_helper) {
// Before enforcing, finish the pending snapuserd transition.
snapuserd_helper->FinishTransition();
snapuserd_helper = nullptr;
}
// This restorecon is intentionally done before SelinuxSetEnforcement because the permissions
// needed to transition files from tmpfs to *_contexts_file context should not be granted to
// any process after selinux is set into enforcing mode.
if (selinux_android_restorecon("/dev/selinux/", SELINUX_ANDROID_RESTORECON_RECURSE) == -1) {
PLOG(FATAL) << "restorecon failed of /dev/selinux failed";
}
SelinuxSetEnforcement();
// We're in the kernel domain and want to transition to the init domain. File systems that
// store SELabels in their xattrs, such as ext4 do not need an explicit restorecon here,
// but other file systems do. In particular, this is needed for ramdisks such as the
// recovery image for A/B devices.
if (selinux_android_restorecon("/system/bin/init", 0) == -1) {
PLOG(FATAL) << "restorecon failed of /system/bin/init failed";
}
setenv(kEnvSelinuxStartedAt, std::to_string(start_time.time_since_epoch().count()).c_str(), 1);
// 确认 init 二进制程序路径
const char* path = "/system/bin/init";
// 重启 init 进入第二阶段
const char* args[] = {path, "second_stage", nullptr};
execv(path, const_cast(args));
// execv() only returns if an error happened, in which case we
// panic and never return from this function.
PLOG(FATAL) << "execv(\"" << path << "\") failed";
return 1;
}
该阶段会进行 SELinux 的相关初始化,包括加载 SELinux 策略文件等。完成后会使用 second_stage 参数执行 /system/bin/init,流程就会回到 init 进程的入口 main 函数,参数中含有 second_stage,此时会执行 SecondStageMain 函数。
/system/core/init/init.cpp
int SecondStageMain(int argc, char** argv) {
if (REBOOT_BOOTLOADER_ON_PANIC) {
InstallRebootSignalHandlers();
}
// No threads should be spin up until signalfd
// is registered. If the threads are indeed required,
// each of these threads _should_ make sure SIGCHLD signal
// is blocked. See b/223076262
boot_clock::time_point start_time = boot_clock::now();
trigger_shutdown = [](const std::string& command) { shutdown_state.TriggerShutdown(command); };
SetStdioToDevNull(argv);
InitKernelLogging(argv);
LOG(INFO) << "init second stage started!";
SelinuxSetupKernelLogging();
// Update $PATH in the case the second stage init is newer than first stage init, where it is
// first set.
if (setenv("PATH", _PATH_DEFPATH, 1) != 0) {
PLOG(FATAL) << "Could not set $PATH to '" << _PATH_DEFPATH << "' in second stage";
}
// Init should not crash because of a dependence on any other process, therefore we ignore
// SIGPIPE and handle EPIPE at the call site directly. Note that setting a signal to SIG_IGN
// is inherited across exec, but custom signal handlers are not. Since we do not want to
// ignore SIGPIPE for child processes, we set a no-op function for the signal handler instead.
{
struct sigaction action = {.sa_flags = SA_RESTART};
action.sa_handler = [](int) {};
sigaction(SIGPIPE, &action, nullptr);
}
// Set init and its forked children's oom_adj.
if (auto result =
WriteFile("/proc/1/oom_score_adj", StringPrintf("%d", DEFAULT_OOM_SCORE_ADJUST));
!result.ok()) {
LOG(ERROR) << "Unable to write " << DEFAULT_OOM_SCORE_ADJUST
<< " to /proc/1/oom_score_adj: " << result.error();
}
// Set up a session keyring that all processes will have access to. It
// will hold things like FBE encryption keys. No process should override
// its session keyring.
keyctl_get_keyring_ID(KEY_SPEC_SESSION_KEYRING, 1);
// Indicate that booting is in progress to background fw loaders, etc.
close(open("/dev/.booting", O_WRONLY | O_CREAT | O_CLOEXEC, 0000));
// See if need to load debug props to allow adb root, when the device is unlocked.
const char* force_debuggable_env = getenv("INIT_FORCE_DEBUGGABLE");
bool load_debug_prop = false;
if (force_debuggable_env && AvbHandle::IsDeviceUnlocked()) {
load_debug_prop = "true"s == force_debuggable_env;
}
unsetenv("INIT_FORCE_DEBUGGABLE");
// Umount the debug ramdisk so property service doesn't read .prop files from there, when it
// is not meant to.
if (!load_debug_prop) {
UmountDebugRamdisk();
}
// 初始化属性服务
PropertyInit();
// Umount second stage resources after property service has read the .prop files.
UmountSecondStageRes();
// Umount the debug ramdisk after property service has read the .prop files when it means to.
if (load_debug_prop) {
UmountDebugRamdisk();
}
// Mount extra filesystems required during second stage init
MountExtraFilesystems();
// Now set up SELinux for second stage.
SelabelInitialize();
SelinuxRestoreContext();
// 创建 epoll 描述符结合注册 socket 监听,对挂掉的子进程重启处理
Epoll epoll;
if (auto result = epoll.Open(); !result.ok()) {
PLOG(FATAL) << result.error();
}
// We always reap children before responding to the other pending functions. This is to
// prevent a race where other daemons see that a service has exited and ask init to
// start it again via ctl.start before init has reaped it.
epoll.SetFirstCallback(ReapAnyOutstandingChildren);
// 子进程信号处理函数,如果子进程(Zygote进程)异常退出,init进程会调用该函数设定的信号函数来处理
// 主要用于防止 init 进程的子进程成为僵尸进程。
InstallSignalFdHandler(&epoll);
InstallInitNotifier(&epoll);
// 启动属性服务
StartPropertyService(&property_fd);
// Make the time that init stages started available for bootstat to log.
// 记录init阶段的启动时间
RecordStageBoottimes(start_time);
// Set libavb version for Framework-only OTA match in Treble build.
if (const char* avb_version = getenv("INIT_AVB_VERSION"); avb_version != nullptr) {
SetProperty("ro.boot.avb_version", avb_version);
}
unsetenv("INIT_AVB_VERSION");
fs_mgr_vendor_overlay_mount_all();
export_oem_lock_status();
MountHandler mount_handler(&epoll);
SetUsbController();
SetKernelVersion();
const BuiltinFunctionMap& function_map = GetBuiltinFunctionMap();
Action::set_function_map(&function_map);
if (!SetupMountNamespaces()) {
PLOG(FATAL) << "SetupMountNamespaces failed";
}
InitializeSubcontext();
// 开始加载initrc中的Action、service等
ActionManager& am = ActionManager::GetInstance();
ServiceList& sm = ServiceList::GetInstance();
// 解析 init.rc 文件
LoadBootScripts(am, sm);
// Turning this on and letting the INFO logging be discarded adds 0.2s to
// Nexus 9 boot time, so it's disabled by default.
if (false) DumpState();
// Make the GSI status available before scripts start running.
auto is_running = android::gsi::IsGsiRunning() ? "1" : "0";
SetProperty(gsi::kGsiBootedProp, is_running);
auto is_installed = android::gsi::IsGsiInstalled() ? "1" : "0";
SetProperty(gsi::kGsiInstalledProp, is_installed);
// 触发队列中的Action
am.QueueBuiltinAction(SetupCgroupsAction, "SetupCgroups");
am.QueueBuiltinAction(SetKptrRestrictAction, "SetKptrRestrict");
am.QueueBuiltinAction(TestPerfEventSelinuxAction, "TestPerfEventSelinux");
am.QueueBuiltinAction(ConnectEarlyStageSnapuserdAction, "ConnectEarlyStageSnapuserd");
am.QueueEventTrigger("early-init");
// Queue an action that waits for coldboot done so we know ueventd has set up all of /dev...
am.QueueBuiltinAction(wait_for_coldboot_done_action, "wait_for_coldboot_done");
// ... so that we can start queuing up actions that require stuff from /dev.
am.QueueBuiltinAction(SetMmapRndBitsAction, "SetMmapRndBits");
// 设备组合键的初始化
Keychords keychords;
am.QueueBuiltinAction(
[&epoll, &keychords](const BuiltinArguments& args) -> Result {
for (const auto& svc : ServiceList::GetInstance()) {
keychords.Register(svc->keycodes());
}
keychords.Start(&epoll, HandleKeychord);
return {};
},
"KeychordInit");
// Trigger all the boot actions to get us started.
am.QueueEventTrigger("init");
// Don't mount filesystems or start core system services in charger mode.
std::string bootmode = GetProperty("ro.bootmode", "");
if (bootmode == "charger") {
am.QueueEventTrigger("charger");
} else {
am.QueueEventTrigger("late-init");
}
// Run all property triggers based on current state of the properties.
am.QueueBuiltinAction(queue_property_triggers_action, "queue_property_triggers");
// Restore prio before main loop
setpriority(PRIO_PROCESS, 0, 0);
while (true) {
// By default, sleep until something happens. Do not convert far_future into
// std::chrono::milliseconds because that would trigger an overflow. The unit of boot_clock
// is 1ns.
const boot_clock::time_point far_future = boot_clock::time_point::max();
boot_clock::time_point next_action_time = far_future;
auto shutdown_command = shutdown_state.CheckShutdown();
if (shutdown_command) {
LOG(INFO) << "Got shutdown_command '" << *shutdown_command
<< "' Calling HandlePowerctlMessage()";
HandlePowerctlMessage(*shutdown_command);
}
if (!(prop_waiter_state.MightBeWaiting() || Service::is_exec_service_running())) {
// 依次执行每个 action 中携带的 command 对应的执行函数
am.ExecuteOneCommand();
// If there's more work to do, wake up again immediately.
if (am.HasMoreCommands()) {
next_action_time = boot_clock::now();
}
}
// Since the above code examined pending actions, no new actions must be
// queued by the code between this line and the Epoll::Wait() call below
// without calling WakeMainInitThread().
if (!IsShuttingDown()) {
auto next_process_action_time = HandleProcessActions();
// If there's a process that needs restarting, wake up in time for that.
if (next_process_action_time) {
next_action_time = std::min(next_action_time, *next_process_action_time);
}
}
std::optional epoll_timeout;
if (next_action_time != far_future) {
epoll_timeout = std::chrono::ceil(
std::max(next_action_time - boot_clock::now(), 0ns));
}
auto epoll_result = epoll.Wait(epoll_timeout);
if (!epoll_result.ok()) {
LOG(ERROR) << epoll_result.error();
}
if (!IsShuttingDown()) {
HandleControlMessages();
SetUsbController();
}
}
return 0;
}
在该阶段,有一些初始化动作,包括初始化并启动属性服务;还创建了epoll,监听子进程的状态对挂掉的子进程重启处理;然后解析init.rc和其他init.*.rc文件(如init.car.rc),执行其中的命令;