macOS启动过程源码分析

1. vstart(osfmk\i386\i386_init.c)

/*
 * vstart() -- first C routine run on each CPU during startup.
 *
 * boot_args_start is non-zero only on the boot CPU; every other CPU enters
 * with 0 (see is_boot_cpu).  The boot CPU records the boot arguments,
 * computes the first free physical address, calls PE_init_platform() and
 * Idle_PTs_init(), and allocates the cpu_data for CPU 0.  The other CPUs
 * switch to the kernel page tables and look up their logical CPU number
 * from the local APIC ID.  Both paths then call cpu_desc_load64() and hand
 * off through x86_init_wrapper() to i386_init() (boot CPU) or
 * i386_init_slave() (other CPUs), passing that CPU's cpu_int_stack_top.
 */
void
vstart(vm_offset_t boot_args_start)
{
    /* Only the boot CPU is entered with a non-zero boot_args_start. */
    boolean_t   is_boot_cpu = !(boot_args_start == 0);
    int     cpu;
    uint32_t    lphysfree;

    postcode(VSTART_ENTRY);

    if (is_boot_cpu) {
        /*
         * Get startup parameters.
         */
        kernelBootArgs = (boot_args *)boot_args_start;
        /* kaddr + ksize: presumably the end of the loaded kernel image. */
        lphysfree = kernelBootArgs->kaddr + kernelBootArgs->ksize;
        /* Round that address up to the next page boundary. */
        physfree = (void *)(uintptr_t)((lphysfree + PAGE_SIZE - 1) &~ (PAGE_SIZE - 1));

#if DEVELOPMENT || DEBUG
        pal_serial_init();
#endif
        DBG("revision      0x%x\n", kernelBootArgs->Revision);
        DBG("version       0x%x\n", kernelBootArgs->Version);
        DBG("command line  %s\n", kernelBootArgs->CommandLine);
        DBG("memory map    0x%x\n", kernelBootArgs->MemoryMap);
        DBG("memory map sz 0x%x\n", kernelBootArgs->MemoryMapSize);
        DBG("kaddr         0x%x\n", kernelBootArgs->kaddr);
        DBG("ksize         0x%x\n", kernelBootArgs->ksize);
        DBG("physfree      %p\n", physfree);
        DBG("bootargs: %p, &ksize: %p &kaddr: %p\n",
            kernelBootArgs, 
            &kernelBootArgs->ksize,
            &kernelBootArgs->kaddr);
        DBG("SMBIOS mem sz 0x%llx\n", kernelBootArgs->PhysicalMemorySize);

        /*
         * Setup boot args given the physical start address.
         * Note: PE_init_platform needs to be called before Idle_PTs_init
         * because access to the DeviceTree is required to read the
         * random seed before generating a random physical map slide.
         */
        /* From here on kernelBootArgs holds the ml_static_ptovirt() translation. */
        kernelBootArgs = (boot_args *)
            ml_static_ptovirt(boot_args_start);
        DBG("i386_init(0x%lx) kernelBootArgs=%p\n",
            (unsigned long)boot_args_start, kernelBootArgs);
        PE_init_platform(FALSE, kernelBootArgs);
        postcode(PE_INIT_PLATFORM_D);

        Idle_PTs_init();
        postcode(VSTART_IDLE_PTS_INIT);

        first_avail = (vm_offset_t)ID_MAP_VTOP(physfree);

        /* The boot CPU is always logical CPU 0. */
        cpu = 0;
        cpu_data_alloc(TRUE);
    } else {
        /* Switch to kernel's page tables (from the Boot PTs) */
        set_cr3_raw((uintptr_t)ID_MAP_VTOP(IdlePML4));
        /* Find our logical cpu number */
        cpu = lapic_to_cpu[(LAPIC_READ(ID)>>LAPIC_ID_SHIFT) & LAPIC_ID_MASK];
        DBG("CPU: %d, GSBASE initial value: 0x%llx\n", cpu, rdmsr64(MSR_IA32_GS_BASE));
    }

    postcode(VSTART_CPU_DESC_INIT);
    /* Descriptor initialisation is done by the boot CPU only; loading is done by all. */
    if(is_boot_cpu)
        cpu_desc_init64(cpu_datap(cpu));
    cpu_desc_load64(cpu_datap(cpu));
    postcode(VSTART_CPU_MODE_INIT);
    if (is_boot_cpu)
        cpu_mode_init(current_cpu_datap()); /* cpu_mode_init() will be
                             * invoked on the APs
                             * via i386_init_slave()
                             */
    postcode(VSTART_EXIT);
    /*
     * Hand off to the per-role init routine together with this CPU's
     * interrupt stack top.  NOTE(review): x86_init_wrapper() presumably
     * switches to that stack and does not return -- confirm.
     */
    x86_init_wrapper(is_boot_cpu ? (uintptr_t) i386_init
                     : (uintptr_t) i386_init_slave,
             cpu_datap(cpu)->cpu_int_stack_top);
}

vstart 是 i386/x64 架构下的内核初始化函数,标志着从汇编代码到C语言代码的转换。它也是一个特殊的函数,因为这个函数既在主CPU(引导CPU)上执行,又在机器上所有的从CPU(slave CPU)或核心上执行。is_boot_cpu 用于判断当前是主CPU还是从CPU:对于从CPU,传入的 boot_args_start 参数为 0(NULL)。

对于主cpu:
PE_init_platform(Platform Expert):初始化全局变量PE_state,其中包含引导参数的副本、视频参数和其他参数。另外,还有以下功能:

  • Building the device tree (as described above)
  • Parse certain boot arguments
  • Identify the machine (including processor and bus clock speeds)
  • Initialize a “user interface” to be used in case of kernel panics
    最后通过x86_init_wrapper(is_boot_cpu ? (uintptr_t) i386_init : (uintptr_t) i386_init_slave, cpu_datap(cpu)->cpu_int_stack_top);函数分别在主cpu和从cpu执行不同初始化代码。

2. 主cpu运行i386_init()(osfmk\i386\i386_init.c)

首先是一堆初始化

    /* Excerpt from i386_init(): early bring-up calls made on the boot CPU. */
    pal_i386_init();
    tsc_init();
    rtclock_early_init();   /* mach_absolute_time() now functional */

    kernel_debug_string_simple("i386_init");
    pstate_trace();

#if CONFIG_MCA
    /* Initialize machine-check handling */
    mca_cpu_init();
#endif

    /* The boot CPU is recorded as master CPU 0 before cpu_init() runs. */
    master_cpu = 0;
    cpu_init();

    postcode(CPU_INIT_D);

    printf_init();          /* Init this in case we need debugger */
    panic_init();           /* Init this in case we need debugger */

(1) pal_i386_init:调用Platform Abstraction Layer初始化,实际上是一个初始化EFI锁的简单调用
(2)PE_init_platform:初始化全局变量PE_state,其中包含引导参数的副本、视频参数和其他参数。这个函数调用pe_identify_platform( ) 设置gPEClockFrequency
(3)kernel_early_bootstrap:调用lck_mod_init( ) 和 timer_call_initialize( ),用于定时器调用
(4)cpu_init:将当前CPU时钟定时器的deadline设置为“EndOfAllTime”(所有时间的终结),将时钟设置为永远运行之后,cpu_init( ) 调用i386_activate_cpu( )
(5)printf_init:为调试器调用。如果连接了调试器,内核调用printf( ) 输出的消息会重定向到调试器
(6)panic_init:调用初始化内核崩溃的重定向,使得发生内核崩溃的时候可以被连接的调试器截获
(7)PE_init_kprintf:调用使得kprintf( ) 能够输出到控制台
(8)检查serial控制台:检查“serial”引导参数。如果设置了的话。就会调用switch_to_serial_console( )
(9)PE_init_printf:调用使得printf( ) 能够输出到控制台
(10) 64位处理器检测:如果CPU支持的特性中包含CPUID_EXTFEATURE_EM64T标志,那么启用这项特性,除非内核命令行参数中传入“-legacy”参数
(11)i386_vm_init:从EFI接管虚拟内存管理。调用pmap_bootstrap初始化内核物理内存映射
最后调用

    /*   
     * VM initialization, after this we're using page tables...
     * The maximum number of cpus must be set beforehand.
     */
    kernel_debug_string_simple("i386_vm_init");
    i386_vm_init(maxmemtouse, IA32e, kernelBootArgs);

    /* create the console for verbose or pretty mode */
    /* Note: doing this prior to tsc_init() allows for graceful panic! */
    /* Second PE_init_platform() call, this time with TRUE as the first argument. */
    PE_init_platform(TRUE, kernelBootArgs);
    PE_create_console();

    kernel_debug_string_simple("power_management_init");
    power_management_init();
    processor_bootstrap();
    thread_bootstrap();

    pstate_trace();
    kernel_debug_string_simple("machine_startup");
    /* Continue with the next boot stage. */
    machine_startup();
    pstate_trace();

其中power_management_init()的功能是初始化结构体中的一个函数cstateInit:
/*
* Dispatch table for functions that get installed when the power
* management KEXT loads.
*
* pmDispatch_t is the set of functions that the kernel can use to call
* into the power management KEXT.
*
* pmCallBacks_t is the set of functions that the power management kext
* can call to get at specific kernel functions.
*/

/*
 * pmDispatch_t -- table of function pointers through which the kernel
 * calls into the power management KEXT; the entries are installed when
 * that KEXT loads.  Most entries take the logical CPU (x86_lcpu_t *) they
 * act on.  cstateInit is the entry the surrounding text associates with
 * power_management_init().
 */
typedef struct
{
    kern_return_t   (*pmCPUStateInit)(void);
    void        (*cstateInit)(void);
    uint64_t        (*MachineIdle)(uint64_t maxIdleDuration);
    uint64_t        (*GetDeadline)(x86_lcpu_t *lcpu);
    uint64_t        (*SetDeadline)(x86_lcpu_t *lcpu, uint64_t);
    void        (*Deadline)(x86_lcpu_t *lcpu);
    boolean_t       (*exitIdle)(x86_lcpu_t *lcpu);
    void        (*markCPURunning)(x86_lcpu_t *lcpu);
    int         (*pmCPUControl)(uint32_t cmd, void *datap);
    void        (*pmCPUHalt)(void);
    uint64_t        (*getMaxSnoop)(void);
    void        (*setMaxBusDelay)(uint64_t time);
    uint64_t        (*getMaxBusDelay)(void);
    void        (*setMaxIntDelay)(uint64_t time);
    uint64_t        (*getMaxIntDelay)(void);
    void        (*pmCPUSafeMode)(x86_lcpu_t *lcpu, uint32_t flags);
    void        (*pmTimerStateSave)(void);
    void        (*pmTimerStateRestore)(void);
    kern_return_t   (*exitHalt)(x86_lcpu_t *lcpu);
    kern_return_t   (*exitHaltToOff)(x86_lcpu_t *lcpu);
    void        (*markAllCPUsOff)(void);
    void        (*pmSetRunCount)(uint32_t count);
    boolean_t       (*pmIsCPUUnAvailable)(x86_lcpu_t *lcpu);
    int         (*pmChooseCPU)(int startCPU, int endCPU, int preferredCPU);
    int         (*pmIPIHandler)(void *state);
    void        (*pmThreadTellUrgency)(int urgency, uint64_t rt_period, uint64_t rt_deadline);
    void        (*pmActiveRTThreads)(boolean_t active);
    boolean_t           (*pmInterruptPrewakeApplicable)(void);
} pmDispatch_t;

(15)processor_bootstrap:初始化Mach的处理器子系统。这个函数初始化3个队列:task、terminated tasks 和 threads,创建了master_processor 对象,调用 processor_init( ), processor_init( )设置处理器数据结构中的字段值,并且将自己加入到默认 处理器组pset0 中;
(16)thread_bootstrap:设置mach线程对象的模板。Mach线程的结构具有很多字段,这个函数填充这些字段的默认值,然后设置第一个系统线程init_thread,这个线程从模板中继承所有的值,然后调用machine_set_current_thread( )将这个线程标记为当前CPU 上的活动线程;
(17)machine_startup:初始化下一阶段,永远不返回。

3. machine_startup()(osfmk\i386\at386\Model_dep.c)

/*
 * machine_startup
 *
 * Reads the debugging- and scheduler-related boot arguments through
 * PE_parse_boot_argn(), stores them in the corresponding globals, runs
 * machine_conf() and panic_hooks_init(), and then enters
 * kernel_bootstrap(), which is not expected to return.
 */
void
machine_startup(void)
{
    int arg_value;

#if 0
    if( PE_get_hotkey( kPEControlKey ))
            halt_in_debugger = halt_in_debugger ? 0 : 1;
#endif

    /* "debug": a bitmask of DB_* flags; absent means no debug flags. */
    if (!PE_parse_boot_argn("debug", &debug_boot_arg, sizeof (debug_boot_arg))) {
        debug_boot_arg = 0;
    } else {
        panicDebugging = TRUE;
#if DEVELOPMENT || DEBUG
        if (debug_boot_arg & DB_HALT) {
            halt_in_debugger = 1;
        }
#endif
        if (debug_boot_arg & DB_PRT) {
            disable_debug_output = FALSE;
        }
        if (debug_boot_arg & DB_SLOG) {
            systemLogDiags = TRUE;
        }
        if (debug_boot_arg & DB_LOG_PI_SCRN) {
            logPanicDataToScreen = TRUE;
        }
#if KDEBUG_MOJO_TRACE
        if (debug_boot_arg & DB_PRT_KDEBUG) {
            kdebug_serial = TRUE;
            disable_debug_output = FALSE;
        }
#endif
    }

    /* "nvram_paniclog": defaults to 1 when the argument is not supplied. */
    if (!PE_parse_boot_argn("nvram_paniclog", &commit_paniclog_to_nvram,
        sizeof (commit_paniclog_to_nvram))) {
        commit_paniclog_to_nvram = 1;
    }

    /*
     * "pmsafe_debug": entering the debugger will put the CPUs
     * into a "safe" power mode.
     */
    if (PE_parse_boot_argn("pmsafe_debug", &arg_value, sizeof (arg_value))) {
        pmsafe_debug = arg_value;
    }

#if NOTYET
    /* initialize debugger lock */
    hw_lock_init(&debugger_lock);
#endif
    /* initialize print backtrace lock */
    hw_lock_init(&pbtlock);

    /* Scheduler tunables: each global is overridden only if its argument is present. */
    if (PE_parse_boot_argn("preempt", &arg_value, sizeof (arg_value))) {
        default_preemption_rate = arg_value;
    }
    if (PE_parse_boot_argn("unsafe", &arg_value, sizeof (arg_value))) {
        max_unsafe_quanta = arg_value;
    }
    if (PE_parse_boot_argn("poll", &arg_value, sizeof (arg_value))) {
        max_poll_quanta = arg_value;
    }
    if (PE_parse_boot_argn("yield", &arg_value, sizeof (arg_value))) {
        sched_poll_yield_shift = arg_value;
    }

    /*
     * "panic_io_port": the I/O port to issue a read from in the event of
     * a panic (useful for triggering logic analyzers).  I/O ports range
     * from 0 through 0xFFFF, so only the low 16 bits are kept.
     */
    if (PE_parse_boot_argn("panic_io_port", &arg_value, sizeof (arg_value))) {
        panic_io_port = arg_value & 0xffff;
    }

    machine_conf();

    panic_hooks_init();

    /* Start the system. */
    kernel_bootstrap();
    /*NOTREACHED*/
}

这个函数主要负责解析一些命令行参数(通过 Platform Expert 提供的PE_parse_boot_argn函数),这些命令行参数大部分都是调试用的boot-arg,用于控制引导时的调试。

4.kernel_bootstrap()(osfmk\kern\Startup.c)


/*
 * kernel_bootstrap -- initialise the core kernel subsystems in a fixed
 * order (VM, scheduler, IPC, clock, tasks, threads, ...), then create the
 * first kernel thread running kernel_bootstrap_thread() and switch to it
 * with load_context().  Control is not expected to return here.
 */
void
kernel_bootstrap(void)
{
    kern_return_t   result;
    thread_t    thread;
    char        namep[16];

    printf("%s\n", version); /* log kernel version */

    /* namep is only a scratch buffer; the presence of the argument is what matters. */
    if (PE_parse_boot_argn("-l", namep, sizeof (namep))) /* leaks logging */
        turn_on_log_leaks = 1;

    /* Tracing-related boot arguments; each global keeps its prior value when absent. */
    PE_parse_boot_argn("trace", &new_nkdbufs, sizeof (new_nkdbufs));
    PE_parse_boot_argn("trace_wake", &wake_nkdbufs, sizeof (wake_nkdbufs));
    PE_parse_boot_argn("trace_panic", &write_trace_on_panic, sizeof(write_trace_on_panic));
    PE_parse_boot_argn("trace_typefilter", &trace_typefilter, sizeof(trace_typefilter));

    scale_setup();

    kernel_bootstrap_log("vm_mem_bootstrap");
    vm_mem_bootstrap();

    kernel_bootstrap_log("cs_init");
    cs_init();

    kernel_bootstrap_log("vm_mem_init");
    vm_mem_init();

    /* NOTE(review): mem_size is narrowed to 32 bits here -- confirm this is intended. */
    machine_info.memory_size = (uint32_t)mem_size;
    machine_info.max_mem = max_mem;
    machine_info.major_version = version_major;
    machine_info.minor_version = version_minor;


#if CONFIG_TELEMETRY
    kernel_bootstrap_log("telemetry_init");
    telemetry_init();
#endif

#if CONFIG_CSR
    kernel_bootstrap_log("csr_init");
    csr_init();
#endif

    /* Pointer hiding in printf output is turned off only when a debugger is permitted. */
    if (PE_i_can_has_debugger(NULL) &&
        PE_parse_boot_argn("-show_pointers", &namep, sizeof (namep))) {
        doprnt_hide_pointers = FALSE;
    }

    kernel_bootstrap_log("console_init");
    console_init();

    kernel_bootstrap_log("stackshot_lock_init");    
    stackshot_lock_init();

    kernel_bootstrap_log("sched_init");
    sched_init();

    kernel_bootstrap_log("waitq_bootstrap");
    waitq_bootstrap();

    kernel_bootstrap_log("ipc_bootstrap");
    ipc_bootstrap();

#if CONFIG_MACF
    kernel_bootstrap_log("mac_policy_init");
    mac_policy_init();
#endif

    kernel_bootstrap_log("ipc_init");
    ipc_init();

    /*
     * As soon as the virtual memory system is up, we record
     * that this CPU is using the kernel pmap.
     */
    kernel_bootstrap_log("PMAP_ACTIVATE_KERNEL");
    PMAP_ACTIVATE_KERNEL(master_cpu);

    kernel_bootstrap_log("mapping_free_prime");
    mapping_free_prime();                       /* Load up with temporary mapping blocks */

    kernel_bootstrap_log("machine_init");
    machine_init();

    kernel_bootstrap_log("clock_init");
    clock_init();

    ledger_init();

    /*
     *  Initialize the IPC, task, and thread subsystems.
     */
#if CONFIG_COALITIONS
    kernel_bootstrap_log("coalitions_init");
    coalitions_init();
#endif

    kernel_bootstrap_log("task_init");
    task_init();

    kernel_bootstrap_log("thread_init");
    thread_init();

#if CONFIG_ATM
    /* Initialize the Activity Trace Resource Manager. */
    kernel_bootstrap_log("atm_init");
    atm_init();
#endif

#if CONFIG_BANK
    /* Initialize the BANK Manager. */
    kernel_bootstrap_log("bank_init");
    bank_init();
#endif

    /* initialize the corpse config based on boot-args */
    corpses_init();

    /*
     *  Create a kernel thread to execute the kernel bootstrap.
     */
    kernel_bootstrap_log("kernel_thread_create");
    result = kernel_thread_create((thread_continue_t)kernel_bootstrap_thread, NULL, MAXPRI_KERNEL, &thread);

    if (result != KERN_SUCCESS) panic("kernel_bootstrap: result = %08X\n", result);

    /* Mark the new thread runnable by hand before switching to it. */
    thread->state = TH_RUN;
    thread->last_made_runnable_time = mach_absolute_time();
    /*
     * NOTE(review): the reference is dropped before load_context(thread);
     * presumably the thread stays alive through another reference -- confirm.
     */
    thread_deallocate(thread);

    kernel_bootstrap_log("load_context - done");
    load_context(thread);
    /*NOTREACHED*/
}

kernel_bootstrap 函数继续设置和初始化Mach内核的各个核心子系统,建立起BSD 所依赖的必要基础设施。除了虚拟内存之外,kernel_bootstrap 还初始化Mach的一些关键抽象:

IPC:IPC(进程间通信)是Mach构建的根基,IPC 要求一些重要的资源,例如内存、同步对象和Mach 接口生成器(Mach Interface Generator,MIG)
时钟(clock):通过时钟抽象实现闹铃(系统闹钟)和报时功能(“日历”)
线程(thread):线程是实际的执行单元。任务只不过是一个资源容器,真正被调度和执行的是线程。
kernel_bootstrap 函数不会返回,kernel_bootstrap 最后加载kernel_bootstrap_thread 线程的上下文,这是系统的第一个活动线程。这个线程会接管初始化的工作,处理更复杂的子系统。

5.kernel_bootstrap_thread(void)(osfmk\kern\Startup.c)

/*
 * kernel_bootstrap_thread -- body of the first kernel thread.
 *
 * Now running in a thread.  Kick off other services,
 * invoke user bootstrap, enter pageout loop.
 *
 * The thread binds itself to the processor it started on while the
 * remaining services are brought up, unbinds near the end, and finally
 * calls vm_pageout(); it is not expected to return.
 */
static void
kernel_bootstrap_thread(void)
{
    processor_t     processor = current_processor();

#define kernel_bootstrap_thread_kprintf(x...) /* kprintf("kernel_bootstrap_thread: " x) */
    kernel_bootstrap_thread_log("idle_thread_create");
    /*
     * Create the idle processor thread.
     */
    idle_thread_create(processor);

    /*
     * N.B. Do not stick anything else
     * before this point.
     *
     * Start up the scheduler services.
     */
    kernel_bootstrap_thread_log("sched_startup");
    sched_startup();

    /*
     * Thread lifecycle maintenance (teardown, stack allocation)
     */
    kernel_bootstrap_thread_log("thread_daemon_init");
    thread_daemon_init();

    /* Create kernel map entry reserve */
    vm_kernel_reserved_entry_init();

    /*
     * Thread callout service.
     */
    kernel_bootstrap_thread_log("thread_call_initialize");
    thread_call_initialize();

    /*
     * Remain on current processor as
     * additional processors come online.
     */
    kernel_bootstrap_thread_log("thread_bind");
    thread_bind(processor);

    /*
     * Initialize ipc thread call support.
     */
    kernel_bootstrap_thread_log("ipc_thread_call_init");
    ipc_thread_call_init();

    /*
     * Kick off memory mapping adjustments.
     */
    kernel_bootstrap_thread_log("mapping_adjust");
    mapping_adjust();

    /*
     *  Create the clock service.
     */
    kernel_bootstrap_thread_log("clock_service_create");
    clock_service_create();

    /*
     *  Create the device service.
     */
    device_service_create();

    /* Record that the kernel bootstrap thread has reached this point. */
    kth_started = 1;

#if (defined(__i386__) || defined(__x86_64__)) && NCOPY_WINDOWS > 0
    /*
     * Create and initialize the physical copy window for processor 0
     * This is required before starting kicking off  IOKit.
     */
    cpu_physwindow_init(0);
#endif



#if MACH_KDP 
    kernel_bootstrap_log("kdp_init");
    kdp_init();
#endif

#if ALTERNATE_DEBUGGER
    alternate_debugger_init();
#endif

#if KPC
    kpc_init();
#endif

#if CONFIG_ECC_LOGGING
    ecc_log_init();
#endif 

#if KPERF
    kperf_bootstrap();
#endif

#if HYPERVISOR
    hv_support_init();
#endif

#if CONFIG_TELEMETRY
    kernel_bootstrap_log("bootprofile_init");
    bootprofile_init();
#endif

#if (defined(__i386__) || defined(__x86_64__)) && CONFIG_VMX
    vmx_init();
#endif

#if (defined(__i386__) || defined(__x86_64__))
    /*
     * On x86, kernel tracing is started here, before IOKit is initialised.
     * kdebug_serial forces a buffer count of 1 and a non-zero typefilter;
     * leak logging without an explicit "trace" count uses 200000 buffers.
     */
    if (kdebug_serial) {
        new_nkdbufs = 1;
        if (trace_typefilter == 0)
            trace_typefilter = 1;
    }
    if (turn_on_log_leaks && !new_nkdbufs)
        new_nkdbufs = 200000;
    if (trace_typefilter)
        start_kern_tracing_with_typefilter(new_nkdbufs,
                           FALSE,
                           trace_typefilter);
    else
        start_kern_tracing(new_nkdbufs, FALSE);
    if (turn_on_log_leaks)
        log_leaks = 1;

#endif

    kernel_bootstrap_log("prng_init");
    prng_cpu_init(master_cpu);

#ifdef  IOKIT
    PE_init_iokit();
#endif

    /* Interrupts must still be disabled at this point; they are enabled just below. */
    assert(ml_get_interrupts_enabled() == FALSE);
    (void) spllo();     /* Allow interruptions */

#if (defined(__i386__) || defined(__x86_64__)) && NCOPY_WINDOWS > 0
    /*
     * Create and initialize the copy window for processor 0
     * This also allocates window space for all other processors.
     * However, this is dependent on the number of processors - so this call
     * must be after IOKit has been started because IOKit performs processor
     * discovery.
     */
    cpu_userwindow_init(0);
#endif

#if (!defined(__i386__) && !defined(__x86_64__))
    /* On non-x86 builds, tracing is started here, after IOKit initialisation. */
    if (turn_on_log_leaks && !new_nkdbufs)
        new_nkdbufs = 200000;
    if (trace_typefilter)
        start_kern_tracing_with_typefilter(new_nkdbufs, FALSE, trace_typefilter);
    else
        start_kern_tracing(new_nkdbufs, FALSE);
    if (turn_on_log_leaks)
        log_leaks = 1;
#endif

    /*
     *  Initialize the shared region module.
     */
    vm_shared_region_init();
    vm_commpage_init();
    vm_commpage_text_init();

#if CONFIG_MACF
    kernel_bootstrap_log("mac_policy_initmach");
    mac_policy_initmach();
#endif


#if CONFIG_SCHED_SFI
    kernel_bootstrap_log("sfi_init");
    sfi_init();
#endif

    /*
     * Initialize the globals used for permuting kernel
     * addresses that may be exported to userland as tokens
     * using VM_KERNEL_ADDRPERM()/VM_KERNEL_ADDRPERM_EXTERNAL().
     * Force the random number to be odd to avoid mapping a non-zero
     * word-aligned address to zero via addition.
     * Note: at this stage we can use the cryptographically secure PRNG
     * rather than early_random().
     */
    read_random(&vm_kernel_addrperm, sizeof(vm_kernel_addrperm));
    vm_kernel_addrperm |= 1;
    read_random(&buf_kernel_addrperm, sizeof(buf_kernel_addrperm));
    buf_kernel_addrperm |= 1;
    read_random(&vm_kernel_addrperm_ext, sizeof(vm_kernel_addrperm_ext));
    vm_kernel_addrperm_ext |= 1;

    vm_set_restrictions();



    /*
     *  Start the user bootstrap.
     */
#ifdef  MACH_BSD
    bsd_init();
#endif

    /*
     * Get rid of segments used to bootstrap kext loading. This removes
     * the KLD, PRELINK symtab, LINKEDIT, and symtab segments/load commands.
     */
    OSKextRemoveKextBootstrap();

    serial_keyboard_init();     /* Start serial keyboard if wanted */

    vm_page_init_local_q();

    /* Undo the earlier thread_bind(processor). */
    thread_bind(PROCESSOR_NULL);

    /*
     *  Become the pageout daemon.
     */
    vm_pageout();
    /*NOTREACHED*/
}

主线程开始以kernel_bootstrap_thread 线程的身份运行,这个线程的任务仍然是初始化各种子系统。

6.bsd_init()(bsd\kern\Bsd_init.c)


    /* Excerpt from bsd_init(). */
    /* Initialize signal state for process 0. */
    bsd_init_kprintf("calling siginit\n");
    siginit(kernproc);

    /* Set up the first user process via bsd_utaskbootstrap(). */
    bsd_init_kprintf("calling bsd_utaskbootstrap\n");
    bsd_utaskbootstrap();

#if defined(__LP64__)
    /* On LP64 builds, flag the kernel process with P_LP64. */
    kernproc->p_flag |= P_LP64;
#endif

XNU 的整个BSD 层的初始化都是由一个名为bsd_init( ) 的函数进行的。这个函数中执行了大量的工作,其职责是依次初始化各个子系统。

7.bsd_utaskbootstrap()(bsd\kern\Bsd_init.c)

/*
 * bsd_utaskbootstrap
 *
 * Clones the bootstrap process from kernproc, publishes it as initproc,
 * releases the transition locks taken during the clone, and flags the new
 * thread with a BSD AST so that the rest of the init setup happens when
 * that AST is taken.
 */
void
bsd_utaskbootstrap(void)
{
    thread_t init_thread;
    struct uthread *uth;

    /*
     * The clone inherits neither task characteristics nor memory from
     * the kernel process.
     */
    init_thread = cloneproc(TASK_NULL, COALITION_NULL, kernproc, FALSE, TRUE);

    /* This reference is kept on purpose; it is dropped during shutdown. */
    initproc = proc_find(1);
#if __PROC_INTERNAL_DEBUG
    if (PROC_NULL == initproc) {
        panic("bsd_utaskbootstrap: initproc not set\n");
    }
#endif

    /*
     * We are not returning to a parent the normal way, so the
     * transition locks have to be dropped explicitly.
     */
    proc_signalend(initproc, 0);
    proc_transend(initproc, 0);

    /* Start with an empty signal mask, then request the BSD AST. */
    uth = (struct uthread *)get_bsdthread_info(init_thread);
    uth->uu_sigmask = 0;
    act_set_astbsd(init_thread);
    proc_clear_return_wait(initproc, init_thread);
}

这个函数负责间接启动 PID 1,这是第一个要进入用户态的进程。为此,bsd_utaskbootstrap( ) 首先调用 cloneproc( ), 创建一个新的Mach 任务。
为了真正地创建出新的任务,bsd_utaskbootstrap( ) 对创建的线程调用act_set_astbsd( ),生成一个异步系统陷阱(asynchronous system trap,AST),接着让该线程恢复运行(早期版本中调用的是thread_resume( ),上面列出的源码版本中对应的调用是proc_clear_return_wait( )),然后bsd_utaskbootstrap( ) 返回至bsd_init( )

8. i386_astintr()(osfmk\i386\Trap.c)

/*
 * Handle AST traps for i386.
 */

extern void     log_thread_action (thread_t, char *);

/*
 * i386_astintr
 *
 * Takes pending ASTs at splsched level.  When preemption is non-zero only
 * AST_PREEMPTION is considered; otherwise every AST (AST_ALL) is.
 */
void
i386_astintr(int preemption)
{
    ast_t   reasons = preemption ? AST_PREEMPTION : AST_ALL;
    spl_t   saved_spl = splsched();

    ast_taken(reasons, saved_spl);

    /* Restore the level that was in effect on entry. */
    splx(saved_spl);
}

ast_taken()函数:(osfmk\kern\Ast.c)

        /*
         * The kernel preempt traps
         * skip all other ASTs.
         */
        if (!preempt_trap) {
            /* Put interrupts into the state given by 'enable' before running the hooks. */
            ml_set_interrupts_enabled(enable);

#ifdef  MACH_BSD
            /*
             * Handle BSD hook.
             * The AST_BSD bit is cleared before bsd_ast() is called.
             */
            if (reasons & AST_BSD) {
                thread_ast_clear(thread, AST_BSD);
                bsd_ast(thread);
            }
#endif
#if CONFIG_MACF
            /*
             * Handle MACF hook.
             */
            if (reasons & AST_MACF) {
                thread_ast_clear(thread, AST_MACF);
                mac_thread_userret(thread);
            }
#endif

bsd_ast()函数(bsd\kern\Kern_sig.c)

    /* One-shot: the first time this is reached, run bsdinit_task(). */
    if (!bsd_init_done) {
        bsd_init_done = 1;
        bsdinit_task();
    }

9. bsdinit_task()(bsd\kern\Bsd_init.c)

/*
 * bsdinit_task
 *
 * Runs in the context of the first manufactured process: names it "init",
 * starts the ux handler, registers ux_exception_port as the host exception
 * port, records the init task, and finally loads the init program.
 */
void
bsdinit_task(void)
{
    proc_t init_p = current_proc();
    struct uthread *uth;
    thread_t self;

    process_name("init", init_p);

    ux_handler_init();

    self = current_thread();

    /*
     * Every exception except EXC_MASK_RPC_ALERT goes to the ux exception
     * port; pilotfish (shark) needs this port.
     */
    (void) host_set_exception_ports(
        host_priv_self(),
        EXC_MASK_ALL & ~(EXC_MASK_RPC_ALERT),
        (mach_port_t) ux_exception_port,
        EXCEPTION_DEFAULT| MACH_EXCEPTION_CODES,
        0);

    /* Looked up as in the original; the value is not used further here. */
    uth = (uthread_t)get_bsdthread_info(self);

    bsd_init_task = get_threadtask(self);
    init_task_died = FALSE;

#if CONFIG_MACF
    mac_cred_label_associate_user(init_p->p_ucred);
#endif

    load_init_program(init_p);
    lock_trace = 1;
}

初始进程的名字设置为init,接下来调用ux_handler_init( ),这个调用创建一个独立的内核线程 ux_handler,这个线程负责处理UNIX 异常。最后,调用load_init_program( )。load_init_program( ) 负责将PID 为1 的进程转变为众所周知的launchd。这个线程的流程从此完全进入用户态。

10. load_init_program()(bsd\kern\Kern_exec.c)

/*
 * load_init_program
 *
 * Description: Load the "init" program; in most cases this is "launchd".
 *
 * Parameters:  p       Process that will exec the "init" program
 *
 * Returns:     (void)  Panics if no candidate could be executed.
 *
 * Notes:       p is the first manufactured process on the system; it
 *              arrives here when bsd_ast() fires for the first time, which
 *              ensures that bsd_init() has run to completion.
 *
 *              In DEBUG and DEVELOPMENT builds the launchdsuffix boot-arg
 *              may select a specific launchd executable.  As with the
 *              kcsuffix boot-arg, a suffix of "" or "release" forces
 *              /sbin/launchd.  Any other suffix is tried first and, if it
 *              fails, the regular search below is used.
 *
 *              The DEBUG kernel will continue to check for a .development
 *              version until <rdar://problem/17931977> is fixed.
 *
 *              Search order by build:
 *
 * DEBUG    DEVELOPMENT RELEASE     PATH
 * ----------------------------------------------------------------------------------
 * 1        1       NA      /usr/local/sbin/launchd.$LAUNCHDSUFFIX
 * 2        NA      NA      /usr/local/sbin/launchd.debug
 * 3        2       NA      /usr/local/sbin/launchd.development
 * 4        3       1       /sbin/launchd
 */
void
load_init_program(proc_t p)
{
    const uint32_t candidate_count = sizeof(init_programs)/sizeof(init_programs[0]);
    uint32_t idx;
    int err;
    vm_offset_t scratch_addr = VM_MIN_ADDRESS;

    /* One scratch page in the current map; its address is handed to every exec attempt. */
    (void) vm_allocate(current_map(), &scratch_addr, PAGE_SIZE, VM_FLAGS_ANYWHERE);
#if CONFIG_MEMORYSTATUS && CONFIG_JETSAM
    (void) memorystatus_init_at_boot_snapshot();
#endif /* CONFIG_MEMORYSTATUS && CONFIG_JETSAM */

#if DEBUG || DEVELOPMENT
    /* A launchdsuffix boot-arg takes precedence over the built-in list. */
    char suffix[64];
    if (PE_parse_boot_argn("launchdsuffix", suffix, sizeof(suffix))) {
        if ((suffix[0] == 0) || (strcmp(suffix, "release") == 0)) {
            /* Release suffix: /sbin/launchd or nothing. */
            err = load_init_program_at_path(p, CAST_USER_ADDR_T(scratch_addr), "/sbin/launchd");
            if (err) {
                panic("Process 1 exec of launchd.release failed, errno %d", err);
            }
            return;
        }

        char suffixed_path[128];
        strlcpy(suffixed_path, "/usr/local/sbin/launchd.", sizeof(suffixed_path));
        strlcat(suffixed_path, suffix, sizeof(suffixed_path));

        /*
         * The error from this attempt is not kept: the search below
         * overwrites it anyway.
         */
        if (load_init_program_at_path(p, CAST_USER_ADDR_T(scratch_addr), suffixed_path) == 0) {
            return;
        }
    }
#endif

    /* Walk the built-in candidates in order; the first successful exec wins. */
    err = ENOENT;
    idx = 0;
    while (idx < candidate_count) {
        err = load_init_program_at_path(p, CAST_USER_ADDR_T(scratch_addr), init_programs[idx]);
        if (err == 0) {
            return;
        }
        idx++;
    }

    /* Report the last path tried together with its error. */
    panic("Process 1 exec of %s failed, errno %d", ((idx == 0) ? "" : init_programs[idx-1]), err);
}

启动下面这些程序:

/*
 * Candidate init executables; load_init_program() tries them in array
 * order and stops at the first one that can be executed.
 */
static const char * init_programs[] = {
#if DEBUG
    "/usr/local/sbin/launchd.debug",
#endif
#if DEVELOPMENT || DEBUG
    /* Remove DEBUG conditional when <rdar://problem/17931977> is fixed */
    "/usr/local/sbin/launchd.development",
#endif
    "/sbin/launchd",
};

另外:
XNU 有很多引导参数,通常有两种方法向内核传递参数

通过NVRAM,利用boot-args 参数传递(可以通过nvram命令设置参数)
通过/Library/Preferences/SystemConfiguration/com.apple.Boot.plist 文件。这是一个标准的属性列表文件,在这个文件中,可以在Kernel Flags 元素中指定参数

你可能感兴趣的:(macOS源码分析)