Kernel Boot Flow Source Code Analysis 5: start_kernel (Part 2)


1 start_kernel

The start_kernel function is the first C function executed during kernel boot; it completes kernel initialization by calling a long series of initialization functions. This second part analyzes the code after local_irq_enable().

1.0 start_kernel

Defined in init/main.c

asmlinkage void __init start_kernel(void)
{
    char * command_line;
    extern const struct kernel_param __start___param[], __stop___param[];

    /*
     * Need to run as early as possible, to initialize the
     * lockdep hash:
     */
    lockdep_init(); // initialize the hash tables used by the kernel deadlock detector (lockdep)
    smp_setup_processor_id(); // get the current CPU number; returns 0 on single-core systems
    debug_objects_early_init(); // early initialization of the debug-objects facility

    cgroup_init_early(); // early initialization of Control Groups

    local_irq_disable(); // disable interrupts on the current CPU
    early_boot_irqs_disabled = true;

/*
 * Interrupts are still disabled. Do necessary setups, then
 * enable them
 */
    boot_cpu_init(); // mark the current CPU as active
    page_address_init();  // initialize the high-memory page address mapping; unused on ARM
    pr_notice("%s", linux_banner);
    setup_arch(&command_line); // architecture-specific kernel initialization
    /*
     * Set up the the initial canary ASAP:
     */
    boot_init_stack_canary(); // initialize the stack canary, a guard value protecting against stack-smashing attacks
    mm_init_owner(&init_mm, &init_task); // mm.owner = &init_task
    mm_init_cpumask(&init_mm);
    setup_command_line(command_line); // save a backup copy of the command line
    setup_nr_cpu_ids(); // set nr_cpu_ids
    setup_per_cpu_areas(); // allocate space for each CPU's copy of the per-cpu variables
    smp_prepare_boot_cpu();    /* arch-specific boot-cpu hooks */

    build_all_zonelists(NULL, NULL); // build the memory zone (zonelist) lists
    page_alloc_init(); // page allocator initialization

    pr_notice("Kernel command line: %s\n", boot_command_line);
    parse_early_param(); // parse boot parameters that need "early" handling; setup_arch() has already called this once
    parse_args("Booting kernel", static_command_line, __start___param,
           __stop___param - __start___param,
           -1, -1, &unknown_bootoption); // parse the boot parameters on the command line

    jump_label_init(); // process statically defined jump labels

    /*
     * These use large bootmem allocations and must precede
     * kmem_cache_init()
     */
    setup_log_buf(0); // allocate a boot-time log buffer with memblock_alloc
    pidhash_init(); // initialize the PID hash table
    vfs_caches_init_early(); // initialize the dentry and inode hash tables
    sort_main_extable(); // sort the kernel exception table
    trap_init(); // initialize kernel trap/exception handling; empty on ARM
    mm_init(); // initialize the kernel memory allocators: hand memory over to the buddy system, start the slab allocator, set up the vmalloc area

    /*
     * Set up the scheduler prior starting any interrupts (such as the
     * timer interrupt). Full topology setup happens at smp_init()
     * time - but meanwhile we still have a functioning scheduler.
     */
    sched_init(); // initialize the process scheduler
    /*
     * Disable preemption - early bootup scheduling is extremely
     * fragile until we cpu_idle() for the first time.
     */
    preempt_disable(); // disable kernel preemption
    if (WARN(!irqs_disabled(), "Interrupts were enabled *very* early, fixing it\n"))
        local_irq_disable(); // disable local interrupts
    idr_init_cache(); // create the slab cache for the idr (integer ID management) mechanism
    perf_event_init(); // initialize the perf events subsystem
    rcu_init(); // initialize the RCU (read-copy-update) mechanism
    tick_nohz_init(); // initialize the dynamic tick (NOHZ) framework
    radix_tree_init(); // initialize the kernel radix tree implementation
    /* init some links before init_ISA_irqs() */
    early_irq_init(); // not used on arm64
    init_IRQ(); // initialize interrupt handling
    tick_init(); // initialize the clock tick controller
    init_timers(); // initialize kernel timers
    hrtimers_init(); // initialize high-resolution timers
    softirq_init(); // initialize softirqs
    timekeeping_init();  // initialize the many timekeeping global variables
    time_init(); // clock initialization
    profile_init(); // initialize the kernel profiling tool
    call_function_init(); // initialize SMP cross-CPU function calls
    WARN(!irqs_disabled(), "Interrupts were enabled early\n");
    early_boot_irqs_disabled = false;
    local_irq_enable(); // enable interrupts on the current CPU
// -----------------------------------------------------------
    kmem_cache_init_late(); // finish initializing the slab allocator's caches

    /*
     * HACK ALERT! This is early. We're enabling the console before
     * we've done PCI setups etc, and console_init() must be aware of
     * this. But we do want output early, in case something goes wrong.
     */
    console_init(); // initialize the console
    if (panic_later)
        panic(panic_later, panic_param);

    lockdep_info(); // print lock dependency (lockdep) information

    /*
     * Need to run this when irqs are enabled, because it wants
     * to self-test [hard/soft]-irqs on/off lock inversion bugs
     * too:
     */
    locking_selftest(); // run the locking API self-tests (deadlock detection)

#ifdef CONFIG_BLK_DEV_INITRD // check that the initrd location is valid
    if (initrd_start && !initrd_below_start_ok &&
        page_to_pfn(virt_to_page((void *)initrd_start)) < min_low_pfn) {
        pr_crit("initrd overwritten (0x%08lx < 0x%08lx) - disabling it.\n",
            page_to_pfn(virt_to_page((void *)initrd_start)),
            min_low_pfn);
        initrd_start = 0;
    }
#endif
    page_cgroup_init(); // allocate storage for the page_cgroup structures
    debug_objects_mem_init(); // create the slab cache for debug objects
    kmemleak_init(); // initialize the kernel memory-leak detector
    setup_per_cpu_pageset(); // set up and initialize each CPU's page sets
    numa_policy_init(); // initialize the NUMA memory access policy
    if (late_time_init) // empty on arm64
        late_time_init();
    sched_clock_init(); // initialize the scheduler clock
    calibrate_delay(); // calibrate the delay loop
    pidmap_init(); // initialize the process PID bitmap
    anon_vma_init(); // create the anon_vma slab caches
#ifdef CONFIG_X86
    if (efi_enabled(EFI_RUNTIME_SERVICES))
        efi_enter_virtual_mode();
#endif
    thread_info_cache_init(); // create the slab cache for thread_info structures
    cred_init(); // create the slab cache for the task credential system
    fork_init(totalram_pages); // initialize the process creation machinery
    proc_caches_init(); // create the slab caches for the structures a process needs
    buffer_init(); // create the slab cache for buffer_head structures
    key_init(); // initialize the kernel key management system
    security_init(); // initialize the kernel security framework
    dbg_late_init(); // initialize the kernel debugger (kdb)
    vfs_caches_init(totalram_pages); // initialize the virtual filesystem layer
    signals_init(); // create the slab cache for signal queues
    /* rootfs populating might need page-writeback */
    page_writeback_init(); // initialize the page writeback mechanism
#ifdef CONFIG_PROC_FS
    proc_root_init(); // initialize the proc filesystem
#endif
    cgroup_init(); // full initialization of control groups
    cpuset_init(); // initialize cpusets
    taskstats_init_early(); // early taskstats initialization: create the slab cache and set up the per-cpu listener lists
    delayacct_init(); // initialize per-task delay accounting

    check_bugs(); // empty on arm64

    acpi_early_init(); /* before LAPIC and SMP init */ // initialize ACPI power management
    sfi_init_late(); // Simple Firmware Interface late init
    if (efi_enabled(EFI_RUNTIME_SERVICES)) { // not used on ARM for now
        efi_late_init();
        efi_free_boot_services();
    }

    ftrace_init(); // initialize ftrace

    /* Do the rest non-__init'ed, we're now alive */
    rest_init(); // the remaining initialization, analyzed separately
}

1.1 kmem_cache_init_late

Defined in mm/slab.c
void __init kmem_cache_init_late(void)
{
    struct kmem_cache *cachep;

    slab_state = UP;

    /* 6) resize the head arrays to their final sizes */
    mutex_lock(&slab_mutex);
    list_for_each_entry(cachep, &slab_caches, list)
        if (enable_cpucache(cachep, GFP_NOWAIT))
            BUG();
    mutex_unlock(&slab_mutex);

    /* Annotate slab for lockdep -- annotate the malloc caches */
    init_lock_keys();

    /* Done! */
    slab_state = FULL;

    /*
     * Register a cpu startup notifier callback that initializes
     * cpu_cache_get for all new cpus
     */
    register_cpu_notifier(&cpucache_notifier);

#ifdef CONFIG_NUMA
    /*
     * Register a memory hotplug callback that initializes and frees
     * node.
     */
    hotplug_memory_notifier(slab_memory_callback, SLAB_CALLBACK_PRI);
#endif

    /*
     * The reap timers are started later, with a module init call: That part
     * of the kernel is not yet operational.
     */
}
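
slab_state moving to FULL marks the point where the slab allocator is completely usable; most of the functions that follow in start_kernel simply create their own caches with kmem_cache_create(). As a reminder of that pattern, here is a minimal, self-contained sketch of the slab cache API (struct foo and the cache name are illustrative, not kernel code):

struct foo {
    int a;
    long b;
};

static struct kmem_cache *foo_cachep;

static int __init foo_cache_init(void)
{
    /* one cache per object type: fixed size, optionally cache-line aligned */
    foo_cachep = kmem_cache_create("foo_cache", sizeof(struct foo),
                                   0, SLAB_HWCACHE_ALIGN, NULL);
    if (!foo_cachep)
        return -ENOMEM;
    return 0;
}

static void foo_demo(void)
{
    struct foo *obj = kmem_cache_alloc(foo_cachep, GFP_KERNEL); /* grab one object */

    if (obj)
        kmem_cache_free(foo_cachep, obj);                       /* return it to the cache */
}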


1.2 console_init

Defined in drivers/tty/tty_io.c
void __init console_init(void)
{
    initcall_t *call;

    /* Setup the default TTY line discipline. */
    tty_ldisc_begin();

    /*
     * set up the console device so that later boot sequences can
     * inform about problems etc..
     */
    call = __con_initcall_start;
    while (call < __con_initcall_end) {
        (*call)();
        call++;
    }
}
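
__con_initcall_start and __con_initcall_end delimit an array of function pointers collected at link time, so console_init() simply calls every registered console initcall in order. A console driver typically joins that array through the console_initcall() macro, roughly as in this hedged sketch (my_serial_console and its write hook are hypothetical):

static struct console my_serial_console = {
    .name  = "ttyMYS",
    .write = my_serial_console_write,   /* hypothetical low-level write hook */
    .flags = CON_PRINTBUFFER,
    .index = -1,
};

static int __init my_serial_console_setup(void)
{
    register_console(&my_serial_console);
    return 0;
}
/* places the pointer in the .con_initcall section walked above */
console_initcall(my_serial_console_setup);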

1.3 lockdep_info

Defined in kernel/lockdep.c
void __init lockdep_info(void)
{
    printk("Lock dependency validator: Copyright (c) 2006 Red Hat, Inc., Ingo Molnar\n");

    printk("... MAX_LOCKDEP_SUBCLASSES:  %lu\n", MAX_LOCKDEP_SUBCLASSES);
    printk("... MAX_LOCK_DEPTH:          %lu\n", MAX_LOCK_DEPTH);
    printk("... MAX_LOCKDEP_KEYS:        %lu\n", MAX_LOCKDEP_KEYS);
    printk("... CLASSHASH_SIZE:          %lu\n", CLASSHASH_SIZE);
    printk("... MAX_LOCKDEP_ENTRIES:     %lu\n", MAX_LOCKDEP_ENTRIES);
    printk("... MAX_LOCKDEP_CHAINS:      %lu\n", MAX_LOCKDEP_CHAINS);
    printk("... CHAINHASH_SIZE:          %lu\n", CHAINHASH_SIZE);

    printk(" memory used by lock dependency info: %lu kB\n",
        (sizeof(struct lock_class) * MAX_LOCKDEP_KEYS +
        sizeof(struct list_head) * CLASSHASH_SIZE +
        sizeof(struct lock_list) * MAX_LOCKDEP_ENTRIES +
        sizeof(struct lock_chain) * MAX_LOCKDEP_CHAINS +
        sizeof(struct list_head) * CHAINHASH_SIZE
#ifdef CONFIG_PROVE_LOCKING
        + sizeof(struct circular_queue)
#endif
        ) / 1024
        );

    printk(" per task-struct memory footprint: %lu bytes\n",
        sizeof(struct held_lock) * MAX_LOCK_DEPTH);

#ifdef CONFIG_DEBUG_LOCKDEP
    if (lockdep_init_error) {
        printk("WARNING: lockdep init error! lock-%s was acquired"
            "before lockdep_init\n", lock_init_error);
        printk("Call stack leading to lockdep invocation was:\n");
        print_stack_trace(&lockdep_init_trace, 0);
    }
#endif
}

1.4 locking_selftest

Defined in lib/locking-selftest.c
void locking_selftest(void)
{
    /*
     * Got a locking failure before the selftest ran?
     */
    if (!debug_locks) {
        printk("----------------------------------\n");
        printk("| Locking API testsuite disabled |\n");
        printk("----------------------------------\n");
        return;
    }

    /*
     * Run the testsuite:
     */
    printk("------------------------\n");
    printk("| Locking API testsuite:\n");
    printk("----------------------------------------------------------------------------\n");
    printk("                                 | spin |wlock |rlock |mutex | wsem | rsem |\n");
    printk("  --------------------------------------------------------------------------\n");

    init_shared_classes();
    debug_locks_silent = !debug_locks_verbose;

    DO_TESTCASE_6R("A-A deadlock", AA);
    DO_TESTCASE_6R("A-B-B-A deadlock", ABBA);
    DO_TESTCASE_6R("A-B-B-C-C-A deadlock", ABBCCA);
    DO_TESTCASE_6R("A-B-C-A-B-C deadlock", ABCABC);
    DO_TESTCASE_6R("A-B-B-C-C-D-D-A deadlock", ABBCCDDA);
    DO_TESTCASE_6R("A-B-C-D-B-D-D-A deadlock", ABCDBDDA);
    DO_TESTCASE_6R("A-B-C-D-B-C-D-A deadlock", ABCDBCDA);
    DO_TESTCASE_6("double unlock", double_unlock);
    DO_TESTCASE_6("initialize held", init_held);
    DO_TESTCASE_6_SUCCESS("bad unlock order", bad_unlock_order);

    printk("  --------------------------------------------------------------------------\n");
    print_testname("recursive read-lock");
    printk("             |");
    dotest(rlock_AA1, SUCCESS, LOCKTYPE_RWLOCK);
    printk("             |");
    dotest(rsem_AA1, FAILURE, LOCKTYPE_RWSEM);
    printk("\n");

    print_testname("recursive read-lock #2");
    printk("             |");
    dotest(rlock_AA1B, SUCCESS, LOCKTYPE_RWLOCK);
    printk("             |");
    dotest(rsem_AA1B, FAILURE, LOCKTYPE_RWSEM);
    printk("\n");

    print_testname("mixed read-write-lock");
    printk("             |");
    dotest(rlock_AA2, FAILURE, LOCKTYPE_RWLOCK);
    printk("             |");
    dotest(rsem_AA2, FAILURE, LOCKTYPE_RWSEM);
    printk("\n");

    print_testname("mixed write-read-lock");
    printk("             |");
    dotest(rlock_AA3, FAILURE, LOCKTYPE_RWLOCK);
    printk("             |");
    dotest(rsem_AA3, FAILURE, LOCKTYPE_RWSEM);
    printk("\n");

    printk("  --------------------------------------------------------------------------\n");

    /*
     * irq-context testcases:
     */
    DO_TESTCASE_2x6("irqs-on + irq-safe-A", irqsafe1);
    DO_TESTCASE_2x3("sirq-safe-A => hirqs-on", irqsafe2A);
    DO_TESTCASE_2x6("safe-A + irqs-on", irqsafe2B);
    DO_TESTCASE_6x6("safe-A + unsafe-B #1", irqsafe3);
    DO_TESTCASE_6x6("safe-A + unsafe-B #2", irqsafe4);
    DO_TESTCASE_6x6RW("irq lock-inversion", irq_inversion);

    DO_TESTCASE_6x2("irq read-recursion", irq_read_recursion);
//    DO_TESTCASE_6x2B("irq read-recursion #2", irq_read_recursion2);

    if (unexpected_testcase_failures) {
        printk("-----------------------------------------------------------------\n");
        debug_locks = 0;
        printk("BUG: %3d unexpected failures (out of %3d) - debugging disabled! |\n",
            unexpected_testcase_failures, testcase_total);
        printk("-----------------------------------------------------------------\n");
    } else if (expected_testcase_failures && testcase_successes) {
        printk("--------------------------------------------------------\n");
        printk("%3d out of %3d testcases failed, as expected. |\n",
            expected_testcase_failures, testcase_total);
        printk("----------------------------------------------------\n");
        debug_locks = 1;
    } else if (expected_testcase_failures && !testcase_successes) {
        printk("--------------------------------------------------------\n");
        printk("All %3d testcases failed, as expected. |\n",
            expected_testcase_failures);
        printk("----------------------------------------\n");
        debug_locks = 1;
    } else {
        printk("-------------------------------------------------------\n");
        printk("Good, all %3d testcases passed! |\n",
            testcase_successes);
        printk("---------------------------------\n");
        debug_locks = 1;
    }
    debug_locks_silent = 0;
}


1.5 page_cgroup_init

Defined in mm/page_cgroup.c
void __init page_cgroup_init(void)
{
    unsigned long pfn;
    int nid;

    if (mem_cgroup_disabled())
        return;

    for_each_node_state(nid, N_MEMORY) {
        unsigned long start_pfn, end_pfn;

        start_pfn = node_start_pfn(nid);
        end_pfn = node_end_pfn(nid);
        /*
         * start_pfn and end_pfn may not be aligned to SECTION and the
         * page->flags of out of node pages are not initialized.  So we
         * scan [start_pfn, the biggest section's pfn < end_pfn) here.
         */
        for (pfn = start_pfn;
             pfn < end_pfn;
                     pfn = ALIGN(pfn + 1, PAGES_PER_SECTION)) {

            if (!pfn_valid(pfn))
                continue;
            /*
             * Nodes's pfns can be overlapping.
             * We know some arch can have a nodes layout such as
             * -------------pfn-------------->
             * N0 | N1 | N2 | N0 | N1 | N2|....
             */
            if (pfn_to_nid(pfn) != nid)
                continue;
            if (init_section_page_cgroup(pfn, nid))
                goto oom;
        }
    }
    hotplug_memory_notifier(page_cgroup_callback, 0);
    printk(KERN_INFO "allocated %ld bytes of page_cgroup\n", total_usage);
    printk(KERN_INFO "please try 'cgroup_disable=memory' option if you "
             "don't want memory cgroups\n");
    return;
oom:
    printk(KERN_CRIT "try 'cgroup_disable=memory' boot option\n");
    panic("Out of memory");
}

1.6 debug_objects_mem_init

Defined in lib/debugobjects.c
void __init debug_objects_mem_init(void)
{
    if (!debug_objects_enabled)
        return;

    obj_cache = kmem_cache_create("debug_objects_cache",
                      sizeof (struct debug_obj), 0,
                      SLAB_DEBUG_OBJECTS, NULL);

    if (!obj_cache || debug_objects_replace_static_objects()) {
        debug_objects_enabled = 0;
        if (obj_cache)
            kmem_cache_destroy(obj_cache);
        printk(KERN_WARNING "ODEBUG: out of memory.\n");
    } else
        debug_objects_selftest();
}

1.7 kmemleak_init

Defined in mm/kmemleak.c
void __init kmemleak_init(void)
{
    int i;
    unsigned long flags;

#ifdef CONFIG_DEBUG_KMEMLEAK_DEFAULT_OFF
    if (!kmemleak_skip_disable) {
        atomic_set(&kmemleak_early_log, 0);
        kmemleak_disable();
        return;
    }
#endif

    jiffies_min_age = msecs_to_jiffies(MSECS_MIN_AGE);
    jiffies_scan_wait = msecs_to_jiffies(SECS_SCAN_WAIT * 1000);

    object_cache = KMEM_CACHE(kmemleak_object, SLAB_NOLEAKTRACE);
    scan_area_cache = KMEM_CACHE(kmemleak_scan_area, SLAB_NOLEAKTRACE);

    if (crt_early_log >= ARRAY_SIZE(early_log))
        pr_warning("Early log buffer exceeded (%d), please increase "
               "DEBUG_KMEMLEAK_EARLY_LOG_SIZE\n", crt_early_log);

    /* the kernel is still in UP mode, so disabling the IRQs is enough */
    local_irq_save(flags);
    atomic_set(&kmemleak_early_log, 0);
    if (atomic_read(&kmemleak_error)) {
        local_irq_restore(flags);
        return;
    } else
        atomic_set(&kmemleak_enabled, 1);
    local_irq_restore(flags);

    /*
     * This is the point where tracking allocations is safe. Automatic
     * scanning is started during the late initcall. Add the early logged
     * callbacks to the kmemleak infrastructure.
     */
    for (i = 0; i < crt_early_log; i++) {
        struct early_log *log = &early_log[i];

        switch (log->op_type) {
        case KMEMLEAK_ALLOC:
            early_alloc(log);
            break;
        case KMEMLEAK_ALLOC_PERCPU:
            early_alloc_percpu(log);
            break;
        case KMEMLEAK_FREE:
            kmemleak_free(log->ptr);
            break;
        case KMEMLEAK_FREE_PART:
            kmemleak_free_part(log->ptr, log->size);
            break;
        case KMEMLEAK_FREE_PERCPU:
            kmemleak_free_percpu(log->ptr);
            break;
        case KMEMLEAK_NOT_LEAK:
            kmemleak_not_leak(log->ptr);
            break;
        case KMEMLEAK_IGNORE:
            kmemleak_ignore(log->ptr);
            break;
        case KMEMLEAK_SCAN_AREA:
            kmemleak_scan_area(log->ptr, log->size, GFP_KERNEL);
            break;
        case KMEMLEAK_NO_SCAN:
            kmemleak_no_scan(log->ptr);
            break;
        default:
            kmemleak_warn("Unknown early log operation: %d\n",
                      log->op_type);
        }

        if (atomic_read(&kmemleak_warning)) {
            print_log_trace(log);
            atomic_set(&kmemleak_warning, 0);
        }
    }
}
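
The replay loop above feeds allocations that happened before object_cache existed into the tracker; from this point on the kmemleak hooks are live. For reference, this is how code elsewhere typically uses the annotation hooks replayed above (illustrative snippet, not from this file):

/* An object whose only reference lives outside normally scanned
 * memory would be reported as a leak, so its owner annotates it: */
void *buf = kmalloc(4096, GFP_KERNEL);

kmemleak_not_leak(buf);   /* suppress the leak report for this object */
/* or: kmemleak_ignore(buf);  -- neither scan nor report this object */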

1.8 setup_per_cpu_pageset

Defined in mm/page_alloc.c
void __init setup_per_cpu_pageset(void)
{
    struct zone *zone;

    for_each_populated_zone(zone)
        setup_zone_pageset(zone);
}

1.9 numa_policy_init

Defined in mm/mempolicy.c
void __init numa_policy_init(void)
{
    nodemask_t interleave_nodes;
    unsigned long largest = 0;
    int nid, prefer = 0;

    policy_cache = kmem_cache_create("numa_policy",
                     sizeof(struct mempolicy),
                     0, SLAB_PANIC, NULL);

    sn_cache = kmem_cache_create("shared_policy_node",
                     sizeof(struct sp_node),
                     0, SLAB_PANIC, NULL);

    for_each_node(nid) {
        preferred_node_policy[nid] = (struct mempolicy) {
            .refcnt = ATOMIC_INIT(1),
            .mode = MPOL_PREFERRED,
            .flags = MPOL_F_MOF | MPOL_F_MORON,
            .v = { .preferred_node = nid, },
        };
    }

    /*
     * Set interleaving policy for system init. Interleaving is only
     * enabled across suitably sized nodes (default is >= 16MB), or
     * fall back to the largest node if they're all smaller.
     */
    nodes_clear(interleave_nodes);
    for_each_node_state(nid, N_MEMORY) {
        unsigned long total_pages = node_present_pages(nid);

        /* Preserve the largest node */
        if (largest < total_pages) {
            largest = total_pages;
            prefer = nid;
        }

        /* Interleave this node? */
        if ((total_pages << PAGE_SHIFT) >= (16 << 20))
            node_set(nid, interleave_nodes);
    }

    /* All too small, use the largest */
    if (unlikely(nodes_empty(interleave_nodes)))
        node_set(prefer, interleave_nodes);

    if (do_set_mempolicy(MPOL_INTERLEAVE, 0, &interleave_nodes))
        printk("numa_policy_init: interleaving failed\n");

    check_numabalancing_enable();
}
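
To put the interleave threshold in concrete terms: (total_pages << PAGE_SHIFT) is the node size in bytes and (16 << 20) is 16 MB, so assuming 4 KiB pages a node needs at least 4096 present pages to take part in interleaving; if every node is smaller than that, only the largest node is used.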

1.10 sched_clock_init

Defined in kernel/sched/clock.c
void sched_clock_init(void)
{
    u64 ktime_now = ktime_to_ns(ktime_get());
    int cpu;

    for_each_possible_cpu(cpu) {
        struct sched_clock_data *scd = cpu_sdc(cpu);

        scd->tick_raw = 0;
        scd->tick_gtod = ktime_now;
        scd->clock = ktime_now;
    }

    sched_clock_running = 1;
}

1.11 calibrate_delay

Defined in init/calibrate.c
void __cpuinit calibrate_delay(void)
{
    unsigned long lpj;
    static bool printed;
    int this_cpu = smp_processor_id();

    if (per_cpu(cpu_loops_per_jiffy, this_cpu)) {
        lpj = per_cpu(cpu_loops_per_jiffy, this_cpu);
        if (!printed)
            pr_info("Calibrating delay loop (skipped) "
                "already calibrated this CPU");
    } else if (preset_lpj) {
        lpj = preset_lpj;
        if (!printed)
            pr_info("Calibrating delay loop (skipped) "
                "preset value.. ");
    } else if ((!printed) && lpj_fine) {
        lpj = lpj_fine;
        pr_info("Calibrating delay loop (skipped), "
            "value calculated using timer frequency.. ");
    } else if ((lpj = calibrate_delay_is_known())) {
        ;
    } else if ((lpj = calibrate_delay_direct()) != 0) {
        if (!printed)
            pr_info("Calibrating delay using timer "
                "specific routine.. ");
    } else {
        if (!printed)
            pr_info("Calibrating delay loop... ");
        lpj = calibrate_delay_converge();
    }
    per_cpu(cpu_loops_per_jiffy, this_cpu) = lpj;
    if (!printed)
        pr_cont("%lu.%02lu BogoMIPS (lpj=%lu)\n",
            lpj/(500000/HZ),
            (lpj/(5000/HZ)) % 100, lpj);

    loops_per_jiffy = lpj;
    printed = true;
}
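
The final pr_cont() converts lpj (loops per jiffy) into the familiar BogoMIPS figure: lpj / (500000/HZ) is the integer part and (lpj / (5000/HZ)) % 100 supplies two decimal places. A small userspace sketch of the same arithmetic, with HZ and the lpj value assumed for illustration:

#include <stdio.h>

#define HZ 100                        /* assumed tick rate */

int main(void)
{
    unsigned long lpj = 4980736;      /* hypothetical calibrated value */

    /* prints "996.14 BogoMIPS (lpj=4980736)" */
    printf("%lu.%02lu BogoMIPS (lpj=%lu)\n",
           lpj / (500000 / HZ),
           (lpj / (5000 / HZ)) % 100, lpj);
    return 0;
}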


1.12 pidmap_init

Defined in kernel/pid.c
void __init pidmap_init(void)
{
    /* Veryify no one has done anything silly */
    BUILD_BUG_ON(PID_MAX_LIMIT >= PIDNS_HASH_ADDING);

    /* bump default and minimum pid_max based on number of cpus */
    pid_max = min(pid_max_max, max_t(int, pid_max,
                PIDS_PER_CPU_DEFAULT * num_possible_cpus()));
    pid_max_min = max_t(int, pid_max_min,
                PIDS_PER_CPU_MIN * num_possible_cpus());
    pr_info("pid_max: default: %u minimum: %u\n", pid_max, pid_max_min);

    init_pid_ns.pidmap[0].page = kzalloc(PAGE_SIZE, GFP_KERNEL);
    /* Reserve PID 0. We never call free_pidmap(0) */
    set_bit(0, init_pid_ns.pidmap[0].page);
    atomic_dec(&init_pid_ns.pidmap[0].nr_free);
    init_pid_ns.nr_hashed = PIDNS_HASH_ADDING;

    init_pid_ns.pid_cachep = KMEM_CACHE(pid,
            SLAB_HWCACHE_ALIGN | SLAB_PANIC);
}
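
The page allocated for pidmap[0] is a plain bitmap with one bit per PID, which is why reserving PID 0 is just set_bit(0). A simplified sketch of how a free PID is later claimed from such a bitmap (loosely modeled on alloc_pidmap(); the real version also handles multiple bitmap pages, wraparound and the RESERVED_PIDS floor):

static int pick_free_pid(unsigned long *bitmap, int pid_max)
{
    int pid;

    for (pid = 1; pid < pid_max; pid++) {
        /* atomic test-and-set: returns the old bit value */
        if (!test_and_set_bit(pid, bitmap))
            return pid;         /* bit was clear, the PID is now ours */
    }
    return -1;                  /* every PID is in use */
}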

1.13 anon_vma_init

Defined in mm/rmap.c
void __init anon_vma_init(void)
{
    anon_vma_cachep = kmem_cache_create("anon_vma", sizeof(struct anon_vma),
            0, SLAB_DESTROY_BY_RCU|SLAB_PANIC, anon_vma_ctor);
    anon_vma_chain_cachep = KMEM_CACHE(anon_vma_chain, SLAB_PANIC);
}

1.14 thread_info_cache_init

Defined in kernel/fork.c
void thread_info_cache_init(void)
{
    thread_info_cache = kmem_cache_create("thread_info", THREAD_SIZE,
                          THREAD_SIZE, 0, NULL);
    BUG_ON(thread_info_cache == NULL);
}

1.15 cred_init

Defined in kernel/cred.c
void __init cred_init(void)
{
    /* allocate a slab in which we can store credentials */
    cred_jar = kmem_cache_create("cred_jar", sizeof(struct cred),
                     0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
}

1.16 fork_init

Defined in kernel/fork.c
void __init fork_init(unsigned long mempages)
{
#ifndef CONFIG_ARCH_TASK_STRUCT_ALLOCATOR
#ifndef ARCH_MIN_TASKALIGN
#define ARCH_MIN_TASKALIGN  L1_CACHE_BYTES
#endif
    /* create a slab on which task_structs can be allocated */
    task_struct_cachep =
        kmem_cache_create("task_struct", sizeof(struct task_struct),
            ARCH_MIN_TASKALIGN, SLAB_PANIC | SLAB_NOTRACK, NULL);
#endif

    /* do the arch specific task caches init */
    arch_task_cache_init();

    /*
     * The default maximum number of threads is set to a safe
     * value: the thread structures can take up at most half
     * of memory.
     */
    max_threads = mempages / (8 * THREAD_SIZE / PAGE_SIZE);

    /*
     * we need to allow at least 20 threads to boot a system
     */
    if (max_threads < 20)
        max_threads = 20;

    init_task.signal->rlim[RLIMIT_NPROC].rlim_cur = max_threads/2;
    init_task.signal->rlim[RLIMIT_NPROC].rlim_max = max_threads/2;
    init_task.signal->rlim[RLIMIT_SIGPENDING] =
        init_task.signal->rlim[RLIMIT_NPROC];
}
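
The max_threads formula bounds how much memory thread structures can consume: with 4 KiB pages and an 8 KiB THREAD_SIZE the divisor is 8 * 2 = 16, so a machine with 262144 pages (1 GiB) gets max_threads = 16384, and the default RLIMIT_NPROC is half of that, 8192.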

1.17 proc_caches_init

Defined in kernel/fork.c
void __init proc_caches_init(void)
{
    sighand_cachep = kmem_cache_create("sighand_cache",
            sizeof(struct sighand_struct), 0,
            SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_DESTROY_BY_RCU|
            SLAB_NOTRACK, sighand_ctor);
    signal_cachep = kmem_cache_create("signal_cache",
            sizeof(struct signal_struct), 0,
            SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL);
    files_cachep = kmem_cache_create("files_cache",
            sizeof(struct files_struct), 0,
            SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL);
    fs_cachep = kmem_cache_create("fs_cache",
            sizeof(struct fs_struct), 0,
            SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL);
    /*
     * FIXME! The "sizeof(struct mm_struct)" currently includes the
     * whole struct cpumask for the OFFSTACK case. We could change
     * this to *only* allocate as much of it as required by the
     * maximum number of CPU's we can ever have.  The cpumask_allocation
     * is at the end of the structure, exactly for that reason.
     */
    mm_cachep = kmem_cache_create("mm_struct",
            sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN,
            SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL);
    vm_area_cachep = KMEM_CACHE(vm_area_struct, SLAB_PANIC);
    mmap_init();
    nsproxy_cache_init();
}

1.18 buffer_init

Defined in fs/buffer.c
void __init buffer_init(void)
{
    unsigned long nrpages;

    bh_cachep = kmem_cache_create("buffer_head",
            sizeof(struct buffer_head), 0,
                (SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|
                SLAB_MEM_SPREAD),
                NULL);

    /*
     * Limit the bh occupancy to 10% of ZONE_NORMAL
     */
    nrpages = (nr_free_buffer_pages() * 10) / 100;
    max_buffer_heads = nrpages * (PAGE_SIZE / sizeof(struct buffer_head));
    hotcpu_notifier(buffer_cpu_notify, 0);
}
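
The 10% cap is expressed as a count of objects rather than pages: assuming a 4 KiB page and a buffer_head of roughly 100 bytes, about 40 buffer_heads fit per page, so max_buffer_heads comes out near 40 times 10% of the free buffer pages.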

1.19 key_init

Defined in security/keys/key.c
void __init key_init(void)
{
    /* allocate a slab in which we can store keys */
    key_jar = kmem_cache_create("key_jar", sizeof(struct key),
            0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);

    /* add the special key types */
    list_add_tail(&key_type_keyring.link, &key_types_list);
    list_add_tail(&key_type_dead.link, &key_types_list);
    list_add_tail(&key_type_user.link, &key_types_list);
    list_add_tail(&key_type_logon.link, &key_types_list);

    /* record the root user tracking */
    rb_link_node(&root_key_user.node,
             NULL,
             &key_user_tree.rb_node);

    rb_insert_color(&root_key_user.node,
            &key_user_tree);
}
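
root_key_user is inserted as the very first node of the key_user_tree rbtree, which is why the parent is NULL and the link is the root itself. For comparison, the general insertion pattern behind the rb_link_node()/rb_insert_color() pair looks like this sketch (struct mytype and its key field are illustrative):

struct mytype {
    struct rb_node node;
    int key;
};

static void my_insert(struct rb_root *root, struct mytype *data)
{
    struct rb_node **new = &root->rb_node, *parent = NULL;

    /* walk down the tree to find the insertion point */
    while (*new) {
        struct mytype *this = rb_entry(*new, struct mytype, node);

        parent = *new;
        if (data->key < this->key)
            new = &(*new)->rb_left;
        else
            new = &(*new)->rb_right;
    }

    /* hang the new node off its parent, then rebalance/recolor */
    rb_link_node(&data->node, parent, new);
    rb_insert_color(&data->node, root);
}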

1.20 security_init

Defined in security/security.c
int __init security_init(void)
{
    printk(KERN_INFO "Security Framework initialized\n");

    security_fixup_ops(&default_security_ops);
    security_ops = &default_security_ops;
    do_security_initcalls();

    return 0;
}

1.21 dbg_late_init

Defined in kernel/debug/debug_core.c
void __init dbg_late_init(void)
{
    dbg_is_early = false;
    if (kgdb_io_module_registered)
        kgdb_arch_late();
    kdb_init(KDB_INIT_FULL);
}


1.22 vfs_caches_init

Defined in fs/dcache.c
void __init vfs_caches_init(unsigned long mempages)
{
    unsigned long reserve;

    /* Base hash sizes on available memory, with a reserve equal to
           150% of current kernel size */

    reserve = min((mempages - nr_free_pages()) * 3/2, mempages - 1);
    mempages -= reserve;

    names_cachep = kmem_cache_create("names_cache", PATH_MAX, 0,
            SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);

    dcache_init();
    inode_init();
    files_init(mempages);
    mnt_init();
    bdev_cache_init();
    chrdev_init();
}

1.23 page_writeback_init

Defined in mm/page-writeback.c
void __init page_writeback_init(void)
{
    writeback_set_ratelimit();
    register_cpu_notifier(&ratelimit_nb);

    fprop_global_init(&writeout_completions);
}

1.24 proc_root_init

Defined in fs/proc/root.c
void __init proc_root_init(void)
{
    int err;

    proc_init_inodecache();
    err = register_filesystem(&proc_fs_type);
    if (err)
        return;

    proc_self_init();
    proc_symlink("mounts", NULL, "self/mounts");

    proc_net_init();

#ifdef CONFIG_SYSVIPC
    proc_mkdir("sysvipc", NULL);
#endif
    proc_mkdir("fs", NULL);
    proc_mkdir("driver", NULL);
    proc_mkdir("fs/nfsd", NULL); /* somewhere for the nfsd filesystem to be mounted */
#if defined(CONFIG_SUN_OPENPROMFS) || defined(CONFIG_SUN_OPENPROMFS_MODULE)
    /* just give it a mountpoint */
    proc_mkdir("openprom", NULL);
#endif
    proc_tty_init();
#ifdef CONFIG_PROC_DEVICETREE
    proc_device_tree_init();
#endif
    proc_mkdir("bus", NULL);
    proc_sys_init();
}
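
Once register_filesystem() succeeds, the rest of the function just populates the tree with proc_mkdir() and friends, and any later subsystem can do the same. A hedged sketch with hypothetical names (foo and its open handler are placeholders):

static const struct file_operations foo_fops = {
    .owner   = THIS_MODULE,
    .open    = foo_open,          /* hypothetical seq_file open handler */
    .read    = seq_read,
    .llseek  = seq_lseek,
    .release = single_release,
};

static int __init foo_proc_init(void)
{
    proc_mkdir("foo", NULL);                              /* /proc/foo        */
    if (!proc_create("foo/status", 0444, NULL, &foo_fops))
        return -ENOMEM;                                   /* /proc/foo/status */
    return 0;
}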

1.25 cgroup_init

Defined in kernel/cgroup.c
int __init cgroup_init(void)
{
    int err;
    int i;
    unsigned long key;

    err = bdi_init(&cgroup_backing_dev_info);
    if (err)
        return err;

    for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
        struct cgroup_subsys *ss = subsys[i];

        /* at bootup time, we don't worry about modular subsystems */
        if (!ss || ss->module)
            continue;
        if (!ss->early_init)
            cgroup_init_subsys(ss);
        if (ss->use_id)
            cgroup_init_idr(ss, init_css_set.subsys[ss->subsys_id]);
    }

    /* Add init_css_set to the hash table */
    key = css_set_hash(init_css_set.subsys);
    hash_add(css_set_table, &init_css_set.hlist, key);
    BUG_ON(!init_root_id(&rootnode));

    cgroup_kobj = kobject_create_and_add("cgroup", fs_kobj);
    if (!cgroup_kobj) {
        err = -ENOMEM;
        goto out;
    }

    err = register_filesystem(&cgroup_fs_type);
    if (err < 0) {
        kobject_put(cgroup_kobj);
        goto out;
    }

    proc_create("cgroups", 0, NULL, &proc_cgroupstats_operations);

out:
    if (err)
        bdi_destroy(&cgroup_backing_dev_info);

    return err;
}


1.26 cpuset_init

Defined in kernel/cpuset.c
int __init cpuset_init(void)
{
    int err = 0;

    if (!alloc_cpumask_var(&top_cpuset.cpus_allowed, GFP_KERNEL))
        BUG();

    cpumask_setall(top_cpuset.cpus_allowed);
    nodes_setall(top_cpuset.mems_allowed);

    fmeter_init(&top_cpuset.fmeter);
    set_bit(CS_SCHED_LOAD_BALANCE, &top_cpuset.flags);
    top_cpuset.relax_domain_level = -1;

    err = register_filesystem(&cpuset_fs_type);
    if (err < 0)
        return err;

    if (!alloc_cpumask_var(&cpus_attach, GFP_KERNEL))
        BUG();

    number_of_cpusets = 1;
    return 0;
}

1.27 taskstats_init_early

Defined in kernel/taskstats.c
void __init taskstats_init_early(void)
{
    unsigned int i;

    taskstats_cache = KMEM_CACHE(taskstats, SLAB_PANIC);
    for_each_possible_cpu(i) {
        INIT_LIST_HEAD(&(per_cpu(listener_array, i).list));
        init_rwsem(&(per_cpu(listener_array, i).sem));
    }
}
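
listener_array is a per-cpu variable: one instance exists for every possible CPU, and per_cpu(var, cpu) addresses a specific CPU's copy, exactly as the loop above does. The general pattern, as a minimal sketch:

static DEFINE_PER_CPU(int, hit_count);

static void count_hit(void)
{
    get_cpu_var(hit_count)++;   /* pins us to this CPU's copy (disables preemption) */
    put_cpu_var(hit_count);     /* re-enables preemption */
}

static int total_hits(void)
{
    int cpu, sum = 0;

    for_each_possible_cpu(cpu)
        sum += per_cpu(hit_count, cpu);
    return sum;
}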

1.28 acpi_early_init

Defined in drivers/acpi/bus.c
void __init acpi_early_init(void)
{
    acpi_status status = AE_OK;

    if (acpi_disabled)
        return;

    printk(KERN_INFO PREFIX "Core revision %08x\n", ACPI_CA_VERSION);

    /* enable workarounds, unless strict ACPI spec. compliance */
    if (!acpi_strict)
        acpi_gbl_enable_interpreter_slack = TRUE;

    acpi_gbl_permanent_mmap = 1;

    /*
     * If the machine falls into the DMI check table,
     * DSDT will be copied to memory
     */
    dmi_check_system(dsdt_dmi_table);

    status = acpi_reallocate_root_table();
    if (ACPI_FAILURE(status)) {
        printk(KERN_ERR PREFIX
               "Unable to reallocate ACPI tables\n");
        goto error0;
    }

    status = acpi_initialize_subsystem();
    if (ACPI_FAILURE(status)) {
        printk(KERN_ERR PREFIX
               "Unable to initialize the ACPI Interpreter\n");
        goto error0;
    }

    status = acpi_load_tables();
    if (ACPI_FAILURE(status)) {
        printk(KERN_ERR PREFIX
               "Unable to load the System Description Tables\n");
        goto error0;
    }

#ifdef CONFIG_X86
    if (!acpi_ioapic) {
        /* compatible (0) means level (3) */
        if (!(acpi_sci_flags & ACPI_MADT_TRIGGER_MASK)) {
            acpi_sci_flags &= ~ACPI_MADT_TRIGGER_MASK;
            acpi_sci_flags |= ACPI_MADT_TRIGGER_LEVEL;
        }
        /* Set PIC-mode SCI trigger type */
        acpi_pic_sci_set_trigger(acpi_gbl_FADT.sci_interrupt,
                     (acpi_sci_flags & ACPI_MADT_TRIGGER_MASK) >> 2);
    } else {
        /*
         * now that acpi_gbl_FADT is initialized,
         * update it with result from INT_SRC_OVR parsing
         */
        acpi_gbl_FADT.sci_interrupt = acpi_sci_override_gsi;
    }
#endif

    status = acpi_enable_subsystem(~ACPI_NO_ACPI_ENABLE);
    if (ACPI_FAILURE(status)) {
        printk(KERN_ERR PREFIX "Unable to enable ACPI\n");
        goto error0;
    }

    /*
     * If the system is using ACPI then we can be reasonably
     * confident that any regulators are managed by the firmware
     * so tell the regulator core it has everything it needs to
     * know.
     */
    regulator_has_full_constraints();

    return;

      error0:
    disable_acpi();
    return;
}

1.29 sfi_init_late

Defined in drivers/sfi/sfi_core.c
void __init sfi_init_late(void)
{
    int length;

    if (sfi_disabled)
        return;

    length = syst_va->header.len;
    sfi_unmap_memory(syst_va, sizeof(struct sfi_table_simple));

    /* Use ioremap now after it is ready */
    sfi_use_ioremap = 1;
    syst_va = sfi_map_memory(syst_pa, length);

    sfi_acpi_init();
}

1.30 ftrace_init

Defined in kernel/trace/ftrace.c
void __init ftrace_init(void)
{
    unsigned long count, addr, flags;
    int ret;

    /* Keep the ftrace pointer to the stub */
    addr = (unsigned long)ftrace_stub;

    local_irq_save(flags);
    ftrace_dyn_arch_init(&addr);
    local_irq_restore(flags);

    /* ftrace_dyn_arch_init places the return code in addr */
    if (addr)
        goto failed;

    count = __stop_mcount_loc - __start_mcount_loc;

    ret = ftrace_dyn_table_alloc(count);
    if (ret)
        goto failed;

    last_ftrace_enabled = ftrace_enabled = 1;

    ret = ftrace_process_locs(NULL,
                  __start_mcount_loc,
                  __stop_mcount_loc);

    ret = register_module_notifier(&ftrace_module_enter_nb);
    if (ret)
        pr_warning("Failed to register trace ftrace module enter notifier\n");

    ret = register_module_notifier(&ftrace_module_exit_nb);
    if (ret)
        pr_warning("Failed to register trace ftrace module exit notifier\n");

    set_ftrace_early_filters();

    return;
 failed:
    ftrace_disabled = 1;
}
