start_kernel() is the first C function executed during kernel boot; it completes the kernel's initialization by calling a long series of init functions. This second part analyzes the code after local_irq_enable().
Defined in init/main.c
asmlinkage void __init start_kernel(void)
{
char * command_line;
extern const struct kernel_param __start___param[], __stop___param[];
/*
* Need to run as early as possible, to initialize the
* lockdep hash:
*/
lockdep_init(); // initialize the hash tables of the lockdep (deadlock detection) mechanism
smp_setup_processor_id(); // return the CPU number; 0 on a single-core CPU
debug_objects_early_init(); // early initialization of the debug-objects facility
cgroup_init_early(); // early initialization of control groups (cgroups)
local_irq_disable(); // disable interrupts on the current CPU
early_boot_irqs_disabled = true;
/*
* Interrupts are still disabled. Do necessary setups, then
* enable them
*/
boot_cpu_init(); // mark the boot CPU as active/online
page_address_init(); // initialize the highmem page_address mechanism; not used on ARM
pr_notice("%s", linux_banner);
setup_arch(&command_line); // architecture-specific kernel initialization
/*
* Set up the the initial canary ASAP:
*/
boot_init_stack_canary(); // initialize the stack canary, the guard value used to detect stack-smashing attacks
mm_init_owner(&init_mm, &init_task); // mm.owner = &init_task
mm_init_cpumask(&init_mm);
setup_command_line(command_line); // save backup copies of the command line
setup_nr_cpu_ids(); // set nr_cpu_ids from the possible-CPU mask
setup_per_cpu_areas(); // allocate space for each CPU's copy of the per-cpu variables
smp_prepare_boot_cpu(); /* arch-specific boot-cpu hooks */
build_all_zonelists(NULL, NULL); // build the memory zone (zonelist) lists
page_alloc_init(); // page-allocator initialization
pr_notice("Kernel command line: %s\n", boot_command_line);
parse_early_param(); // parse the boot options that need "early" handling; setup_arch() has already called this once
parse_args("Booting kernel", static_command_line, __start___param,
__stop___param - __start___param,
-1, -1, &unknown_bootoption); // parse the boot parameters on the command line
jump_label_init(); // process statically defined jump labels
/*
* These use large bootmem allocations and must precede
* kmem_cache_init()
*/
setup_log_buf(0); // allocate a boot-time log buffer with memblock_alloc
pidhash_init(); // initialize the PID hash table
vfs_caches_init_early(); // initialize the dentry and inode hash tables
sort_main_extable(); // sort the kernel exception table
trap_init(); // initialize trap/exception handling; empty on ARM
mm_init(); // initialize the kernel memory allocators: transition to the buddy system, start the slab allocator, set up the vmalloc area
/*
* Set up the scheduler prior starting any interrupts (such as the
* timer interrupt). Full topology setup happens at smp_init()
* time - but meanwhile we still have a functioning scheduler.
*/
sched_init(); // initialize the process scheduler
/*
* Disable preemption - early bootup scheduling is extremely
* fragile until we cpu_idle() for the first time.
*/
preempt_disable(); // disable kernel preemption
if (WARN(!irqs_disabled(), "Interrupts were enabled *very* early, fixing it\n"))
local_irq_disable(); // disable local interrupts
idr_init_cache(); // create the slab cache for the idr (integer ID management) mechanism
perf_event_init(); // initialize the perf events subsystem
rcu_init(); // initialize the RCU (read-copy-update) mechanism
tick_nohz_init(); // initialize the dynamic tick (NO_HZ) framework
radix_tree_init(); // initialize the kernel radix tree
/* init some links before init_ISA_irqs() */
early_irq_init(); // not used on arm64
init_IRQ(); // initialize interrupt handling
tick_init(); // initialize the clock tick controller
init_timers(); // initialize kernel timers
hrtimers_init(); // initialize high-resolution timers
softirq_init(); // initialize softirqs
timekeeping_init(); // initialize the timekeeping-related global variables
time_init(); // clock source / timer initialization
profile_init(); // initialize the kernel profiling tool (profile)
call_function_init(); // initialize cross-CPU (SMP) function-call handling
WARN(!irqs_disabled(), "Interrupts were enabled early\n");
early_boot_irqs_disabled = false;
local_irq_enable(); // enable interrupts on the current CPU
// -----------------------------------------------------------
kmem_cache_init_late(); // finish initializing the slab allocator caches
/*
* HACK ALERT! This is early. We're enabling the console before
* we've done PCI setups etc, and console_init() must be aware of
* this. But we do want output early, in case something goes wrong.
*/
console_init(); // initialize the console
if (panic_later)
panic(panic_later, panic_param);
lockdep_info(); // print lockdep configuration information
/*
* Need to run this when irqs are enabled, because it wants
* to self-test [hard/soft]-irqs on/off lock inversion bugs
* too:
*/
locking_selftest(); // locking API / deadlock self-tests
#ifdef CONFIG_BLK_DEV_INITRD // check that the initrd location is valid
if (initrd_start && !initrd_below_start_ok &&
page_to_pfn(virt_to_page((void *)initrd_start)) < min_low_pfn) {
pr_crit("initrd overwritten (0x%08lx < 0x%08lx) - disabling it.\n",
page_to_pfn(virt_to_page((void *)initrd_start)),
min_low_pfn);
initrd_start = 0;
}
#endif
page_cgroup_init(); // allocate storage for the page_cgroup structures
debug_objects_mem_init(); // create the debug_obj slab cache
kmemleak_init(); // initialize the kernel memory-leak detector
setup_per_cpu_pageset(); // set up and initialize each CPU's page sets
numa_policy_init(); // initialize the NUMA memory-access policy
if (late_time_init) // empty on arm64
late_time_init();
sched_clock_init(); // initialize the scheduler clock
calibrate_delay(); // calibrate the delay loop (BogoMIPS)
pidmap_init(); // initialize the PID bitmap
anon_vma_init(); // create the anon_vma slab cache
#ifdef CONFIG_X86
if (efi_enabled(EFI_RUNTIME_SERVICES))
efi_enter_virtual_mode();
#endif
thread_info_cache_init(); // create the slab cache for thread_info
cred_init(); // create the slab cache for the credentials system
fork_init(totalram_pages); // initialize the process-creation machinery
proc_caches_init(); // create the slab caches for the structures a process needs
buffer_init(); // create the slab cache for buffer_head
key_init(); // initialize the kernel key-management system
security_init(); // initialize the kernel security framework
dbg_late_init(); // initialize the kernel debugger (kdb/kgdb)
vfs_caches_init(totalram_pages); // initialize the virtual file system
signals_init(); // create the signal-queue slab cache
/* rootfs populating might need page-writeback */
page_writeback_init(); // initialize the page writeback mechanism
#ifdef CONFIG_PROC_FS
proc_root_init(); // initialize the proc file system
#endif
cgroup_init(); // full control-group initialization
cpuset_init(); // initialize cpusets
taskstats_init_early(); // early taskstats initialization: create its slab cache and initialize the per-cpu listener lists and semaphores
delayacct_init(); // initialize task delay accounting
check_bugs(); // empty on arm64
acpi_early_init(); /* before LAPIC and SMP init */ // initialize ACPI
sfi_init_late(); // Simple Firmware Interface (SFI)
if (efi_enabled(EFI_RUNTIME_SERVICES)) { // not used on ARM for now
efi_late_init();
efi_free_boot_services();
}
ftrace_init(); // initialize ftrace
/* Do the rest non-__init'ed, we're now alive */
rest_init(); // the rest of the initialization; analyzed separately
}
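The boot-parameter handling above (parse_early_param() invoking handlers registered with early_param(), and parse_args() handling the remaining options, including those registered with __setup()) dispatches to handlers declared elsewhere in the tree. A minimal sketch of such a registration, assuming a hypothetical myboard_debug option:
static bool myboard_debug;                         /* hypothetical flag */
static int __init myboard_debug_setup(char *str)
{
    /* str points at the text after "myboard_debug=" on the command line */
    myboard_debug = true;
    return 0;                                      /* early_param handlers return 0 on success */
}
early_param("myboard_debug", myboard_debug_setup);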
1.1 kmem_cache_init_late
Defined in mm/slab.c
void __init kmem_cache_init_late(void)
{
struct kmem_cache *cachep;
slab_state = UP;
/* 6) resize the head arrays to their final sizes */
mutex_lock(&slab_mutex);
list_for_each_entry(cachep, &slab_caches, list)
if (enable_cpucache(cachep, GFP_NOWAIT))
BUG();
mutex_unlock(&slab_mutex);
/* Annotate slab for lockdep -- annotate the malloc caches */
init_lock_keys();
/* Done! */
slab_state = FULL;
/*
* Register a cpu startup notifier callback that initializes
* cpu_cache_get for all new cpus
*/
register_cpu_notifier(&cpucache_notifier);
#ifdef CONFIG_NUMA
/*
* Register a memory hotplug callback that initializes and frees
* node.
*/
hotplug_memory_notifier(slab_memory_callback, SLAB_CALLBACK_PRI);
#endif
/*
* The reap timers are started later, with a module init call: That part
* of the kernel is not yet operational.
*/
}
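Once slab_state reaches FULL here, the slab allocator is fully usable, and later initialization code can create and use caches in the normal way. A minimal sketch, with struct foo and foo_cache made up for illustration:
struct foo { int a; };
static struct kmem_cache *foo_cache;

static int __init foo_setup(void)
{
    foo_cache = kmem_cache_create("foo_cache", sizeof(struct foo),
                                  0, SLAB_HWCACHE_ALIGN, NULL);
    if (!foo_cache)
        return -ENOMEM;
    /* objects are then allocated with kmem_cache_alloc(foo_cache, GFP_KERNEL)
     * and released with kmem_cache_free(foo_cache, obj) */
    return 0;
}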
1.2 console_init
Defined in drivers/tty/tty_io.c
void __init console_init(void)
{
initcall_t *call;
/* Setup the default TTY line discipline. */
tty_ldisc_begin();
/*
* set up the console device so that later boot sequences can
* inform about problems etc..
*/
call = __con_initcall_start;
while (call < __con_initcall_end) {
(*call)();
call++;
}
}
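The __con_initcall_start/__con_initcall_end array walked above is filled at link time by the console_initcall() macro. A hedged sketch of how a console driver ends up in that array (my_console and its write routine are assumed names):
static void my_console_write(struct console *con, const char *s, unsigned int n)
{
    /* push the n characters at s out to the hardware (omitted) */
}

static struct console my_console = {
    .name  = "mycon",
    .write = my_console_write,
    .flags = CON_PRINTBUFFER,
    .index = -1,
};

static int __init my_console_setup(void)
{
    register_console(&my_console);
    return 0;
}
console_initcall(my_console_setup);    /* placed in the __con_initcall section */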
1.3 lockdep_info
Defined in kernel/lockdep.c
void __init lockdep_info(void)
{
printk("Lock dependency validator: Copyright (c) 2006 Red Hat, Inc., Ingo Molnar\n");
printk("... MAX_LOCKDEP_SUBCLASSES: %lu\n", MAX_LOCKDEP_SUBCLASSES);
printk("... MAX_LOCK_DEPTH: %lu\n", MAX_LOCK_DEPTH);
printk("... MAX_LOCKDEP_KEYS: %lu\n", MAX_LOCKDEP_KEYS);
printk("... CLASSHASH_SIZE: %lu\n", CLASSHASH_SIZE);
printk("... MAX_LOCKDEP_ENTRIES: %lu\n", MAX_LOCKDEP_ENTRIES);
printk("... MAX_LOCKDEP_CHAINS: %lu\n", MAX_LOCKDEP_CHAINS);
printk("... CHAINHASH_SIZE: %lu\n", CHAINHASH_SIZE);
printk(" memory used by lock dependency info: %lu kB\n",
(sizeof(struct lock_class) * MAX_LOCKDEP_KEYS +
sizeof(struct list_head) * CLASSHASH_SIZE +
sizeof(struct lock_list) * MAX_LOCKDEP_ENTRIES +
sizeof(struct lock_chain) * MAX_LOCKDEP_CHAINS +
sizeof(struct list_head) * CHAINHASH_SIZE
#ifdef CONFIG_PROVE_LOCKING
+ sizeof(struct circular_queue)
#endif
) / 1024
);
printk(" per task-struct memory footprint: %lu bytes\n",
sizeof(struct held_lock) * MAX_LOCK_DEPTH);
#ifdef CONFIG_DEBUG_LOCKDEP
if (lockdep_init_error) {
printk("WARNING: lockdep init error! lock-%s was acquired"
"before lockdep_init\n", lock_init_error);
printk("Call stack leading to lockdep invocation was:\n");
print_stack_trace(&lockdep_init_trace, 0);
}
#endif
}
1.4 locking_selftest
Defined in lib/locking-selftest.c
void locking_selftest(void)
{
/*
* Got a locking failure before the selftest ran?
*/
if (!debug_locks) {
printk("----------------------------------\n");
printk("| Locking API testsuite disabled |\n");
printk("----------------------------------\n");
return;
}
/*
* Run the testsuite:
*/
printk("------------------------\n");
printk("| Locking API testsuite:\n");
printk("----------------------------------------------------------------------------\n");
printk(" | spin |wlock |rlock |mutex | wsem | rsem |\n");
printk(" --------------------------------------------------------------------------\n");
init_shared_classes();
debug_locks_silent = !debug_locks_verbose;
DO_TESTCASE_6R("A-A deadlock", AA);
DO_TESTCASE_6R("A-B-B-A deadlock", ABBA);
DO_TESTCASE_6R("A-B-B-C-C-A deadlock", ABBCCA);
DO_TESTCASE_6R("A-B-C-A-B-C deadlock", ABCABC);
DO_TESTCASE_6R("A-B-B-C-C-D-D-A deadlock", ABBCCDDA);
DO_TESTCASE_6R("A-B-C-D-B-D-D-A deadlock", ABCDBDDA);
DO_TESTCASE_6R("A-B-C-D-B-C-D-A deadlock", ABCDBCDA);
DO_TESTCASE_6("double unlock", double_unlock);
DO_TESTCASE_6("initialize held", init_held);
DO_TESTCASE_6_SUCCESS("bad unlock order", bad_unlock_order);
printk(" --------------------------------------------------------------------------\n");
print_testname("recursive read-lock");
printk(" |");
dotest(rlock_AA1, SUCCESS, LOCKTYPE_RWLOCK);
printk(" |");
dotest(rsem_AA1, FAILURE, LOCKTYPE_RWSEM);
printk("\n");
print_testname("recursive read-lock #2");
printk(" |");
dotest(rlock_AA1B, SUCCESS, LOCKTYPE_RWLOCK);
printk(" |");
dotest(rsem_AA1B, FAILURE, LOCKTYPE_RWSEM);
printk("\n");
print_testname("mixed read-write-lock");
printk(" |");
dotest(rlock_AA2, FAILURE, LOCKTYPE_RWLOCK);
printk(" |");
dotest(rsem_AA2, FAILURE, LOCKTYPE_RWSEM);
printk("\n");
print_testname("mixed write-read-lock");
printk(" |");
dotest(rlock_AA3, FAILURE, LOCKTYPE_RWLOCK);
printk(" |");
dotest(rsem_AA3, FAILURE, LOCKTYPE_RWSEM);
printk("\n");
printk(" --------------------------------------------------------------------------\n");
/*
* irq-context testcases:
*/
DO_TESTCASE_2x6("irqs-on + irq-safe-A", irqsafe1);
DO_TESTCASE_2x3("sirq-safe-A => hirqs-on", irqsafe2A);
DO_TESTCASE_2x6("safe-A + irqs-on", irqsafe2B);
DO_TESTCASE_6x6("safe-A + unsafe-B #1", irqsafe3);
DO_TESTCASE_6x6("safe-A + unsafe-B #2", irqsafe4);
DO_TESTCASE_6x6RW("irq lock-inversion", irq_inversion);
DO_TESTCASE_6x2("irq read-recursion", irq_read_recursion);
// DO_TESTCASE_6x2B("irq read-recursion #2", irq_read_recursion2);
if (unexpected_testcase_failures) {
printk("-----------------------------------------------------------------\n");
debug_locks = 0;
printk("BUG: %3d unexpected failures (out of %3d) - debugging disabled! |\n",
unexpected_testcase_failures, testcase_total);
printk("-----------------------------------------------------------------\n");
} else if (expected_testcase_failures && testcase_successes) {
printk("--------------------------------------------------------\n");
printk("%3d out of %3d testcases failed, as expected. |\n",
expected_testcase_failures, testcase_total);
printk("----------------------------------------------------\n");
debug_locks = 1;
} else if (expected_testcase_failures && !testcase_successes) {
printk("--------------------------------------------------------\n");
printk("All %3d testcases failed, as expected. |\n",
expected_testcase_failures);
printk("----------------------------------------\n");
debug_locks = 1;
} else {
printk("-------------------------------------------------------\n");
printk("Good, all %3d testcases passed! |\n",
testcase_successes);
printk("---------------------------------\n");
debug_locks = 1;
}
debug_locks_silent = 0;
}
1.5 page_cgroup_init
Defined in mm/page_cgroup.c
void __init page_cgroup_init(void)
{
unsigned long pfn;
int nid;
if (mem_cgroup_disabled())
return;
for_each_node_state(nid, N_MEMORY) {
unsigned long start_pfn, end_pfn;
start_pfn = node_start_pfn(nid);
end_pfn = node_end_pfn(nid);
/*
* start_pfn and end_pfn may not be aligned to SECTION and the
* page->flags of out of node pages are not initialized. So we
* scan [start_pfn, the biggest section's pfn < end_pfn) here.
*/
for (pfn = start_pfn;
pfn < end_pfn;
pfn = ALIGN(pfn + 1, PAGES_PER_SECTION)) {
if (!pfn_valid(pfn))
continue;
/*
* Nodes's pfns can be overlapping.
* We know some arch can have a nodes layout such as
* -------------pfn-------------->
* N0 | N1 | N2 | N0 | N1 | N2|....
*/
if (pfn_to_nid(pfn) != nid)
continue;
if (init_section_page_cgroup(pfn, nid))
goto oom;
}
}
hotplug_memory_notifier(page_cgroup_callback, 0);
printk(KERN_INFO "allocated %ld bytes of page_cgroup\n", total_usage);
printk(KERN_INFO "please try 'cgroup_disable=memory' option if you "
"don't want memory cgroups\n");
return;
oom:
printk(KERN_CRIT "try 'cgroup_disable=memory' boot option\n");
panic("Out of memory");
}
1.6 debug_objects_mem_init
Defined in lib/debugobjects.c
void __init debug_objects_mem_init(void)
{
if (!debug_objects_enabled)
return;
obj_cache = kmem_cache_create("debug_objects_cache",
sizeof (struct debug_obj), 0,
SLAB_DEBUG_OBJECTS, NULL);
if (!obj_cache || debug_objects_replace_static_objects()) {
debug_objects_enabled = 0;
if (obj_cache)
kmem_cache_destroy(obj_cache);
printk(KERN_WARNING "ODEBUG: out of memory.\n");
} else
debug_objects_selftest();
}
1.7 kmemleak_init
Defined in mm/kmemleak.c
void __init kmemleak_init(void)
{
int i;
unsigned long flags;
#ifdef CONFIG_DEBUG_KMEMLEAK_DEFAULT_OFF
if (!kmemleak_skip_disable) {
atomic_set(&kmemleak_early_log, 0);
kmemleak_disable();
return;
}
#endif
jiffies_min_age = msecs_to_jiffies(MSECS_MIN_AGE);
jiffies_scan_wait = msecs_to_jiffies(SECS_SCAN_WAIT * 1000);
object_cache = KMEM_CACHE(kmemleak_object, SLAB_NOLEAKTRACE);
scan_area_cache = KMEM_CACHE(kmemleak_scan_area, SLAB_NOLEAKTRACE);
if (crt_early_log >= ARRAY_SIZE(early_log))
pr_warning("Early log buffer exceeded (%d), please increase "
"DEBUG_KMEMLEAK_EARLY_LOG_SIZE\n", crt_early_log);
/* the kernel is still in UP mode, so disabling the IRQs is enough */
local_irq_save(flags);
atomic_set(&kmemleak_early_log, 0);
if (atomic_read(&kmemleak_error)) {
local_irq_restore(flags);
return;
} else
atomic_set(&kmemleak_enabled, 1);
local_irq_restore(flags);
/*
* This is the point where tracking allocations is safe. Automatic
* scanning is started during the late initcall. Add the early logged
* callbacks to the kmemleak infrastructure.
*/
for (i = 0; i < crt_early_log; i++) {
struct early_log *log = &early_log[i];
switch (log->op_type) {
case KMEMLEAK_ALLOC:
early_alloc(log);
break;
case KMEMLEAK_ALLOC_PERCPU:
early_alloc_percpu(log);
break;
case KMEMLEAK_FREE:
kmemleak_free(log->ptr);
break;
case KMEMLEAK_FREE_PART:
kmemleak_free_part(log->ptr, log->size);
break;
case KMEMLEAK_FREE_PERCPU:
kmemleak_free_percpu(log->ptr);
break;
case KMEMLEAK_NOT_LEAK:
kmemleak_not_leak(log->ptr);
break;
case KMEMLEAK_IGNORE:
kmemleak_ignore(log->ptr);
break;
case KMEMLEAK_SCAN_AREA:
kmemleak_scan_area(log->ptr, log->size, GFP_KERNEL);
break;
case KMEMLEAK_NO_SCAN:
kmemleak_no_scan(log->ptr);
break;
default:
kmemleak_warn("Unknown early log operation: %d\n",
log->op_type);
}
if (atomic_read(&kmemleak_warning)) {
print_log_trace(log);
atomic_set(&kmemleak_warning, 0);
}
}
}
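After kmemleak_enabled is set here, the same hooks that are replayed from the early log can be called directly by any kernel code. A short hedged example, where buf is a made-up allocation that is intentionally never freed:
void *buf = kmalloc(4096, GFP_KERNEL);
if (buf)
    kmemleak_not_leak(buf);    /* tell kmemleak this object is not a leak, e.g.
                                * because it is only reachable through a
                                * physical address or a hardware register */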
1.8 setup_per_cpu_pageset
Defined in mm/page_alloc.c
void __init setup_per_cpu_pageset(void)
{
struct zone *zone;
for_each_populated_zone(zone)
setup_zone_pageset(zone);
}
1.9 numa_policy_init
Defined in mm/mempolicy.c
void __init numa_policy_init(void)
{
nodemask_t interleave_nodes;
unsigned long largest = 0;
int nid, prefer = 0;
policy_cache = kmem_cache_create("numa_policy",
sizeof(struct mempolicy),
0, SLAB_PANIC, NULL);
sn_cache = kmem_cache_create("shared_policy_node",
sizeof(struct sp_node),
0, SLAB_PANIC, NULL);
for_each_node(nid) {
preferred_node_policy[nid] = (struct mempolicy) {
.refcnt = ATOMIC_INIT(1),
.mode = MPOL_PREFERRED,
.flags = MPOL_F_MOF | MPOL_F_MORON,
.v = { .preferred_node = nid, },
};
}
/*
* Set interleaving policy for system init. Interleaving is only
* enabled across suitably sized nodes (default is >= 16MB), or
* fall back to the largest node if they're all smaller.
*/
nodes_clear(interleave_nodes);
for_each_node_state(nid, N_MEMORY) {
unsigned long total_pages = node_present_pages(nid);
/* Preserve the largest node */
if (largest < total_pages) {
largest = total_pages;
prefer = nid;
}
/* Interleave this node? */
if ((total_pages << PAGE_SHIFT) >= (16 << 20))
node_set(nid, interleave_nodes);
}
/* All too small, use the largest */
if (unlikely(nodes_empty(interleave_nodes)))
node_set(prefer, interleave_nodes);
if (do_set_mempolicy(MPOL_INTERLEAVE, 0, &interleave_nodes))
printk("numa_policy_init: interleaving failed\n");
check_numabalancing_enable();
}
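The interleave threshold above, (total_pages << PAGE_SHIFT) >= (16 << 20), simply compares a node's memory size in bytes against 16 MiB. A worked example assuming 4 KiB pages:
/* 16 << 20 is 16 MiB; total_pages << 12 is the node size in bytes, so a node
 * joins interleave_nodes only if it has at least (16 << 20) >> 12 = 4096
 * pages; if every node is smaller, the single largest node is used instead. */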
1.10 sched_clock_init
Defined in kernel/sched/clock.c
void sched_clock_init(void)
{
u64 ktime_now = ktime_to_ns(ktime_get());
int cpu;
for_each_possible_cpu(cpu) {
struct sched_clock_data *scd = cpu_sdc(cpu);
scd->tick_raw = 0;
scd->tick_gtod = ktime_now;
scd->clock = ktime_now;
}
sched_clock_running = 1;
}
1.11 calibrate_delay
Defined in init/calibrate.c
void __cpuinit calibrate_delay(void)
{
unsigned long lpj;
static bool printed;
int this_cpu = smp_processor_id();
if (per_cpu(cpu_loops_per_jiffy, this_cpu)) {
lpj = per_cpu(cpu_loops_per_jiffy, this_cpu);
if (!printed)
pr_info("Calibrating delay loop (skipped) "
"already calibrated this CPU");
} else if (preset_lpj) {
lpj = preset_lpj;
if (!printed)
pr_info("Calibrating delay loop (skipped) "
"preset value.. ");
} else if ((!printed) && lpj_fine) {
lpj = lpj_fine;
pr_info("Calibrating delay loop (skipped), "
"value calculated using timer frequency.. ");
} else if ((lpj = calibrate_delay_is_known())) {
;
} else if ((lpj = calibrate_delay_direct()) != 0) {
if (!printed)
pr_info("Calibrating delay using timer "
"specific routine.. ");
} else {
if (!printed)
pr_info("Calibrating delay loop... ");
lpj = calibrate_delay_converge();
}
per_cpu(cpu_loops_per_jiffy, this_cpu) = lpj;
if (!printed)
pr_cont("%lu.%02lu BogoMIPS (lpj=%lu)\n",
lpj/(500000/HZ),
(lpj/(5000/HZ)) % 100, lpj);
loops_per_jiffy = lpj;
printed = true;
}
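The final pr_cont() turns loops_per_jiffy into the familiar BogoMIPS figure. A worked example with made-up numbers:
/* Assume HZ = 100 and lpj = 4980736:
 *   integer part       = lpj / (500000 / HZ)       = 4980736 / 5000       = 996
 *   two-digit fraction = (lpj / (5000 / HZ)) % 100 = (4980736 / 50) % 100 = 14
 * so the kernel logs "996.14 BogoMIPS (lpj=4980736)", i.e. BogoMIPS is
 * loops_per_jiffy * HZ / 500000. */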
1.12 pidmap_init
Defined in kernel/pid.c
void __init pidmap_init(void)
{
/* Veryify no one has done anything silly */
BUILD_BUG_ON(PID_MAX_LIMIT >= PIDNS_HASH_ADDING);
/* bump default and minimum pid_max based on number of cpus */
pid_max = min(pid_max_max, max_t(int, pid_max,
PIDS_PER_CPU_DEFAULT * num_possible_cpus()));
pid_max_min = max_t(int, pid_max_min,
PIDS_PER_CPU_MIN * num_possible_cpus());
pr_info("pid_max: default: %u minimum: %u\n", pid_max, pid_max_min);
init_pid_ns.pidmap[0].page = kzalloc(PAGE_SIZE, GFP_KERNEL);
/* Reserve PID 0. We never call free_pidmap(0) */
set_bit(0, init_pid_ns.pidmap[0].page);
atomic_dec(&init_pid_ns.pidmap[0].nr_free);
init_pid_ns.nr_hashed = PIDNS_HASH_ADDING;
init_pid_ns.pid_cachep = KMEM_CACHE(pid,
SLAB_HWCACHE_ALIGN | SLAB_PANIC);
}
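The pid_max scaling above grows the PID space with the number of possible CPUs. A sketch of the arithmetic, assuming the usual defaults pid_max = 32768 and PIDS_PER_CPU_DEFAULT = 1024:
/* On a machine with 64 possible CPUs:
 *   pid_max     = min(pid_max_max, max(32768, 1024 * 64)) = 65536
 *   pid_max_min = max(pid_max_min, PIDS_PER_CPU_MIN * 64)
 * so large SMP systems automatically get a larger PID range. */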
1.13 anon_vma_init
Defined in mm/rmap.c
void __init anon_vma_init(void)
{
anon_vma_cachep = kmem_cache_create("anon_vma", sizeof(struct anon_vma),
0, SLAB_DESTROY_BY_RCU|SLAB_PANIC, anon_vma_ctor);
anon_vma_chain_cachep = KMEM_CACHE(anon_vma_chain, SLAB_PANIC);
}
1.14 thread_info_cache_init
Defined in kernel/fork.c
void thread_info_cache_init(void)
{
thread_info_cache = kmem_cache_create("thread_info", THREAD_SIZE,
THREAD_SIZE, 0, NULL);
BUG_ON(thread_info_cache == NULL);
}
1.15 cred_init
Defined in kernel/cred.c
void __init cred_init(void)
{
/* allocate a slab in which we can store credentials */
cred_jar = kmem_cache_create("cred_jar", sizeof(struct cred),
0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
}
1.16 fork_init
Defined in kernel/fork.c
void __init fork_init(unsigned long mempages)
{
#ifndef CONFIG_ARCH_TASK_STRUCT_ALLOCATOR
#ifndef ARCH_MIN_TASKALIGN
#define ARCH_MIN_TASKALIGN L1_CACHE_BYTES
#endif
/* create a slab on which task_structs can be allocated */
task_struct_cachep =
kmem_cache_create("task_struct", sizeof(struct task_struct),
ARCH_MIN_TASKALIGN, SLAB_PANIC | SLAB_NOTRACK, NULL);
#endif
/* do the arch specific task caches init */
arch_task_cache_init();
/*
* The default maximum number of threads is set to a safe
* value: the thread structures can take up at most half
* of memory.
*/
max_threads = mempages / (8 * THREAD_SIZE / PAGE_SIZE);
/*
* we need to allow at least 20 threads to boot a system
*/
if (max_threads < 20)
max_threads = 20;
init_task.signal->rlim[RLIMIT_NPROC].rlim_cur = max_threads/2;
init_task.signal->rlim[RLIMIT_NPROC].rlim_max = max_threads/2;
init_task.signal->rlim[RLIMIT_SIGPENDING] =
init_task.signal->rlim[RLIMIT_NPROC];
}
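The max_threads formula above bounds how much memory kernel stacks may consume. A rough check with assumed sizes:
/* With PAGE_SIZE = 4 KiB and THREAD_SIZE = 8 KiB:
 *   max_threads = mempages / (8 * 8192 / 4096) = mempages / 16
 * and max_threads kernel stacks then occupy mempages / 16 * 2 = mempages / 8
 * pages of physical memory; RLIMIT_NPROC is set to max_threads / 2. */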
1.17 proc_caches_init
Defined in kernel/fork.c
void __init proc_caches_init(void)
{
sighand_cachep = kmem_cache_create("sighand_cache",
sizeof(struct sighand_struct), 0,
SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_DESTROY_BY_RCU|
SLAB_NOTRACK, sighand_ctor);
signal_cachep = kmem_cache_create("signal_cache",
sizeof(struct signal_struct), 0,
SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL);
files_cachep = kmem_cache_create("files_cache",
sizeof(struct files_struct), 0,
SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL);
fs_cachep = kmem_cache_create("fs_cache",
sizeof(struct fs_struct), 0,
SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL);
/*
* FIXME! The "sizeof(struct mm_struct)" currently includes the
* whole struct cpumask for the OFFSTACK case. We could change
* this to *only* allocate as much of it as required by the
* maximum number of CPU's we can ever have. The cpumask_allocation
* is at the end of the structure, exactly for that reason.
*/
mm_cachep = kmem_cache_create("mm_struct",
sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN,
SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL);
vm_area_cachep = KMEM_CACHE(vm_area_struct, SLAB_PANIC);
mmap_init();
nsproxy_cache_init();
}
1.18 buffer_init
Defined in fs/buffer.c
void __init buffer_init(void)
{
unsigned long nrpages;
bh_cachep = kmem_cache_create("buffer_head",
sizeof(struct buffer_head), 0,
(SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|
SLAB_MEM_SPREAD),
NULL);
/*
* Limit the bh occupancy to 10% of ZONE_NORMAL
*/
nrpages = (nr_free_buffer_pages() * 10) / 100;
max_buffer_heads = nrpages * (PAGE_SIZE / sizeof(struct buffer_head));
hotcpu_notifier(buffer_cpu_notify, 0);
}
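The max_buffer_heads computation caps buffer_head usage at roughly 10% of lowmem. A worked example with assumed sizes:
/* With PAGE_SIZE = 4096 and sizeof(struct buffer_head) of roughly 104 bytes,
 * each page holds about 39 buffer_heads, so
 *   max_buffer_heads ~ (10% of free ZONE_NORMAL pages) * 39
 * which corresponds to buffer_heads pinning about 10% of ZONE_NORMAL. */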
1.19 key_init
Defined in security/keys/key.c
void __init key_init(void)
{
/* allocate a slab in which we can store keys */
key_jar = kmem_cache_create("key_jar", sizeof(struct key),
0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
/* add the special key types */
list_add_tail(&key_type_keyring.link, &key_types_list);
list_add_tail(&key_type_dead.link, &key_types_list);
list_add_tail(&key_type_user.link, &key_types_list);
list_add_tail(&key_type_logon.link, &key_types_list);
/* record the root user tracking */
rb_link_node(&root_key_user.node,
NULL,
&key_user_tree.rb_node);
rb_insert_color(&root_key_user.node,
&key_user_tree);
}
1.20 security_init
Defined in security/security.c
int __init security_init(void)
{
printk(KERN_INFO "Security Framework initialized\n");
security_fixup_ops(&default_security_ops);
security_ops = &default_security_ops;
do_security_initcalls();
return 0;
}
1.21 dbg_late_init
Defined in kernel/debug/debug_core.c
void __init dbg_late_init(void)
{
dbg_is_early = false;
if (kgdb_io_module_registered)
kgdb_arch_late();
kdb_init(KDB_INIT_FULL);
}
1.22 vfs_caches_init
Defined in fs/dcache.c
void __init vfs_caches_init(unsigned long mempages)
{
unsigned long reserve;
/* Base hash sizes on available memory, with a reserve equal to
150% of current kernel size */
reserve = min((mempages - nr_free_pages()) * 3/2, mempages - 1);
mempages -= reserve;
names_cachep = kmem_cache_create("names_cache", PATH_MAX, 0,
SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
dcache_init();
inode_init();
files_init(mempages);
mnt_init();
bdev_cache_init();
chrdev_init();
}
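The reserve calculation above withholds 150% of the memory the kernel already occupies before sizing the VFS hash tables. A sketch with made-up numbers:
/* On a 1 GiB machine (mempages = 262144 with 4 KiB pages) where the kernel
 * already uses 32768 pages (mempages - nr_free_pages()):
 *   reserve  = min(32768 * 3 / 2, 262143) = 49152
 *   mempages = 262144 - 49152 = 212992
 * and this reduced count is what files_init() and the hash sizing see. */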
1.23 page_writeback_init
Defined in mm/page-writeback.c
void __init page_writeback_init(void)
{
writeback_set_ratelimit();
register_cpu_notifier(&ratelimit_nb);
fprop_global_init(&writeout_completions);
}
1.24 proc_root_init
Defined in fs/proc/root.c
void __init proc_root_init(void)
{
int err;
proc_init_inodecache();
err = register_filesystem(&proc_fs_type);
if (err)
return;
proc_self_init();
proc_symlink("mounts", NULL, "self/mounts");
proc_net_init();
#ifdef CONFIG_SYSVIPC
proc_mkdir("sysvipc", NULL);
#endif
proc_mkdir("fs", NULL);
proc_mkdir("driver", NULL);
proc_mkdir("fs/nfsd", NULL); /* somewhere for the nfsd filesystem to be mounted */
#if defined(CONFIG_SUN_OPENPROMFS) || defined(CONFIG_SUN_OPENPROMFS_MODULE)
/* just give it a mountpoint */
proc_mkdir("openprom", NULL);
#endif
proc_tty_init();
#ifdef CONFIG_PROC_DEVICETREE
proc_device_tree_init();
#endif
proc_mkdir("bus", NULL);
proc_sys_init();
}
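With procfs registered and the standard directories created above, other subsystems can publish their own entries through the proc_create() interface of this kernel generation. A hedged sketch (my_driver, my_show and my_fops are assumed names):
static int my_show(struct seq_file *m, void *v)
{
    seq_puts(m, "hello from my_driver\n");
    return 0;
}

static int my_open(struct inode *inode, struct file *file)
{
    return single_open(file, my_show, NULL);
}

static const struct file_operations my_fops = {
    .owner   = THIS_MODULE,
    .open    = my_open,
    .read    = seq_read,
    .llseek  = seq_lseek,
    .release = single_release,
};

static int __init my_proc_init(void)
{
    if (!proc_create("my_driver", 0444, NULL, &my_fops))
        return -ENOMEM;        /* the entry appears as /proc/my_driver */
    return 0;
}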
1.25 cgroup_init
Defined in kernel/cgroup.c
int __init cgroup_init(void)
{
int err;
int i;
unsigned long key;
err = bdi_init(&cgroup_backing_dev_info);
if (err)
return err;
for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
struct cgroup_subsys *ss = subsys[i];
/* at bootup time, we don't worry about modular subsystems */
if (!ss || ss->module)
continue;
if (!ss->early_init)
cgroup_init_subsys(ss);
if (ss->use_id)
cgroup_init_idr(ss, init_css_set.subsys[ss->subsys_id]);
}
/* Add init_css_set to the hash table */
key = css_set_hash(init_css_set.subsys);
hash_add(css_set_table, &init_css_set.hlist, key);
BUG_ON(!init_root_id(&rootnode));
cgroup_kobj = kobject_create_and_add("cgroup", fs_kobj);
if (!cgroup_kobj) {
err = -ENOMEM;
goto out;
}
err = register_filesystem(&cgroup_fs_type);
if (err < 0) {
kobject_put(cgroup_kobj);
goto out;
}
proc_create("cgroups", 0, NULL, &proc_cgroupstats_operations);
out:
if (err)
bdi_destroy(&cgroup_backing_dev_info);
return err;
}
1.26 cpuset_init
Defined in kernel/cpuset.c
int __init cpuset_init(void)
{
int err = 0;
if (!alloc_cpumask_var(&top_cpuset.cpus_allowed, GFP_KERNEL))
BUG();
cpumask_setall(top_cpuset.cpus_allowed);
nodes_setall(top_cpuset.mems_allowed);
fmeter_init(&top_cpuset.fmeter);
set_bit(CS_SCHED_LOAD_BALANCE, &top_cpuset.flags);
top_cpuset.relax_domain_level = -1;
err = register_filesystem(&cpuset_fs_type);
if (err < 0)
return err;
if (!alloc_cpumask_var(&cpus_attach, GFP_KERNEL))
BUG();
number_of_cpusets = 1;
return 0;
}
1.27 taskstats_init_early
Defined in kernel/taskstats.c
void __init taskstats_init_early(void)
{
unsigned int i;
taskstats_cache = KMEM_CACHE(taskstats, SLAB_PANIC);
for_each_possible_cpu(i) {
INIT_LIST_HEAD(&(per_cpu(listener_array, i).list));
init_rwsem(&(per_cpu(listener_array, i).sem));
}
}
1.28 acpi_early_init
Defined in drivers/acpi/bus.c
void __init acpi_early_init(void)
{
acpi_status status = AE_OK;
if (acpi_disabled)
return;
printk(KERN_INFO PREFIX "Core revision %08x\n", ACPI_CA_VERSION);
/* enable workarounds, unless strict ACPI spec. compliance */
if (!acpi_strict)
acpi_gbl_enable_interpreter_slack = TRUE;
acpi_gbl_permanent_mmap = 1;
/*
* If the machine falls into the DMI check table,
* DSDT will be copied to memory
*/
dmi_check_system(dsdt_dmi_table);
status = acpi_reallocate_root_table();
if (ACPI_FAILURE(status)) {
printk(KERN_ERR PREFIX
"Unable to reallocate ACPI tables\n");
goto error0;
}
status = acpi_initialize_subsystem();
if (ACPI_FAILURE(status)) {
printk(KERN_ERR PREFIX
"Unable to initialize the ACPI Interpreter\n");
goto error0;
}
status = acpi_load_tables();
if (ACPI_FAILURE(status)) {
printk(KERN_ERR PREFIX
"Unable to load the System Description Tables\n");
goto error0;
}
#ifdef CONFIG_X86
if (!acpi_ioapic) {
/* compatible (0) means level (3) */
if (!(acpi_sci_flags & ACPI_MADT_TRIGGER_MASK)) {
acpi_sci_flags &= ~ACPI_MADT_TRIGGER_MASK;
acpi_sci_flags |= ACPI_MADT_TRIGGER_LEVEL;
}
/* Set PIC-mode SCI trigger type */
acpi_pic_sci_set_trigger(acpi_gbl_FADT.sci_interrupt,
(acpi_sci_flags & ACPI_MADT_TRIGGER_MASK) >> 2);
} else {
/*
* now that acpi_gbl_FADT is initialized,
* update it with result from INT_SRC_OVR parsing
*/
acpi_gbl_FADT.sci_interrupt = acpi_sci_override_gsi;
}
#endif
status = acpi_enable_subsystem(~ACPI_NO_ACPI_ENABLE);
if (ACPI_FAILURE(status)) {
printk(KERN_ERR PREFIX "Unable to enable ACPI\n");
goto error0;
}
/*
* If the system is using ACPI then we can be reasonably
* confident that any regulators are managed by the firmware
* so tell the regulator core it has everything it needs to
* know.
*/
regulator_has_full_constraints();
return;
error0:
disable_acpi();
return;
}
1.29 sfi_init_late
Defined in drivers/sfi/sfi_core.c
void __init sfi_init_late(void)
{
int length;
if (sfi_disabled)
return;
length = syst_va->header.len;
sfi_unmap_memory(syst_va, sizeof(struct sfi_table_simple));
/* Use ioremap now after it is ready */
sfi_use_ioremap = 1;
syst_va = sfi_map_memory(syst_pa, length);
sfi_acpi_init();
}
1.30 ftrace_init
Defined in kernel/trace/ftrace.c
void __init ftrace_init(void)
{
unsigned long count, addr, flags;
int ret;
/* Keep the ftrace pointer to the stub */
addr = (unsigned long)ftrace_stub;
local_irq_save(flags);
ftrace_dyn_arch_init(&addr);
local_irq_restore(flags);
/* ftrace_dyn_arch_init places the return code in addr */
if (addr)
goto failed;
count = __stop_mcount_loc - __start_mcount_loc;
ret = ftrace_dyn_table_alloc(count);
if (ret)
goto failed;
last_ftrace_enabled = ftrace_enabled = 1;
ret = ftrace_process_locs(NULL,
__start_mcount_loc,
__stop_mcount_loc);
ret = register_module_notifier(&ftrace_module_enter_nb);
if (ret)
pr_warning("Failed to register trace ftrace module enter notifier\n");
ret = register_module_notifier(&ftrace_module_exit_nb);
if (ret)
pr_warning("Failed to register trace ftrace module exit notifier\n");
set_ftrace_early_filters();
return;
failed:
ftrace_disabled = 1;
}