Linux学习之start_kernel()概述

start_kernel()内容

\linux-4.18\init\main.c

/*
 * start_kernel() - boot-time initialisation sequence, as quoted from
 * init/main.c of linux-4.18.
 *
 * Calls the initialisation routines below in a fixed order. Local
 * interrupts are switched off by local_irq_disable() near the top and
 * switched back on by local_irq_enable() after call_function_init().
 * The function ends by calling rest_init().
 */
asmlinkage __visible void __init start_kernel(void)
{
	char *command_line;	/* set via setup_arch(&command_line) below */
	char *after_dashes;	/* return value of the first parse_args() call */

	set_task_stack_end_magic(&init_task);
	smp_setup_processor_id();
	debug_objects_early_init();

	cgroup_init_early();

	local_irq_disable();
	early_boot_irqs_disabled = true;

	/*
	 * Interrupts are still disabled. Do necessary setups, then
	 * enable them.
	 */
	boot_cpu_init();
	page_address_init();
	pr_notice("%s", linux_banner);
	setup_arch(&command_line);
	/*
	 * Set up the initial canary and entropy after arch
	 * and after adding latent and command line entropy.
	 */
	add_latent_entropy();
	add_device_randomness(command_line, strlen(command_line));
	boot_init_stack_canary();
	mm_init_cpumask(&init_mm);
	setup_command_line(command_line);
	setup_nr_cpu_ids();
	setup_per_cpu_areas();
	smp_prepare_boot_cpu();	/* arch-specific boot-cpu hooks */
	boot_cpu_hotplug_init();

	build_all_zonelists(NULL);
	page_alloc_init();

	pr_notice("Kernel command line: %s\n", boot_command_line);
	parse_early_param();
	after_dashes = parse_args("Booting kernel",
				  static_command_line, __start___param,
				  __stop___param - __start___param,
				  -1, -1, NULL, &unknown_bootoption);
	/*
	 * If parse_args() returned a usable pointer (neither NULL nor an
	 * error pointer), parse that remainder again with set_init_arg as
	 * the handler.
	 */
	if (!IS_ERR_OR_NULL(after_dashes))
		parse_args("Setting init args", after_dashes, NULL, 0, -1, -1,
			   NULL, set_init_arg);

	jump_label_init();

	/*
	 * These use large bootmem allocations and must precede
	 * kmem_cache_init()
	 */
	setup_log_buf(0);
	vfs_caches_init_early();
	sort_main_extable();
	trap_init();
	mm_init();

	ftrace_init();

	/* trace_printk can be enabled here */
	early_trace_init();

	/*
	 * Set up the scheduler prior starting any interrupts (such as the
	 * timer interrupt). Full topology setup happens at smp_init()
	 * time - but meanwhile we still have a functioning scheduler.
	 */
	sched_init();
	/*
	 * Disable preemption - early bootup scheduling is extremely
	 * fragile until we cpu_idle() for the first time.
	 */
	preempt_disable();
	/* Warn and disable interrupts again if something enabled them. */
	if (WARN(!irqs_disabled(),
		 "Interrupts were enabled *very* early, fixing it\n"))
		local_irq_disable();
	radix_tree_init();

	/*
	 * Set up housekeeping before setting up workqueues to allow the unbound
	 * workqueue to take non-housekeeping into account.
	 */
	housekeeping_init();

	/*
	 * Allow workqueue creation and work item queueing/cancelling
	 * early.  Work item execution depends on kthreads and starts after
	 * workqueue_init().
	 */
	workqueue_init_early();

	rcu_init();

	/* Trace events are available after this */
	trace_init();

	if (initcall_debug)
		initcall_debug_enable();

	context_tracking_init();
	/* init some links before init_ISA_irqs() */
	early_irq_init();
	init_IRQ();
	tick_init();
	rcu_init_nohz();
	init_timers();
	hrtimers_init();
	softirq_init();
	timekeeping_init();
	time_init();
	sched_clock_postinit();
	printk_safe_init();
	perf_event_init();
	profile_init();
	call_function_init();
	/*
	 * End of the interrupts-disabled phase: warn if interrupts are
	 * already on, clear the early-boot flag, then enable them.
	 */
	WARN(!irqs_disabled(), "Interrupts were enabled early\n");
	early_boot_irqs_disabled = false;
	local_irq_enable();

	kmem_cache_init_late();

	/*
	 * HACK ALERT! This is early. We're enabling the console before
	 * we've done PCI setups etc, and console_init() must be aware of
	 * this. But we do want output early, in case something goes wrong.
	 */
	console_init();
	/*
	 * panic_later is only acted on here, after console_init();
	 * presumably so that the panic message can be seen - confirm.
	 */
	if (panic_later)
		panic("Too many boot %s vars at `%s'", panic_later,
		      panic_param);

	lockdep_info();

	/*
	 * Need to run this when irqs are enabled, because it wants
	 * to self-test [hard/soft]-irqs on/off lock inversion bugs
	 * too:
	 */
	locking_selftest();

	/*
	 * This needs to be called before any devices perform DMA
	 * operations that might use the SWIOTLB bounce buffers. It will
	 * mark the bounce buffers as decrypted so that their usage will
	 * not cause "plain-text" data to be decrypted when accessed.
	 */
	mem_encrypt_init();

#ifdef CONFIG_BLK_DEV_INITRD
	/*
	 * Disable the initrd (initrd_start = 0) when its first page frame
	 * lies below min_low_pfn and initrd_below_start_ok is not set.
	 */
	if (initrd_start && !initrd_below_start_ok &&
	    page_to_pfn(virt_to_page((void *)initrd_start)) < min_low_pfn) {
		pr_crit("initrd overwritten (0x%08lx < 0x%08lx) - disabling it.\n",
		    page_to_pfn(virt_to_page((void *)initrd_start)),
		    min_low_pfn);
		initrd_start = 0;
	}
#endif
	page_ext_init();
	kmemleak_init();
	debug_objects_mem_init();
	setup_per_cpu_pageset();
	numa_policy_init();
	acpi_early_init();
	/* late_time_init is an optional hook: call it only when it is set. */
	if (late_time_init)
		late_time_init();
	calibrate_delay();
	pid_idr_init();
	anon_vma_init();
#ifdef CONFIG_X86
	if (efi_enabled(EFI_RUNTIME_SERVICES))
		efi_enter_virtual_mode();
#endif
	thread_stack_cache_init();
	cred_init();
	fork_init();
	proc_caches_init();
	uts_ns_init();
	buffer_init();
	key_init();
	security_init();
	dbg_late_init();
	vfs_caches_init();
	pagecache_init();
	signals_init();
	seq_file_init();
	proc_root_init();
	nsfs_init();
	cpuset_init();
	cgroup_init();
	taskstats_init_early();
	delayacct_init();

	check_bugs();

	acpi_subsystem_init();
	arch_post_acpi_subsys_init();
	sfi_init_late();

	if (efi_enabled(EFI_RUNTIME_SERVICES)) {
		efi_free_boot_services();
	}

	/* Do the rest non-__init'ed, we're now alive */
	rest_init();
}

“asmlinkage”、"__visible"、"__init"宏定义

\linux-4.18\include\linux\linkage.h

/*
 * CPP_ASMLINKAGE expands to extern "C" when compiled as C++ and to
 * nothing otherwise.
 */
#ifdef __cplusplus
#define CPP_ASMLINKAGE extern "C"
#else
#define CPP_ASMLINKAGE
#endif

/*
 * Default definition of asmlinkage; only used when nothing has defined
 * asmlinkage before this point.
 */
#ifndef asmlinkage
#define asmlinkage CPP_ASMLINKAGE
#endif

首先是关于asmlinkage __visible void __init start_kernel(void)的“asmlinkage”、“__visible”和“__init”的作用。

asmlinkage是一个宏:当用C++编译器编译时,它展开为extern "C",让指定的函数按C语言的链接方式处理;用C编译器编译时它展开为空。(部分体系结构会在自己的头文件中重新定义asmlinkage,例如x86-32上还会附加regparm(0)属性。)

如果作为C++函数处理,编译器会在目标文件(机器语言)阶段把参数个数、参数类型等信息添加到函数名称中(即名称修饰,name mangling)。

这里添加的信息用于“确定系统应该从多个因重载而存在的同名函数中调用哪个函数”。

C语言(以及汇编代码)以函数名称中不包含这些附加信息为前提,因此如果函数被按C++方式编译,按原名引用它的地方就会发生链接错误。

要解决此错误,就需要asmlinkage宏(即extern "C")。

下面是__visible的定义。这个宏根据所用编译器的不同而有不同的定义:要么是#define __visible(展开为空),要么定义如下:
\linux-4.18\include\linux\compiler-gcc.h

/* __visible expands to GCC's externally_visible attribute. */
#define __visible	__attribute__((externally_visible))

作为GCC扩展功能的__attribute__可以通过参数的值将各种属性添加到函数、变量和类型。
externally_visible在gcc的函数属性文档中描述如下:

externally_visible
This attribute, attached to a global variable or function, nullifies the effect of the -fwhole-program command-line option, so the object remains visible outside the current compilation unit.
If -fwhole-program is used together with -flto and gold is used as the linker plugin, externally_visible attributes are automatically added to functions (not variable yet due to a current gold issue) that are accessed outside of LTO objects according to resolution file produced by gold. For other linkers that cannot generate resolution file, explicit externally_visible attributes are still necessary.

externally_visible的作用是:对被标记的函数或全局变量抵消gcc编译选项“-fwhole-program”(全程序优化选项)的效果,使该符号在当前编译单元之外仍然可见。另外,当-fwhole-program与-flto一起使用并以gold作为链接器插件时,被LTO对象之外引用的函数会被自动加上该属性。

最后是__init的定义。

\linux-4.18\include\linux\init.h

/*
 * __init combines four annotations: placement in the .init.text section,
 * __cold, __latent_entropy and __noinitretpoline.
 */
#define __init		__section(.init.text) __cold  __latent_entropy __noinitretpoline

__section(.init.text)是用于将函数数据放置在内存中初始化函数的区域中的声明。
内存部分可以分为放置函数的区域,堆栈的区域,模块的区域等。
RTOS(Real Time OS)的section划分规模较小、简单易懂,感兴趣的话可以自行查阅一下。
__cold与__visible一样,根据编译器的不同而不同。
其内容要么是#define __cold(展开为空),要么定义如下。

/* __cold expands to GCC's cold function attribute. */
#define __cold			__attribute__((__cold__))

在__attribute__中指定__cold时的效果如下(部分摘录):

cold
The cold attribute on functions is used to inform the compiler that the function is unlikely to be
executed. The function is optimized for size rather than speed and on many targets it is placed into
special subsection of the text section so all cold functions appears close together improving code
locality of non-cold parts of program.

仅从这里的说明来看,它似乎与__section(.init.text)起着相似的作用,但两者并不相同:__section(.init.text)指定函数被放入哪个段,而__cold是告诉编译器“该函数很少被执行”的优化提示。
从上面__init的定义可以看出,__section()和__cold是同时生效的,并不是二选一。
最后顺便看一下notrace的定义(注意:上面4.18的__init定义中并没有出现notrace)。定义如下。

\linux-4.18\include\linux\compiler_types.h

/*
 * notrace: the hotpatch(0,0) attribute when CC_USING_HOTPATCH is defined
 * and __CHECKER__ is not; otherwise the no_instrument_function attribute.
 */
#if defined(CC_USING_HOTPATCH) && !defined(__CHECKER__)
#define notrace __attribute__((hotpatch(0,0)))
#else
#define notrace __attribute__((no_instrument_function))
#endif

no_instrument_function用于禁止编译器在该函数的入口和出口插入插桩(profiling)调用(例如使用-finstrument-functions时),也就是说该函数不会被跟踪;它与函数的递归调用无关。

set_task_stack_end_magic

\linux-4.18\kernel\fork.c

/*
 * Write STACK_END_MAGIC to the location returned by end_of_stack(tsk).
 *
 * @tsk: task whose stack end is marked.
 */
void set_task_stack_end_magic(struct task_struct *tsk)
{
	unsigned long *stackend;

	stackend = end_of_stack(tsk);
	*stackend = STACK_END_MAGIC;	/* for overflow detection */
}

在堆栈增长的极限位置写入由STACK_END_MAGIC定义的值。如果该值发生了变化,就说明有数据越过堆栈的增长极限被压入(push),据此即可检测到堆栈溢出的发生。

那么,这个检查在哪里实施呢?在task_stack_end_corrupted()中实施:它检查end_of_stack()所指位置的值是否仍然等于STACK_END_MAGIC。

root/include/linux/sched/task_stack.h
task_stack_end_corrupted()

smp_setup_processor_id

 584 void __init smp_setup_processor_id(void)
 585 {
 586     int i;
Status:Open

I<1>20201203 不喜
SMP出ないならcpuは0。SMPならread_cpuid_mpidr()から情報取ってくる
 587     u32 mpidr = is_smp() ? read_cpuid_mpidr() & MPIDR_HWID_BITMASK : 0;
 588     u32 cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
 589 
 590     cpu_logical_map(0) = cpu;
 591     for (i = 1; i < nr_cpu_ids; ++i)
 592         cpu_logical_map(i) = i == cpu ? 0 : i;
 593 
 594     /*
 595      * clear __my_cpu_offset on boot CPU to avoid hang caused by
 596      * using percpu variable early, for example, lockdep will
 597      * access percpu variable inside lock_release
 598      */
 599     set_my_cpu_offset(0);
 600 
 601     pr_info("Booting Linux on physical CPU 0x%x\n", mpidr);
 602 }

smp_setup_processor_id() 函数的主要任务就是设置 boot CPU 信息;函数首先调用 is_smp() 函数判断 当前系统是否支持 SMP。

debug_objects_early_init

/* One hash bucket: a list head together with the raw spinlock stored beside it. */
struct debug_bucket {
    struct hlist_head   list;
    raw_spinlock_t      lock;
};
/* Static table of ODEBUG_HASH_SIZE buckets. */
static struct debug_bucket  obj_hash[ODEBUG_HASH_SIZE];

/*
 * Initialise the lock of every obj_hash bucket, then add each of the
 * ODEBUG_POOL_SIZE entries of obj_static_pool to the head of obj_pool.
 */
void __init debug_objects_early_init(void)
{
    int i;

    /* Each bucket has its own raw spinlock. */
    for (i = 0; i < ODEBUG_HASH_SIZE; i++)
        raw_spin_lock_init(&obj_hash[i].lock);

    /* Link the static pool entries into the obj_pool list. */
    for (i = 0; i < ODEBUG_POOL_SIZE; i++)
        hlist_add_head(&obj_static_pool[i].node, &obj_pool);
}

raw_spin_lock_init(include/linux/spinlock.h)初始化raw_spinlock_t结构的成员变量。spin lock是一种解决多处理器系统同步问题的机制。

自旋锁会在一个很短的循环中反复等待(忙等),直到持有锁的其他执行流释放被锁保护的资源为止。从结构体定义来看,这里的spin lock应该是用来保护obj_hash各个桶(bucket)中的链表,而这个哈希表似乎用于“lifetime debugging of objects(对象生存期调试)”。

boot_init_stack_canary

/*
 * Generate the initial stack canary: random bytes XORed with
 * LINUX_VERSION_CODE, stored in current->stack_canary and copied to
 * __stack_chk_guard.
 */
static __always_inline void boot_init_stack_canary(void)
{
    unsigned long canary;

    /* Try to get a semi random initial value. */
    get_random_bytes(&canary, sizeof(canary));
    canary ^= LINUX_VERSION_CODE;

    current->stack_canary = canary;
    /* The global guard gets the same value as the current task's canary. */
    __stack_chk_guard = current->stack_canary;
}

作为安全措施,为current(当前正在运行的进程)设置堆栈canary(stack_canary)。

通常,堆栈中存储着参数、返回地址等,如果第三方能够改写这些内容(例如通过缓冲区溢出),就有可能执行任意代码。canary是放在堆栈上的一个随机值,函数返回前会检查它是否被改动,从而发现这类改写。

待续…

你可能感兴趣的:(Linux,kernel)