start_kernel

 

/*
 * Early C entry point for x86-64 shown in this excerpt: clears BSS, removes
 * the mapping of virtual address 0, installs early_idt_handler for the first
 * NUM_EXCEPTION_VECTORS IDT entries, loads the IDT, and then hands over to
 * x86_64_start_reservations().
 *
 * @real_mode_data: passed through unchanged to x86_64_start_reservations().
 */
void __init x86_64_start_kernel(char * real_mode_data)
{
	int i;

	/* Sanity checks on the kernel image and module area mappings (elided in this excerpt) */
	...

	/* clear bss before set_intr_gate with early_idt_handler */
	clear_bss();

	/* Make NULL pointers segfault */
	zap_identity_mappings();

	/* KERNEL_IMAGE_START =	0xffffffff80000000UL	KERNEL_IMAGE_SIZE =	512M
	 * PAGE_OFFSET =	0xffff880000000000UL	PAGE_SHIFT =		12
	 */
	max_pfn_mapped = KERNEL_IMAGE_SIZE >> PAGE_SHIFT;

	for (i = 0; i < NUM_EXCEPTION_VECTORS; i++)	// NUM_EXCEPTION_VECTORS = 32 is defined in segment.h
		set_intr_gate(i, early_idt_handler);	// write a gate descriptor into idt_table[i] whose handler is early_idt_handler

	/* struct desc_ptr idt_descr = { NR_VECTORS * 16 - 1, (unsigned long) idt_table };
	 * The lidt instruction initializes the IDTR register from idt_descr; NR_VECTORS = 256 */
	load_idt((const struct desc_ptr *)&idt_descr);

	x86_64_start_reservations(real_mode_data);
}

/*
 * Clear the kernel page-global-directory entry that covers virtual address 0
 * and then flush the TLB, so the cleared entry is no longer used.
 */
static void __init zap_identity_mappings(void)
{
	/* Same effect as: *(init_mm.pgd + pgd_index(0)) = (pgd_t)0 */
	pgd_clear(pgd_offset_k(0UL));

	/* Per the original note, the flush is a read-modify-write of CR4. */
	__flush_tlb_all();
}

/*
 * Zero every byte in the region delimited by the __bss_start and __bss_stop
 * symbols, i.e. [__bss_start, __bss_stop).
 */
static void __init clear_bss(void)
{
	unsigned long bss_len = (unsigned long) __bss_stop - (unsigned long) __bss_start;

	memset(__bss_start, 0, bss_len);
}

/*
 * Copy the boot parameter block into boot_params and, when the header carries
 * a command-line pointer, copy the command line into boot_command_line.
 *
 * @real_mode_data: address of the boot parameter block to copy from; exactly
 *                  sizeof(boot_params) bytes are read.
 */
static void __init copy_bootdata(char *real_mode_data)
{
	memcpy(&boot_params, real_mode_data, sizeof(boot_params));

	/* No command line supplied: leave boot_command_line untouched. */
	if (!boot_params.hdr.cmd_line_ptr)
		return;

	/* cmd_line_ptr is converted with __va() before it is dereferenced. */
	memcpy(boot_command_line, __va(boot_params.hdr.cmd_line_ptr),
	       COMMAND_LINE_SIZE);
}
/*
 * Save the boot data, initialize memblock, reserve the memory ranges that must
 * survive early boot (kernel image, optional initrd, EBDA), and then enter the
 * architecture-independent start_kernel().
 *
 * @real_mode_data: boot parameter block address; it is converted with __va()
 *                  before being passed to copy_bootdata().
 */
void __init x86_64_start_reservations(char *real_mode_data)
{
	copy_bootdata(__va(real_mode_data));

	memblock_init();

	/* Reserve the range from _text up to __bss_stop. */
	memblock_x86_reserve_range(__pa_symbol(&_text),
				   __pa_symbol(&__bss_stop),
				   "TEXT DATA BSS");

#ifdef CONFIG_BLK_DEV_INITRD
	/* Reserve the initrd when a boot loader reported one. */
	if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) {
		unsigned long rd_base  = boot_params.hdr.ramdisk_image;
		unsigned long rd_bytes = boot_params.hdr.ramdisk_size;
		/* Only the end is assumed to be possibly unaligned, so round it up. */
		unsigned long rd_limit = PAGE_ALIGN(rd_base + rd_bytes);

		memblock_x86_reserve_range(rd_base, rd_limit, "RAMDISK");
	}
#endif

	reserve_ebda_region();

	/*
	 * From here on, anything still needed from the boot loader, the BIOS
	 * or the kernel text should already be early-reserved or marked as
	 * non-RAM in e820; all remaining memory is free to use.
	 */
	start_kernel();
}


/*
 * Main kernel initialization routine, called at the end of
 * x86_64_start_reservations(). It runs the subsystem initializers in a fixed
 * order: first with interrupts disabled (from local_irq_disable() up to
 * local_irq_enable()), then with interrupts enabled, and finally calls
 * rest_init(). The order of the calls is significant; do not reorder them
 * without checking each initializer's dependencies.
 */
asmlinkage void __init start_kernel(void)
{
	char * command_line;
	extern const struct kernel_param __start___param[], __stop___param[];

	/* On a single-CPU system this does nothing; on SMP it identifies the CPU we booted on */
	smp_setup_processor_id();

	/* Need to run as early as possible, to initialize the lockdep hash: */
	lockdep_init();
	debug_objects_early_init();

	/* Set up the initial canary ASAP: */
	boot_init_stack_canary();
	cgroup_init_early();	/* cgroup initialization at boot: set up the subsystems that request early init */
	local_irq_disable();
	early_boot_irqs_disabled = true;
	/* Interrupts are still disabled. Do necessary setups, then enable them */
	tick_init();
	boot_cpu_init();
	/* Initialize page addresses, linking them together with lists */
	page_address_init();
	printk(KERN_NOTICE "%s", linux_banner);

	/* Architecture-specific function; the implementation used is selected by
	 * the ARCH variable in the Makefile at the top of the source tree.
	 * It hands back the command line through &command_line.
	 */
	setup_arch(&command_line);
	mm_init_owner(&init_mm, &init_task);	/* init_mm.owner = &init_task */
	mm_init_cpumask(&init_mm);
	setup_command_line(command_line);
	setup_nr_cpu_ids();			/* nr_cpu_ids = NR_CPUS */
	setup_per_cpu_areas();
	smp_prepare_boot_cpu();			/* arch-specific boot-cpu hooks */

	build_all_zonelists(NULL);
	page_alloc_init();

	printk(KERN_NOTICE "Kernel command line: %s\n", boot_command_line);

	/* Kernel options are parsed in two passes */
	parse_early_param();
	parse_args("Booting kernel", static_command_line, __start___param,
		   __stop___param - __start___param, &unknown_bootoption);

	/* These use large bootmem allocations and must precede kmem_cache_init() */
	setup_log_buf(0);

	/* Initialize the hash table used to get a process descriptor pointer from a PID */
	pidhash_init();

	/* Early virtual filesystem initialization.
	 * NOTE(review): the original note lists dcache, inode, files, mnt,
	 * bdev_cache and chrdev_init() here; confirm which of these are handled
	 * in this early stage rather than in vfs_caches_init() below.
	 */
	vfs_caches_init_early();
	sort_main_extable();

	/* trap_init() sets up the system-reserved interrupt vectors (exceptions,
	 * non-maskable interrupt and system calls); init_IRQ(), called further
	 * down, sets up the remaining interrupt vectors.
	 */
	trap_init();
	mm_init();

	/* Set up the scheduler prior to starting any interrupts (such as the
	 * timer interrupt). Full topology setup happens at smp_init()
	 * time - but meanwhile we still have a functioning scheduler.
	 */
	sched_init();
	/*
	 * Disable preemption - early bootup scheduling is extremely
	 * fragile until we cpu_idle() for the first time.
	 */
	preempt_disable();
	/* Interrupts must still be off here; warn and turn them off again if not. */
	if (!irqs_disabled()) {
		printk(KERN_WARNING "start_kernel(): bug: interrupts were "
				"enabled *very* early, fixing it\n");
		local_irq_disable();
	}
	idr_init_cache();

	perf_event_init();

	rcu_init();
	radix_tree_init();
	/* init some links before init_ISA_irqs() */
	early_irq_init();
	init_IRQ();
	prio_tree_init();

	/* Initialize timer-related data structures */
	init_timers();

	/* Initialize high-resolution timers */
	hrtimers_init();

	softirq_init();
	timekeeping_init();

	/* Initialize the system clock source */
	time_init();

	/* Initialize the kernel profiling facility (a kernel performance-debugging tool) */
	profile_init();
	call_function_init();
	/* Last check before interrupts are deliberately enabled below. */
	if (!irqs_disabled())
		printk(KERN_CRIT "start_kernel(): bug: interrupts were "
				 "enabled early\n");
	early_boot_irqs_disabled = false;
	local_irq_enable();

	/* Interrupts are enabled now so all GFP allocations are safe. */
	gfp_allowed_mask = __GFP_BITS_MASK;

	/* Late stage of slab initialization */
	kmem_cache_init_late();

	/*
	 * HACK ALERT! This is early. We're enabling the console before
	 * we've done PCI setups etc, and console_init() must be aware of
	 * this. But we do want output early, in case something goes wrong.
	 */
	/* Initialize the console so printk output becomes visible; printk calls
	 * made before this point only stored their data in the buffer. */
	console_init();
	/* A panic recorded earlier in panic_later is raised now that the console is up. */
	if (panic_later)
		panic(panic_later, panic_param);

	/* Prints lock-dependency information if CONFIG_LOCKDEP is defined; otherwise does nothing */
	lockdep_info();

	/*
	 * Need to run this when irqs are enabled, because it wants
	 * to self-test [hard/soft]-irqs on/off lock inversion bugs
	 * too:
	 */
	locking_selftest();

#ifdef CONFIG_BLK_DEV_INITRD
	/* Disable the initrd if its first page lies below min_low_pfn and that is not allowed. */
	if (initrd_start && !initrd_below_start_ok &&
	    page_to_pfn(virt_to_page((void *)initrd_start)) < min_low_pfn) {
		printk(KERN_CRIT "initrd overwritten (0x%08lx < 0x%08lx) - "
		    "disabling it.\n",
		    page_to_pfn(virt_to_page((void *)initrd_start)),
		    min_low_pfn);
		initrd_start = 0;
	}
#endif
	page_cgroup_init();
	enable_debug_pagealloc();
	debug_objects_mem_init();
	kmemleak_init();
	setup_per_cpu_pageset();
	numa_policy_init();
	/* late_time_init is an optional hook; call it only when it has been set. */
	if (late_time_init)
		late_time_init();
	sched_clock_init();

	/* Derive the BogoMIPS value from how many iterations of a very short
	 * loop the CPU executes in one second */
	calibrate_delay();
	pidmap_init();
	anon_vma_init();
#ifdef CONFIG_X86
	if (efi_enabled)
		efi_enter_virtual_mode();
#endif
	thread_info_cache_init();
	cred_init();

	/* Compute the number of processes that may be created from the amount of physical memory */
	fork_init(totalram_pages);
	proc_caches_init();
	buffer_init();
	key_init();
	security_init();
	dbg_late_init();
	vfs_caches_init(totalram_pages);
	signals_init();
	/* rootfs populating might need page-writeback */
	page_writeback_init();
#ifdef CONFIG_PROC_FS
	proc_root_init();
#endif
	cgroup_init();		/* register the cgroup filesystem, create its /proc entry, and initialize every subsystem not initialized in cgroup_init_early() */
	cpuset_init();
	taskstats_init_early();
	delayacct_init();

	/* Probe the CPU for various bugs and record the ones found so the rest
	 * of the kernel can work around them.
	 * NOTE(review): per the original note, check_bugs => identify_boot_cpu =>
	 * identify_cpu does a lot of work, including select_idle_routine and
	 * intel_init_thermal; confirm against the arch code.
	 */
	check_bugs();

	acpi_early_init(); /* before LAPIC and SMP init */
	sfi_init_late();

	ftrace_init();

	/* Do the rest non-__init'ed, we're now alive */
	/* Create the init process */
	rest_init();
}

 

你可能感兴趣的:(内核之旅)