Start_kernel()中调用了一系列初始化函数,以完成kernel本身的设置。这些动作有的是公共的,有的则是需要配置的才会执行的。
在start_kernel()函数中,
输出Linux版本信息(printk(linux_banner))
设置与体系结构相关的环境(setup_arch())
页表结构初始化(paging_init())
使用"arch/alpha/kernel/entry.S"中的入口点设置系统自陷入口(trap_init())
使用alpha_mv结构和entry.S入口初始化系统IRQ(init_IRQ())
核心进程调度器初始化(包括初始化几个缺省的Bottom-half,sched_init())
时间、定时器初始化(包括读取CMOS时钟、估测主频、初始化定时器中断等,time_init())
提取并分析核心启动参数(从环境变量中读取参数,设置相应标志位等待处理,(parse_options())
控制台初始化(为输出信息而先于PCI初始化,console_init())
剖析器数据结构初始化(prof_buffer和prof_len变量)
核心Cache初始化(描述Cache信息的Cache,kmem_cache_init())
延迟校准(获得时钟jiffies与CPU主频ticks的延迟,calibrate_delay())
内存初始化(设置内存上下界和页表项初始值,mem_init())
创建和设置内部及通用cache("slab_cache",kmem_cache_sizes_init())
创建uid taskcount SLAB cache("uid_cache",uidcache_init())
创建文件cache("files_cache",filescache_init())
创建目录cache("dentry_cache",dcache_init())
创建与虚存相关的cache("vm_area_struct","mm_struct",vma_init())
块设备读写缓冲区初始化(同时创建"buffer_head"cache用户加速访问,buffer_init())
创建页cache(内存页hash表初始化,page_cache_init())
创建信号队列cache("signal_queue",signals_init())
初始化内存inode表(inode_init())
创建内存文件描述符表("filp_cache",file_table_init())
检查体系结构漏洞(对于alpha,此函数为空,check_bugs())
SMP机器其余CPU(除当前引导CPU)初始化(对于没有配置SMP的内核,此函数为空,smp_init())
启动init过程(创建第一个核心线程,调用init()函数,原执行序列调用cpu_idle() 等待调度,init())
至此start_kernel()结束,基本的核心环境已经建立起来了。
asmlinkage void __init start_kernel(void)
{
char * command_line;
unsigned long mempages;
extern char saved_command_line[];
/*
* Interrupts are still disabled. Do necessary setups, then
* enable them
*/
/*锁内核*/
lock_kernel();
/*打印内核的版本和编译的信息*/
printk(linux_banner);
/*解析内核的命令行中与内存相关的信息和内存分布信息*/
setup_arch(&command_line);
/*打印命令行信息*/
printk("Kernel command line: %s\n", saved_command_line);
/*解析传递给内核的命令行中的0号进程的程序名和环境变量*/
parse_options(command_line);
/*常用中断,异常的初始化*/
trap_init();
/*非常用的中断初始化*/
init_IRQ();
/*调度相关的计时器和底半部的初始化*/
sched_init();
/*时钟初始化*/
time_init();
/*软中断tasklet初始化*/
softirq_init();
/*
* HACK ALERT! This is early. We're enabling the console before
* we've done PCI setups etc, and console_init() must be aware of
* this. But we do want output early, in case something goes wrong.
*/
/*终端初始化*/
console_init();
#ifdef CONFIG_MODULES
/*初始化模块symbol表大小*/
init_modules();
#endif
if (prof_shift) {
unsigned int size;
/* only text is profiled */
prof_len = (unsigned long) &_etext - (unsigned long) &_stext;
prof_len >>= prof_shift;
size = prof_len * sizeof(unsigned int) + PAGE_SIZE-1;
prof_buffer = (unsigned int *) alloc_bootmem(size);
}
/*初始化slab分配器*/
kmem_cache_init();
sti();
calibrate_delay();
#ifdef CONFIG_BLK_DEV_INITRD
if (initrd_start && !initrd_below_start_ok &&
initrd_start < min_low_pfn << PAGE_SHIFT) {
printk(KERN_CRIT "initrd overwritten (0x%08lx < 0x%08lx) - "
"disabling it.\n",initrd_start,min_low_pfn << PAGE_SHIFT);
initrd_start = 0;
}
#endif
/*设置高端内存和内存的标志位*/
mem_init();
/*初始化内部和一般的slab分配器*/
kmem_cache_sizes_init();
#ifdef CONFIG_3215_CONSOLE
con3215_activate();
#endif
#ifdef CONFIG_PROC_FS
/*建立proc文件系统的目录*/
proc_root_init();
#endif
mempages = num_physpages;
/*初始化最大线程数*/
fork_init(mempages);
/*创建一些常用的slab分配器的数据结构*/
proc_caches_init();
vfs_caches_init(mempages);
/*初始化buffer数据结构*/
buffer_init(mempages);
/*初始化页表的缓冲结构*/
page_cache_init(mempages);
kiobuf_setup();
/*创建signal的slab数据结构*/
signals_init();
bdev_init();
/*初始化文件系统的inode结构*/
inode_init(mempages);
#if defined(CONFIG_SYSVIPC)
/*初始化sysv的信号量,消息,共享内存*/
ipc_init();
#endif
#if defined(CONFIG_QUOTA)
dquot_init_hash();
#endif
check_bugs();
printk("POSIX conformance testing by UNIFIX\n");
/*
* We count on the initial thread going ok
* Like idlers init is an unlocked kernel thread, which will
* make syscalls (and thus be locked).
*/
/*初始化SMP,主要是APIC的初始化*/
smp_init();
/*创建init进程*/
kernel_thread(init, NULL, CLONE_FS | CLONE_FILES | CLONE_SIGNAL);
unlock_kernel();
current->need_resched = 1;
/*运行idle进程,进行调度*/
cpu_idle();
}
void __init setup_arch(char **cmdline_p)
{
unsigned long bootmap_size;
unsigned long start_pfn, max_pfn, max_low_pfn;
int i;
#ifdef CONFIG_VISWS
visws_get_board_type_and_rev();
#endif
/*将rootfs转化成kdev的表示形式,这里跟传统的表示没有不同*/
ROOT_DEV = to_kdev_t(ORIG_ROOT_DEV);
drive_info = DRIVE_INFO;
screen_info = SCREEN_INFO;
apm_info.bios = APM_BIOS_INFO;
/*将系统的描述信息写入全局变量中*/
if( SYS_DESC_TABLE.length != 0 ) {
MCA_bus = SYS_DESC_TABLE.table[3] &0x2;
machine_id = SYS_DESC_TABLE.table[0];
machine_submodel_id = SYS_DESC_TABLE.table[1];
BIOS_revision = SYS_DESC_TABLE.table[2];
}
aux_device_present = AUX_DEVICE_INFO;
#ifdef CONFIG_BLK_DEV_RAM
/*设置RAMDISK的标志*/
rd_image_start = RAMDISK_FLAGS & RAMDISK_IMAGE_START_MASK;
rd_prompt = ((RAMDISK_FLAGS & RAMDISK_PROMPT_FLAG) != 0);
rd_doload = ((RAMDISK_FLAGS & RAMDISK_LOAD_FLAG) != 0);
#endif
setup_memory_region();
if (!MOUNT_ROOT_RDONLY)
root_mountflags &= ~MS_RDONLY;
/*在init_mm存放数据段,代码段和堆栈段的起始地址,结束地址*/
init_mm.start_code = (unsigned long) &_text;
init_mm.end_code = (unsigned long) &_etext;
init_mm.end_data = (unsigned long) &_edata;
init_mm.brk = (unsigned long) &_end;
/*存放内核和数据段的起始和结束地址,这里是转化为物理地址存放的*/
code_resource.start = virt_to_bus(&_text);
code_resource.end = virt_to_bus(&_etext)-1;
data_resource.start = virt_to_bus(&_etext);
data_resource.end = virt_to_bus(&_edata)-1;
/*解析命令行中的"mem="参数*/
parse_mem_cmdline(cmdline_p);
#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
#define PFN_DOWN(x) ((x) >> PAGE_SHIFT)
#define PFN_PHYS(x) ((x) << PAGE_SHIFT)
/*
* 128MB for vmalloc and initrd
*/
#define VMALLOC_RESERVE (unsigned long)(128 << 20)
#define MAXMEM (unsigned long)(-PAGE_OFFSET-VMALLOC_RESERVE)
#define MAXMEM_PFN PFN_DOWN(MAXMEM)
#define MAX_NONPAE_PFN (1 << 20)
/*
* partially used pages are not usable - thus
* we are rounding upwards:
*/
/*找到起始物理页号*/
start_pfn = PFN_UP(__pa(&_end));
/*
* Find the highest page frame number we have available
*/
/*从E820中找到最高物理页号*/
max_pfn = 0;
for (i = 0; i < e820.nr_map; i++) {
unsigned long start, end;
/* RAM? */
if (e820.map[i].type != E820_RAM)
continue;
start = PFN_UP(e820.map[i].addr);
end = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
if (start >= end)
continue;
if (end > max_pfn)
max_pfn = end;
}
/*
* Determine low and high memory ranges:
*/
/*找到最高的低端物理页号,既896M对应的物理页号*/
max_low_pfn = max_pfn;
if (max_low_pfn > MAXMEM_PFN) {
max_low_pfn = MAXMEM_PFN;
#ifndef CONFIG_HIGHMEM
/* Maximum memory usable is what is directly addressable */
printk(KERN_WARNING "Warning only %ldMB will be used.\n",
MAXMEM>>20);
if (max_pfn > MAX_NONPAE_PFN)
printk(KERN_WARNING "Use a PAE enabled kernel.\n");
else
printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n");
#else /* !CONFIG_HIGHMEM */
#ifndef CONFIG_X86_PAE
if (max_pfn > MAX_NONPAE_PFN) {
max_pfn = MAX_NONPAE_PFN;
printk(KERN_WARNING "Warning only 4GB will be used.\n");
printk(KERN_WARNING "Use a PAE enabled kernel.\n");
}
#endif /* !CONFIG_X86_PAE */
#endif /* !CONFIG_HIGHMEM */
}
/*设置高端内存的起始和结束地址*/
#ifdef CONFIG_HIGHMEM
highstart_pfn = highend_pfn = max_pfn;
if (max_pfn > MAXMEM_PFN) {
highstart_pfn = MAXMEM_PFN;
printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
pages_to_mb(highend_pfn - highstart_pfn));
}
#endif
/*
* Initialize the boot-time allocator (with low memory only):
*/
/*初始化896M以下的boot内存*/
bootmap_size = init_bootmem(start_pfn, max_low_pfn);
/*
* Register fully available low RAM pages with the bootmem allocator.
*/
/*将896M以下的内存设为可用状态*/
for (i = 0; i < e820.nr_map; i++) {
unsigned long curr_pfn, last_pfn, size;
/*
* Reserve usable low memory
*/
if (e820.map[i].type != E820_RAM)
continue;
/*
* We are rounding up the start address of usable memory:
*/
curr_pfn = PFN_UP(e820.map[i].addr);
if (curr_pfn >= max_low_pfn)
continue;
/*
* ... and at the end of the usable range downwards:
*/
last_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
if (last_pfn > max_low_pfn)
last_pfn = max_low_pfn;
/*
* .. finally, did all the rounding and playing
* around just make the area go away?
*/
if (last_pfn <= curr_pfn)
continue;
size = last_pfn - curr_pfn;
free_bootmem(PFN_PHYS(curr_pfn), PFN_PHYS(size));
}
/*
* Reserve the bootmem bitmap itself as well. We do this in two
* steps (first step was init_bootmem()) because this catches
* the (very unlikely) case of us accidentally initializing the
* bootmem allocator with an invalid RAM area.
*/
/*保留bootmem自己的内存*/
reserve_bootmem(HIGH_MEMORY, (PFN_PHYS(start_pfn) +
bootmap_size + PAGE_SIZE-1) - (HIGH_MEMORY));
/*
* reserve physical page 0 - it's a special BIOS page on many boxes,
* enabling clean reboots, SMP operation, laptop functions.
*/
/*保留第一个物理页*/
reserve_bootmem(0, PAGE_SIZE);
#ifdef CONFIG_SMP
/*
* But first pinch a few for the stack/trampoline stuff
* FIXME: Don't need the extra page at 4K, but need to fix
* trampoline before removing it. (see the GDT stuff)
*/
reserve_bootmem(PAGE_SIZE, PAGE_SIZE);
smp_alloc_memory(); /* AP processor realmode stacks in low memory*/
#endif
#ifdef CONFIG_X86_IO_APIC
/*
* Find and reserve possible boot-time SMP configuration:
*/
find_smp_config();
#endif
/*初始化构建页表*/
paging_init();
#ifdef CONFIG_X86_IO_APIC
/*
* get boot-time SMP configuration:
*/
if (smp_found_config)
get_smp_config();
#endif
#ifdef CONFIG_X86_LOCAL_APIC
init_apic_mappings();
#endif
#ifdef CONFIG_BLK_DEV_INITRD
/*将RAMDISK的空间保留下来*/
if (LOADER_TYPE && INITRD_START) {
if (INITRD_START + INITRD_SIZE <= (max_low_pfn << PAGE_SHIFT)) {
reserve_bootmem(INITRD_START, INITRD_SIZE);
initrd_start =
INITRD_START ? INITRD_START + PAGE_OFFSET : 0;
initrd_end = initrd_start+INITRD_SIZE;
}
else {
printk("initrd extends beyond end of memory "
"(0x%08lx > 0x%08lx)\ndisabling initrd\n",
INITRD_START + INITRD_SIZE,
max_low_pfn << PAGE_SHIFT);
initrd_start = 0;
}
}
#endif
/*
* Request address space for all standard RAM and ROM resources
* and also for regions reported as reserved by the e820.
*/
/*将ROM加入资源列表中*/
probe_roms();
/*将RAM等各种资源加入列表中*/
for (i = 0; i < e820.nr_map; i++) {
struct resource *res;
if (e820.map[i].addr + e820.map[i].size > 0x100000000ULL)
continue;
res = alloc_bootmem_low(sizeof(struct resource));
switch (e820.map[i].type) {
case E820_RAM: res->name = "System RAM"; break;
case E820_ACPI: res->name = "ACPI Tables"; break;
case E820_NVS: res->name = "ACPI Non-volatile Storage"; break;
default: res->name = "reserved";
}
res->start = e820.map[i].addr;
res->end = res->start + e820.map[i].size - 1;
res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
request_resource(&iomem_resource, res);
if (e820.map[i].type == E820_RAM) {
/*
* We dont't know which RAM region contains kernel data,
* so we try it repeatedly and let the resource manager
* test it.
*/
request_resource(res, &code_resource);
request_resource(res, &data_resource);
}
}
request_resource(&iomem_resource, &vram_resource);
/* request I/O space for devices used on all i[345]86 PCs */
for (i = 0; i < STANDARD_IO_RESOURCES; i++)
request_resource(&ioport_resource, standard_io_resources+i);
#ifdef CONFIG_VT
#if defined(CONFIG_VGA_CONSOLE)
conswitchp = &vga_con;
#elif defined(CONFIG_DUMMY_CONSOLE)
conswitchp = &dummy_con;
#endif
#endif
}