start_kernel

/*
 * start_kernel - C-level entry point that brings the kernel up.
 *
 * Runs the boot-time initialisation calls below in a fixed order, spawns
 * the init thread with kernel_thread(), drops the kernel lock and finally
 * calls cpu_idle().  The order matters: for example setup_arch() hands
 * back the command line that parse_options() consumes, and mem_init() is
 * only called after the initrd sanity check.
 *
 * The one-line notes on individual calls are translated from the original
 * annotations.  The callees are defined elsewhere, so treat those notes as
 * the annotator's summary rather than as a specification.
 */
asmlinkage void __init start_kernel(void)
{
	char * command_line;
	unsigned long mempages;
	extern char saved_command_line[];

	/*
	 * Interrupts are still disabled. Do necessary setups, then
	 * enable them
	 */

	/* Lock the kernel. */
	lock_kernel();

	/*
	 * Print the kernel version and build information.  The banner is
	 * passed as an argument, not as the format string, so a '%' inside
	 * it can never be interpreted as a conversion specifier.
	 */
	printk("%s", linux_banner);

	/*
	 * Architecture setup: handles the memory-related command line
	 * options and the memory layout, and returns the command line
	 * through &command_line.
	 */
	setup_arch(&command_line);

	/* Print the command line. */
	printk("Kernel command line: %s\n", saved_command_line);

	/*
	 * Parse the command line (annotator: program name and environment
	 * variables for the first process).
	 */
	parse_options(command_line);

	/* Commonly used interrupts and exceptions. */
	trap_init();

	/* The remaining interrupts. */
	init_IRQ();

	/* Scheduler-related timers and bottom halves. */
	sched_init();

	/* Clock. */
	time_init();

	/* Softirqs / tasklets. */
	softirq_init();

	/*
	 * HACK ALERT! This is early. We're enabling the console before
	 * we've done PCI setups etc, and console_init() must be aware of
	 * this. But we do want output early, in case something goes wrong.
	 */
	console_init();

#ifdef CONFIG_MODULES
	/* Module symbol table size. */
	init_modules();
#endif

	/*
	 * Profiling is set up only when prof_shift is non-zero: the buffer
	 * has one unsigned int per (1 << prof_shift) bytes of kernel text.
	 */
	if (prof_shift) {
		unsigned int size;

		/* only text is profiled */
		prof_len = (unsigned long) &_etext - (unsigned long) &_stext;
		prof_len >>= prof_shift;

		size = prof_len * sizeof(unsigned int) + PAGE_SIZE-1;
		prof_buffer = (unsigned int *) alloc_bootmem(size);
	}

	/* Slab allocator. */
	kmem_cache_init();

	/* Interrupts are enabled here (see the note at the top). */
	sti();
	calibrate_delay();

#ifdef CONFIG_BLK_DEV_INITRD
	/*
	 * Drop the initrd if it starts below min_low_pfn and that has not
	 * been declared acceptable via initrd_below_start_ok.
	 */
	if (initrd_start && !initrd_below_start_ok &&
			initrd_start < min_low_pfn << PAGE_SHIFT) {
		printk(KERN_CRIT "initrd overwritten (0x%08lx < 0x%08lx) - "
		    "disabling it.\n",initrd_start,min_low_pfn << PAGE_SHIFT);
		initrd_start = 0;
	}
#endif

	/* High memory and page flags. */
	mem_init();

	/* Internal and general-purpose slab caches. */
	kmem_cache_sizes_init();

#ifdef CONFIG_3215_CONSOLE
	con3215_activate();
#endif

#ifdef CONFIG_PROC_FS
	/* Create the proc filesystem directories. */
	proc_root_init();
#endif

	/* Everything below that scales with memory is sized from this. */
	mempages = num_physpages;

	/* Maximum number of threads. */
	fork_init(mempages);

	/* Slab caches for commonly used structures. */
	proc_caches_init();
	vfs_caches_init(mempages);

	/* Buffer data structures. */
	buffer_init(mempages);

	/* Page cache structures. */
	page_cache_init(mempages);
	kiobuf_setup();

	/* Slab cache for signals. */
	signals_init();
	bdev_init();

	/* Filesystem inode structures. */
	inode_init(mempages);

#if defined(CONFIG_SYSVIPC)
	/* SysV semaphores, messages and shared memory. */
	ipc_init();
#endif

#if defined(CONFIG_QUOTA)
	dquot_init_hash();
#endif

	check_bugs();
	printk("POSIX conformance testing by UNIFIX\n");

	/*
	 * We count on the initial thread going ok
	 * Like idlers init is an unlocked kernel thread, which will
	 * make syscalls (and thus be locked).
	 */

	/* SMP bring-up (annotator: mainly APIC initialisation). */
	smp_init();

	/* Spawn the init thread. */
	kernel_thread(init, NULL, CLONE_FS | CLONE_FILES | CLONE_SIGNAL);
	unlock_kernel();
	current->need_resched = 1;

	/* Enter the idle loop and let the scheduler run. */
	cpu_idle();
}

/*
 * setup_arch - boot-time architecture setup.
 *
 * In order: copies boot parameters into kernel globals, builds the e820
 * memory map (setup_memory_region), records the kernel image bounds in
 * init_mm and in the code/data resources, applies "mem=" overrides from
 * the command line, works out the usable page-frame ranges, initialises
 * the bootmem allocator and reserves the ranges that must not be handed
 * out, calls paging_init(), reserves the initrd, and registers memory and
 * standard I/O port resources.
 *
 * @cmdline_p: passed straight to parse_mem_cmdline(), which stores the
 *             command line pointer through it.
 *
 * Note: the PFN_xxx and MAXMEM helper macros are #defined inside the body
 * and never #undef'd, so they stay visible to whatever follows in the file.
 */
void __init setup_arch(char **cmdline_p)
{
	unsigned long bootmap_size;
	unsigned long start_pfn, max_pfn, max_low_pfn;
	int idx;

#ifdef CONFIG_VISWS
	visws_get_board_type_and_rev();
#endif

	/* Root device number, converted to its kdev_t representation. */
	ROOT_DEV = to_kdev_t(ORIG_ROOT_DEV);
	drive_info = DRIVE_INFO;
	screen_info = SCREEN_INFO;
	apm_info.bios = APM_BIOS_INFO;

	/* Copy the system description into globals, if a table exists. */
	if (SYS_DESC_TABLE.length != 0) {
		MCA_bus = SYS_DESC_TABLE.table[3] & 0x2;
		machine_id = SYS_DESC_TABLE.table[0];
		machine_submodel_id = SYS_DESC_TABLE.table[1];
		BIOS_revision = SYS_DESC_TABLE.table[2];
	}
	aux_device_present = AUX_DEVICE_INFO;

#ifdef CONFIG_BLK_DEV_RAM
	/* Decode the ramdisk flags. */
	rd_image_start = RAMDISK_FLAGS & RAMDISK_IMAGE_START_MASK;
	rd_prompt = ((RAMDISK_FLAGS & RAMDISK_PROMPT_FLAG) != 0);
	rd_doload = ((RAMDISK_FLAGS & RAMDISK_LOAD_FLAG) != 0);
#endif

	setup_memory_region();

	if (!MOUNT_ROOT_RDONLY)
		root_mountflags &= ~MS_RDONLY;

	/* Record the kernel image boundaries in init_mm. */
	init_mm.start_code = (unsigned long) &_text;
	init_mm.end_code = (unsigned long) &_etext;
	init_mm.end_data = (unsigned long) &_edata;
	init_mm.brk = (unsigned long) &_end;

	/* The same boundaries as resources, converted with virt_to_bus(). */
	code_resource.start = virt_to_bus(&_text);
	code_resource.end = virt_to_bus(&_etext)-1;
	data_resource.start = virt_to_bus(&_etext);
	data_resource.end = virt_to_bus(&_edata)-1;

	/* Apply any "mem=" options from the command line. */
	parse_mem_cmdline(cmdline_p);

#define PFN_UP(x)	(((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
#define PFN_DOWN(x)	((x) >> PAGE_SHIFT)
#define PFN_PHYS(x)	((x) << PAGE_SHIFT)

/*
 * 128MB for vmalloc and initrd
 */
#define VMALLOC_RESERVE	(unsigned long)(128 << 20)
#define MAXMEM		(unsigned long)(-PAGE_OFFSET-VMALLOC_RESERVE)
#define MAXMEM_PFN	PFN_DOWN(MAXMEM)
#define MAX_NONPAE_PFN	(1 << 20)

	/*
	 * First page frame after the kernel image.  Partially used pages
	 * are not usable - thus we are rounding upwards.
	 */
	start_pfn = PFN_UP(__pa(&_end));

	/*
	 * Find the highest page frame number we have available: the largest
	 * end frame of any non-empty E820_RAM entry.
	 */
	max_pfn = 0;
	for (idx = 0; idx < e820.nr_map; idx++) {
		unsigned long first, last;

		/* RAM? */
		if (e820.map[idx].type != E820_RAM)
			continue;
		first = PFN_UP(e820.map[idx].addr);
		last = PFN_DOWN(e820.map[idx].addr + e820.map[idx].size);
		if (first < last && last > max_pfn)
			max_pfn = last;
	}

	/*
	 * Determine low and high memory ranges.  Low memory is capped at
	 * MAXMEM_PFN (the annotator gives this as 896MB, which presumes the
	 * usual PAGE_OFFSET; its value is not visible here).
	 */
	max_low_pfn = max_pfn;
	if (max_low_pfn > MAXMEM_PFN) {
		max_low_pfn = MAXMEM_PFN;
#ifndef CONFIG_HIGHMEM
		/* Maximum memory usable is what is directly addressable */
		printk(KERN_WARNING "Warning only %ldMB will be used.\n",
					MAXMEM>>20);
		if (max_pfn > MAX_NONPAE_PFN)
			printk(KERN_WARNING "Use a PAE enabled kernel.\n");
		else
			printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n");
#else /* CONFIG_HIGHMEM */
#ifndef CONFIG_X86_PAE
		if (max_pfn > MAX_NONPAE_PFN) {
			max_pfn = MAX_NONPAE_PFN;
			printk(KERN_WARNING "Warning only 4GB will be used.\n");
			printk(KERN_WARNING "Use a PAE enabled kernel.\n");
		}
#endif /* !CONFIG_X86_PAE */
#endif /* CONFIG_HIGHMEM */
	}

#ifdef CONFIG_HIGHMEM
	/* High memory spans [highstart_pfn, highend_pfn); empty by default. */
	highstart_pfn = highend_pfn = max_pfn;
	if (max_pfn > MAXMEM_PFN) {
		highstart_pfn = MAXMEM_PFN;
		printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
			pages_to_mb(highend_pfn - highstart_pfn));
	}
#endif

	/*
	 * Initialize the boot-time allocator (with low memory only):
	 */
	bootmap_size = init_bootmem(start_pfn, max_low_pfn);

	/*
	 * Register fully available low RAM pages with the bootmem allocator.
	 */
	for (idx = 0; idx < e820.nr_map; idx++) {
		unsigned long lo_pfn, hi_pfn, npages;

		/* Only usable RAM is handed over. */
		if (e820.map[idx].type != E820_RAM)
			continue;

		/* Round the start of the region up ... */
		lo_pfn = PFN_UP(e820.map[idx].addr);
		if (lo_pfn >= max_low_pfn)
			continue;

		/* ... and its end down, clamped to low memory. */
		hi_pfn = PFN_DOWN(e820.map[idx].addr + e820.map[idx].size);
		if (hi_pfn > max_low_pfn)
			hi_pfn = max_low_pfn;

		/* Skip regions that the rounding and clamping emptied. */
		if (hi_pfn > lo_pfn) {
			npages = hi_pfn - lo_pfn;
			free_bootmem(PFN_PHYS(lo_pfn), PFN_PHYS(npages));
		}
	}

	/*
	 * Reserve the bootmem bitmap itself as well. We do this in two
	 * steps (first step was init_bootmem()) because this catches
	 * the (very unlikely) case of us accidentally initializing the
	 * bootmem allocator with an invalid RAM area.
	 *
	 * The reserved range runs from HIGH_MEMORY to bootmap_size bytes
	 * past the physical address of start_pfn.
	 */
	reserve_bootmem(HIGH_MEMORY, (PFN_PHYS(start_pfn) +
			 bootmap_size + PAGE_SIZE-1) - (HIGH_MEMORY));

	/*
	 * reserve physical page 0 - it's a special BIOS page on many boxes,
	 * enabling clean reboots, SMP operation, laptop functions.
	 */
	reserve_bootmem(0, PAGE_SIZE);

#ifdef CONFIG_SMP
	/*
	 * But first pinch a few for the stack/trampoline stuff
	 * FIXME: Don't need the extra page at 4K, but need to fix
	 * trampoline before removing it. (see the GDT stuff)
	 */
	reserve_bootmem(PAGE_SIZE, PAGE_SIZE);
	smp_alloc_memory(); /* AP processor realmode stacks in low memory*/
#endif

#ifdef CONFIG_X86_IO_APIC
	/*
	 * Find and reserve possible boot-time SMP configuration:
	 */
	find_smp_config();
#endif

	/* Build the page tables. */
	paging_init();

#ifdef CONFIG_X86_IO_APIC
	/*
	 * get boot-time SMP configuration:
	 */
	if (smp_found_config)
		get_smp_config();
#endif

#ifdef CONFIG_X86_LOCAL_APIC
	init_apic_mappings();
#endif

#ifdef CONFIG_BLK_DEV_INITRD
	/* Keep the initrd image if it fits entirely within low memory. */
	if (LOADER_TYPE && INITRD_START) {
		if (INITRD_START + INITRD_SIZE <= (max_low_pfn << PAGE_SHIFT)) {
			reserve_bootmem(INITRD_START, INITRD_SIZE);
			initrd_start =
				INITRD_START ? INITRD_START + PAGE_OFFSET : 0;
			initrd_end = initrd_start+INITRD_SIZE;
		}
		else {
			printk("initrd extends beyond end of memory "
			    "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
			    INITRD_START + INITRD_SIZE,
			    max_low_pfn << PAGE_SHIFT);
			initrd_start = 0;
		}
	}
#endif

	/*
	 * Request address space for all standard RAM and ROM resources
	 * and also for regions reported as reserved by the e820.
	 */
	probe_roms();
	for (idx = 0; idx < e820.nr_map; idx++) {
		struct resource *res;

		/* Entries reaching above 4GB are not registered. */
		if (e820.map[idx].addr + e820.map[idx].size > 0x100000000ULL)
			continue;
		res = alloc_bootmem_low(sizeof(struct resource));
		switch (e820.map[idx].type) {
		case E820_RAM:	res->name = "System RAM"; break;
		case E820_ACPI:	res->name = "ACPI Tables"; break;
		case E820_NVS:	res->name = "ACPI Non-volatile Storage"; break;
		default:	res->name = "reserved";
		}
		res->start = e820.map[idx].addr;
		res->end = res->start + e820.map[idx].size - 1;
		res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
		request_resource(&iomem_resource, res);
		if (e820.map[idx].type == E820_RAM) {
			/*
			 *  We don't know which RAM region contains kernel data,
			 *  so we try it repeatedly and let the resource manager
			 *  test it.
			 */
			request_resource(res, &code_resource);
			request_resource(res, &data_resource);
		}
	}
	request_resource(&iomem_resource, &vram_resource);

	/* request I/O space for devices used on all i[345]86 PCs */
	for (idx = 0; idx < STANDARD_IO_RESOURCES; idx++)
		request_resource(&ioport_resource, standard_io_resources+idx);

#ifdef CONFIG_VT
#if defined(CONFIG_VGA_CONSOLE)
	conswitchp = &vga_con;
#elif defined(CONFIG_DUMMY_CONSOLE)
	conswitchp = &dummy_con;
#endif
#endif
}

setup_arch->setup_memory_region(箭头"->"表示函数调用关系,下同)

/*
 * setup_memory_region - build the e820 memory map at boot.
 *
 * Try to copy the BIOS-supplied E820 map.  If copy_e820_map() rejects it
 * (negative return), fake a two-entry map instead: one region from 0 up
 * to LOWMEMSIZE(), and one starting at HIGH_MEMORY whose extent comes
 * from the larger of EXT_MEM_K and ALT_MEM_K (sizes in KB, hence the
 * shift by 10).  The resulting map is then printed, tagged with the name
 * of the source that supplied it.
 */
void __init setup_memory_region(void)
{
	char *source = "BIOS-e820";

	if (copy_e820_map(E820_MAP, E820_MAP_NR) < 0) {
		unsigned long kbytes;

		/* compare results from other methods and take the greater */
		if (EXT_MEM_K > ALT_MEM_K) {
			kbytes = EXT_MEM_K;
			source = "BIOS-88";
		} else {
			kbytes = ALT_MEM_K;
			source = "BIOS-e801";
		}

		/* Start from an empty map and add the two fake regions. */
		e820.nr_map = 0;
		add_memory_region(0, LOWMEMSIZE(), E820_RAM);
		add_memory_region(HIGH_MEMORY, (kbytes << 10) - HIGH_MEMORY, E820_RAM);
	}

	printk("BIOS-provided physical RAM map:\n");
	/* Dump the map entries, labelled with their source. */
	print_memory_map(source);
}



setup_arch->setup_memory_region->copy_e820_map



/*
 * Copy the BIOS e820 map into a safe place, sanity-checking it on the way.
 *
 * On a modern system the setup code will have handed us a memory map we
 * can use to set up memory properly.  If not, the caller fakes one.
 *
 * A map with fewer than two entries is refused: the detection code in
 * setup.S may not be perfect, and practically every PC has at least two
 * regions, one from 0 to 640k and one from 1mb up.  (The IBM thinkpad
 * 560x, for example, does not cooperate with the memory detection code.)
 *
 * @biosmap: first entry of the BIOS-provided map.
 * @nr_map:  number of entries in @biosmap.
 *
 * Returns 0 once every entry has been passed to add_memory_region(), or
 * -1 if the map is too short or an entry's start + size wraps around.
 * Entries added before a wrapping entry is found are not rolled back.
 */
static int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
{
	/* Only one memory region (or negative)? Ignore it */
	if (nr_map < 2)
		return -1;

	for (; nr_map; biosmap++, nr_map--) {
		unsigned long long base = biosmap->addr;
		unsigned long long len = biosmap->size;
		unsigned long long limit = base + len;
		unsigned long kind = biosmap->type;

		/* Overflow in 64 bits? Ignore the memory map. */
		if (base > limit)
			return -1;

		/*
		 * Some BIOSes claim RAM in the 640k - 1M region.
		 * Not right. Fix it up: a RAM entry overlapping that hole
		 * is split so that nothing inside the hole is recorded.
		 */
		if (kind == E820_RAM && base < 0x100000ULL && limit > 0xA0000ULL) {
			/* Part below 640k, if any. */
			if (base < 0xA0000ULL)
				add_memory_region(base, 0xA0000ULL - base, kind);
			/* Nothing above 1M: this entry is done. */
			if (limit <= 0x100000ULL)
				continue;
			/* Keep only the part from 1M upwards. */
			base = 0x100000ULL;
			len = limit - base;
		}

		/* Record start, size and type of the (possibly trimmed) entry. */
		add_memory_region(base, len, kind);
	}
	return 0;
}





setup_arch->setup_memory_region->copy_e820_map->add_memory_region



/*
 * add_memory_region - append one entry to the global e820 map.
 *
 * @start: start address of the region.
 * @size:  length of the region.
 * @type:  region type (callers in this file pass E820_RAM or the type
 *         taken from a BIOS entry).
 *
 * When the map already holds E820MAX entries the region is dropped with
 * a console message; the caller is not told.
 */
void __init add_memory_region(unsigned long long start,
			      unsigned long long size, int type)
{
	int slot = e820.nr_map;

	if (slot == E820MAX) {
		printk("Ooops! Too many entries in the memory map!\n");
		return;
	}

	/* Fill the next free slot, then publish it by bumping the count. */
	e820.map[slot].addr = start;
	e820.map[slot].size = size;
	e820.map[slot].type = type;
	e820.nr_map++;
} /* add_memory_region */




start_kernel->setup_arch->parse_mem_cmdline 


/*
 * parse_mem_cmdline - copy the boot command line, consuming "mem=" options.
 *
 * Saves an unparsed copy of COMMAND_LINE in saved_command_line, then
 * copies it character by character into command_line.  Any "mem=" option
 * that starts the line or follows a space is handled here and left out
 * of the copy:
 *
 *   "mem=nopentium"            disables the 4MB page tables (clears the
 *                              PSE capability bit).
 *   "mem=exactmap"             empties the e820 map so that only regions
 *                              given on the command line remain.
 *   "mem=XXX[kKmM]"            defines a memory region from HIGH_MEM to
 *                              <mem>, overriding the bios size.
 *   "mem=XXX[KkmM]@XXX[KkmM]"  defines a memory region from <start> to
 *                              <start>+<mem>, overriding the bios size.
 *
 * @cmdline_p: receives a pointer to the filtered command line.
 */
static inline void parse_mem_cmdline (char ** cmdline_p)
{
	char last = ' ';		/* most recently consumed character */
	char *dst = command_line;
	char *src = COMMAND_LINE;
	int copied = 0;
	int user_map = 0;

	/* Save unparsed command line copy for /proc/cmdline */
	memcpy(saved_command_line, COMMAND_LINE, COMMAND_LINE_SIZE);
	saved_command_line[COMMAND_LINE_SIZE-1] = '\0';

	for (;;) {
		/* "mem=" only counts at the start or right after a space. */
		if (last == ' ' && memcmp(src, "mem=", 4) == 0) {
			/* Take back the space copied just before the option. */
			if (dst != command_line)
				dst--;

			if (memcmp(src+4, "nopentium", 9) == 0) {
				src += 9+4;
				clear_bit(X86_FEATURE_PSE, &boot_cpu_data.x86_capability);
			} else if (memcmp(src+4, "exactmap", 8) == 0) {
				/* Use exactly the regions given as mem=size@start. */
				src += 8+4;
				e820.nr_map = 0;
				user_map = 1;
			} else {
				/* If the user specifies memory size, we
				 * blow away any automatically generated
				 * size
				 */
				unsigned long start_at, mem_size;

				if (user_map == 0) {
					/* first time in: zap the whitelist
					 * and reinitialize it with the
					 * standard low-memory region.
					 */
					e820.nr_map = 0;
					user_map = 1;
					add_memory_region(0, LOWMEMSIZE(), E820_RAM);
				}

				/* Region size; src is advanced past the number. */
				mem_size = memparse(src+4, &src);

				if (*src != '@') {
					/*
					 * No explicit start: the region begins at
					 * HIGH_MEMORY and <size> is taken as the
					 * end address.
					 * NOTE(review): a <size> smaller than
					 * HIGH_MEMORY makes this unsigned
					 * subtraction wrap - confirm whether that
					 * needs guarding.
					 * The flag is reset here, so a later plain
					 * "mem=<size>" zaps the map again and the
					 * user map is not printed below unless
					 * another option sets the flag.
					 */
					start_at = HIGH_MEMORY;
					mem_size -= HIGH_MEMORY;
					user_map = 0;
				} else {
					start_at = memparse(src+1, &src);
				}

				add_memory_region(start_at, mem_size, E820_RAM);
			}
		}

		last = *(src++);
		if (!last)
			break;
		/* Stop before the copy can exceed COMMAND_LINE_SIZE. */
		if (COMMAND_LINE_SIZE <= ++copied)
			break;
		*(dst++) = last;
	}
	*dst = '\0';
	*cmdline_p = command_line;

	if (user_map) {
		printk("user-defined physical RAM map:\n");
		print_memory_map("user");
	}
}

你可能感兴趣的:(kernel)