1. kernel运行的史前时期和内存布局
在arm平台下,zImage.bin压缩镜像是由bootloader加载到物理内存,然后跳到zImage.bin里一段程序,它专门用于将被压缩的kernel解压缩到KERNEL_RAM_PADDR开始的一段内存中,接着跳进真正的kernel去执行。该kernel的执行起点是stext函数,定义于arch/arm/kernel/head.S。
在分析stext函数前,先介绍此时内存的布局如下图所示
在开发板tqs3c2440中,SDRAM连接到内存控制器的Bank6中,它的开始内存地址是0x30000000,大小为64M,即0x04000000。ARM Linux kernel将SDRAM的开始地址定义为PHYS_OFFSET。经bootloader加载kernel并由自解压部分代码运行后,最终kernel被放置到KERNEL_RAM_PADDR(=PHYS_OFFSET + TEXT_OFFSET,即0x30008000)地址上的一段内存,经此放置后,kernel代码以后均不会被移动。
在进入kernel代码前,即bootloader和自解压缩阶段,ARM未开启MMU功能。因此kernel启动代码一个重要功能是设置好相应的页表,并开启MMU功能。为了支持MMU功能,kernel镜像中的所有符号,包括代码段和数据段的符号,在链接时都生成了它在开启MMU时,所在物理内存地址映射到的虚拟内存地址。
以arm kernel第一个符号(函数)stext为例,在编译链接时,它生成的虚拟地址是0xc0008000,而放置它的物理地址为0x30008000(还记得这是PHYS_OFFSET+TEXT_OFFSET吗?)。实际上这个变换可以利用简单的公式进行表示:va = pa - PHYS_OFFSET + PAGE_OFFSET。Arm linux最终的kernel空间的页表,就是按照这个关系来建立。
之所以较早提及arm linux的内存映射,原因是在进入kernel代码后,里面所有符号地址值为清一色的0xCXXXXXXX地址,而此时ARM未开启MMU功能,故在执行stext函数第一条指令时,它的PC值就是stext所在的内存地址(即物理地址,0x30008000)。因此,下面有些代码,需要使用地址无关技术。
2.一览stext函数
stext函数定义在arch/arm/kernel/head.S,它的功能是获取处理器类型和机器类型信息,并创建临时的页表,然后开启MMU功能,并跳进第一个C语言函数start_kernel。
stext函数的前置条件是:MMU关闭,D-cache关闭;r0 = 0,r1 = machine nr,r2 = atags pointer。
代码如下:
- .section ".text.head", "ax"
-
- @ stext: first kernel instruction executed after decompression.
- @
- @ Entry state: MMU off, D-cache off,
- @   r0 = 0, r1 = machine number, r2 = atags pointer.
- @ Register use established below:
- @   r9  = processor id read from CP15 c0
- @   r10 = procinfo pointer returned in r5 by __lookup_processor_type
- @   r8  = machinfo pointer returned in r5 by __lookup_machine_type
- @   r13 = word loaded from __switch_data, the address to continue at
- @         once the MMU has been enabled
- @ A zero r5 from either lookup means "not found" and diverts to the
- @ matching __error_* routine.
- ENTRY(stext)
- msr cpsr_c, #PSR_F_BIT | PSR_I_BIT | SVC_MODE @ ensure svc mode
- @ and irqs disabled
- mrc p15, 0, r9, c0, c0 @ get processor id
- bl __lookup_processor_type @ r5=procinfo r9=cpuid
- movs r10, r5 @ invalid processor (r5=0)?
- beq __error_p @ yes, error 'p'
- bl __lookup_machine_type @ r5=machinfo
- movs r8, r5 @ invalid machine (r5=0)?
- beq __error_a @ yes, error 'a'
- bl __vet_atags
- bl __create_page_tables
-
- @ Jump into the CPU-specific init code located at offset
- @ PROCINFO_INITFUNC inside the procinfo record (r10). The jump is an
- @ add to pc and lr is formed with adr, so no link-time (virtual)
- @ address is used while the MMU is still off. That code returns
- @ through lr, i.e. into __enable_mmu.
- ldr r13, __switch_data @ address to jump to after
- @ mmu has been enabled
- adr lr, __enable_mmu @ return (PIC) address
- add pc, r10, #PROCINFO_INITFUNC
- ENDPROC(stext)
- .section ".text.head", "ax"
-
- @ stext: first kernel instruction executed after decompression.
- @
- @ Entry state: MMU off, D-cache off,
- @   r0 = 0, r1 = machine number, r2 = atags pointer.
- @ Register use established below:
- @   r9  = processor id read from CP15 c0
- @   r10 = procinfo pointer returned in r5 by __lookup_processor_type
- @   r8  = machinfo pointer returned in r5 by __lookup_machine_type
- @   r13 = word loaded from __switch_data, the address to continue at
- @         once the MMU has been enabled
- @ A zero r5 from either lookup means "not found" and diverts to the
- @ matching __error_* routine.
- ENTRY(stext)
- msr cpsr_c, #PSR_F_BIT | PSR_I_BIT | SVC_MODE @ ensure svc mode
- @ and irqs disabled
- mrc p15, 0, r9, c0, c0 @ get processor id
- bl __lookup_processor_type @ r5=procinfo r9=cpuid
- movs r10, r5 @ invalid processor (r5=0)?
- beq __error_p @ yes, error 'p'
- bl __lookup_machine_type @ r5=machinfo
- movs r8, r5 @ invalid machine (r5=0)?
- beq __error_a @ yes, error 'a'
- bl __vet_atags
- bl __create_page_tables
-
- @ Jump into the CPU-specific init code located at offset
- @ PROCINFO_INITFUNC inside the procinfo record (r10). The jump is an
- @ add to pc and lr is formed with adr, so no link-time (virtual)
- @ address is used while the MMU is still off. That code returns
- @ through lr, i.e. into __enable_mmu.
- ldr r13, __switch_data @ address to jump to after
- @ mmu has been enabled
- adr lr, __enable_mmu @ return (PIC) address
- add pc, r10, #PROCINFO_INITFUNC
- ENDPROC(stext)
3 __lookup_processor_type 函数
__lookup_processor_type 函数是一个非常讲究技巧的函数,如果你将它领会,也将领会kernel的一些魔法。
Kernel代码将所有CPU信息的定义都放到.proc.info.init段中,因此可以认为.proc.info.init段就是一个数组,每个元素都定义了一个或一种CPU的信息。目前__lookup_processor_type使用该元素的前两个字段cpuid和mask来匹配当前CPUID,如果满足CPUID & mask == cpuid,则找到当前cpu的定义并返回。
下面是tqs3c2440开发板CPU的定义信息,cpuid = 0x41009200,mask = 0xff00fff0。如果代码是运行在tqs3c2440开发板上,那么函数返回下面的定义:
- .section ".proc.info.init", #alloc, #execinstr
- @ Processor description record for the ARM920 core, placed in the
- @ .proc.info.init section. __lookup_processor_type walks the records in
- @ that section and compares (cpu id & word 1) against word 0.
-
- .type __arm920_proc_info,#object
-
- __arm920_proc_info:
- @ word 0: CPU id value to match
- .long 0x41009200
- @ word 1: mask applied to the CPU id before the comparison
- .long 0xff00fff0
- @ section flags for cacheable, bufferable, read/write memory; presumably the word read via PROCINFO_MM_MMUFLAGS - confirm
- .long PMD_TYPE_SECT | \
-
- PMD_SECT_BUFFERABLE | \
-
- PMD_SECT_CACHEABLE | \
-
- PMD_BIT4 | \
-
- PMD_SECT_AP_WRITE | \
-
- PMD_SECT_AP_READ
- @ same flags without the cacheable/bufferable bits; NOTE(review): looks like the flags for I/O mappings - confirm
- .long PMD_TYPE_SECT | \
-
- PMD_BIT4 | \
-
- PMD_SECT_AP_WRITE | \
-
- PMD_SECT_AP_READ
-
- @ Branch instruction stored inside the record. NOTE(review): presumably
- @ this is the slot at offset PROCINFO_INITFUNC that stext jumps to - confirm
- b __arm920_setup
- @ The remaining words reference name strings, hwcap bits and per-CPU function tables
- .long cpu_arch_name
-
- .long cpu_elf_name
-
- .long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB
-
- .long cpu_arm920_name
-
- .long arm920_processor_functions
-
- .long v4wbi_tlb_fns
-
- .long v4wb_user_fns
- @ cache function table chosen at build time by CONFIG_CPU_DCACHE_WRITETHROUGH
- #ifndef CONFIG_CPU_DCACHE_WRITETHROUGH
-
- .long arm920_cache_fns
-
- #else
-
- .long v4wt_cache_fns
-
- #endif
-
- .size __arm920_proc_info, . - __arm920_proc_info
- .section ".proc.info.init", #alloc, #execinstr
- @ Processor description record for the ARM920 core, placed in the
- @ .proc.info.init section. __lookup_processor_type walks the records in
- @ that section and compares (cpu id & word 1) against word 0.
-
- .type __arm920_proc_info,#object
-
- __arm920_proc_info:
- @ word 0: CPU id value to match
- .long 0x41009200
- @ word 1: mask applied to the CPU id before the comparison
- .long 0xff00fff0
- @ section flags for cacheable, bufferable, read/write memory; presumably the word read via PROCINFO_MM_MMUFLAGS - confirm
- .long PMD_TYPE_SECT | \
-
- PMD_SECT_BUFFERABLE | \
-
- PMD_SECT_CACHEABLE | \
-
- PMD_BIT4 | \
-
- PMD_SECT_AP_WRITE | \
-
- PMD_SECT_AP_READ
- @ same flags without the cacheable/bufferable bits; NOTE(review): looks like the flags for I/O mappings - confirm
- .long PMD_TYPE_SECT | \
-
- PMD_BIT4 | \
-
- PMD_SECT_AP_WRITE | \
-
- PMD_SECT_AP_READ
-
- @ Branch instruction stored inside the record. NOTE(review): presumably
- @ this is the slot at offset PROCINFO_INITFUNC that stext jumps to - confirm
- b __arm920_setup
- @ The remaining words reference name strings, hwcap bits and per-CPU function tables
- .long cpu_arch_name
-
- .long cpu_elf_name
-
- .long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB
-
- .long cpu_arm920_name
-
- .long arm920_processor_functions
-
- .long v4wbi_tlb_fns
-
- .long v4wb_user_fns
- @ cache function table chosen at build time by CONFIG_CPU_DCACHE_WRITETHROUGH
- #ifndef CONFIG_CPU_DCACHE_WRITETHROUGH
-
- .long arm920_cache_fns
-
- #else
-
- .long v4wt_cache_fns
-
- #endif
-
- .size __arm920_proc_info, . - __arm920_proc_info
-
-
-
-
-
-
-
-
-
-
-
-
-
- @ __lookup_processor_type: find the procinfo record matching the CPU id.
- @
- @ In:   r9 = processor id, lr = return address
- @ Out:  r5 = address of the matching record as seen with the MMU off,
- @            or 0 if no record matches
- @ Uses: r3, r4, r6, r7
- @
- @ The literal words after ENDPROC hold link-time (virtual) addresses, so
- @ they are rebased by (run-time address - link-time address) of label 3
- @ before being dereferenced.
- __lookup_processor_type:
-
-
- @ r3 = run-time address of label 3, formed PC-relative
- adr r3, 3f
- @ Decrement-after load walking downwards from label 3:
- @   r7 = word at 3:       ("." = link-time address of label 3)
- @   r6 = word at 3: - 4   (__proc_info_end)
- @   r5 = word at 3: - 8   (__proc_info_begin)
-
-
- ldmda r3, {r5 - r7}
-
-
-
- sub r3, r3, r7 @ get offset between virt&phys
-
-
-
- add r5, r5, r3 @ convert virt addresses to
-
-
-
- add r6, r6, r3 @ physical address space
-
-
- @ Scan loop: r5 = current record, r6 = end of the record array
- 1: ldmia r5, {r3, r4} @ value, mask
-
-
-
-
- @ match when (cpu id & mask) == value
- and r4, r4, r9 @ mask wanted bits
-
- teq r3, r4
-
- beq 2f
-
-
- @ no match: step to the next record and continue while below the end
- add r5, r5, #PROC_INFO_SZ @ sizeof(proc_info_list)
-
-
-
- cmp r5, r6
-
- blo 1b
-
-
- @ ran off the end of the array: report failure with r5 = 0
- mov r5, #0 @ unknown processor
-
- 2: mov pc, lr
-
- ENDPROC(__lookup_processor_type)
- @ Literal table. Label 3 sits in the middle so that one table serves both
- @ lookups: ldmda from 3: reads the proc_info bounds, ldmia from 3: reads the arch_info bounds.
- .long __proc_info_begin
- .long __proc_info_end
- 3: .long .
- .long __arch_info_begin
- .long __arch_info_end
-
-
-
-
-
-
-
-
-
-
-
-
-
- @ __lookup_processor_type: find the procinfo record matching the CPU id.
- @
- @ In:   r9 = processor id, lr = return address
- @ Out:  r5 = address of the matching record as seen with the MMU off,
- @            or 0 if no record matches
- @ Uses: r3, r4, r6, r7
- @
- @ The literal words after ENDPROC hold link-time (virtual) addresses, so
- @ they are rebased by (run-time address - link-time address) of label 3
- @ before being dereferenced.
- __lookup_processor_type:
-
-
- @ r3 = run-time address of label 3, formed PC-relative
- adr r3, 3f
- @ Decrement-after load walking downwards from label 3:
- @   r7 = word at 3:       ("." = link-time address of label 3)
- @   r6 = word at 3: - 4   (__proc_info_end)
- @   r5 = word at 3: - 8   (__proc_info_begin)
-
-
- ldmda r3, {r5 - r7}
-
-
-
- sub r3, r3, r7 @ get offset between virt&phys
-
-
-
- add r5, r5, r3 @ convert virt addresses to
-
-
-
- add r6, r6, r3 @ physical address space
-
-
- @ Scan loop: r5 = current record, r6 = end of the record array
- 1: ldmia r5, {r3, r4} @ value, mask
-
-
-
-
- @ match when (cpu id & mask) == value
- and r4, r4, r9 @ mask wanted bits
-
- teq r3, r4
-
- beq 2f
-
-
- @ no match: step to the next record and continue while below the end
- add r5, r5, #PROC_INFO_SZ @ sizeof(proc_info_list)
-
-
-
- cmp r5, r6
-
- blo 1b
-
-
- @ ran off the end of the array: report failure with r5 = 0
- mov r5, #0 @ unknown processor
-
- 2: mov pc, lr
-
- ENDPROC(__lookup_processor_type)
- @ Literal table. Label 3 sits in the middle so that one table serves both
- @ lookups: ldmda from 3: reads the proc_info bounds, ldmia from 3: reads the arch_info bounds.
- .long __proc_info_begin
- .long __proc_info_end
- 3: .long .
- .long __arch_info_begin
- .long __arch_info_end
4 __lookup_machine_type 函数
__lookup_machine_type 和__lookup_processor_type像对孪生兄弟,它们的行为都是很类似的:__lookup_machine_type根据r1寄存器的机器编号到.arch.info.init段的数组中依次查找机器编号与r1相同的记录。它使用了与它孪生兄弟同样的手法进行虚拟地址到物理地址的转换计算。
在介绍函数前,我们先分析tqs3c2440开发板的机器信息的定义:
- Arch/arm/include/asm/mach/arch.h
-
- /*
-  * MACHINE_START(_type, _name) opens the initializer of a
-  * "static const struct machine_desc" object named __mach_desc_<_type>,
-  * placed in the ".arch.info.init" section, and pre-fills .nr with
-  * MACH_TYPE_<_type> and .name with _name. The caller appends further
-  * designated initializers and closes the object with MACHINE_END.
-  */
- #define MACHINE_START(_type,_name) \
-
- static const struct machine_desc __mach_desc_##_type \
-
- __used \
-
- __attribute__((__section__(".arch.info.init"))) = { \
-
- .nr = MACH_TYPE_##_type, \
-
- .name = _name,
-
-
- /* MACHINE_END closes the initializer opened by MACHINE_START. */
- #define MACHINE_END \
-
- };
- Arch/arm/include/asm/mach/arch.h
-
- /*
-  * MACHINE_START(_type, _name) opens the initializer of a
-  * "static const struct machine_desc" object named __mach_desc_<_type>,
-  * placed in the ".arch.info.init" section, and pre-fills .nr with
-  * MACH_TYPE_<_type> and .name with _name. The caller appends further
-  * designated initializers and closes the object with MACHINE_END.
-  */
- #define MACHINE_START(_type,_name) \
-
- static const struct machine_desc __mach_desc_##_type \
-
- __used \
-
- __attribute__((__section__(".arch.info.init"))) = { \
-
- .nr = MACH_TYPE_##_type, \
-
- .name = _name,
-
-
- /* MACHINE_END closes the initializer opened by MACHINE_START. */
- #define MACHINE_END \
-
- };
MACHINE_START宏用于定义一个.arch.info.init段的数组元素。.nr元素就是函数要比较的变量。Tqs3c2440开发板相应的定义如下:
- /*
-  * Machine descriptor for the TQ2440 board. MACHINE_START sets
-  * .nr = MACH_TYPE_S3C2440 and .name = "TQ2440"; .nr is the value that
-  * __lookup_machine_type compares against the machine number in r1.
-  */
- MACHINE_START(S3C2440, "TQ2440")
- /* UART addresses; NOTE(review): presumably used for the early I/O mapping - confirm */
- .phys_io = S3C2410_PA_UART,
-
- .io_pg_offst = (((u32)S3C24XX_VA_UART) >> 18) & 0xfffc,
- /* 0x100 bytes above the start of SDRAM; presumably where the boot loader leaves its parameters - confirm */
- .boot_params = S3C2410_SDRAM_PA + 0x100,
-
-
- /* Board callbacks, invoked at various stages of start_kernel */
- .init_irq = s3c24xx_init_irq,
- /* called after the kernel page tables exist, to map the board I/O ports */
- .map_io = tq2440_map_io,
-
- .init_machine = tq2440_machine_init,
-
- .timer = &s3c24xx_timer,
-
- MACHINE_END
- /*
-  * Machine descriptor for the TQ2440 board. MACHINE_START sets
-  * .nr = MACH_TYPE_S3C2440 and .name = "TQ2440"; .nr is the value that
-  * __lookup_machine_type compares against the machine number in r1.
-  */
- MACHINE_START(S3C2440, "TQ2440")
- /* UART addresses; NOTE(review): presumably used for the early I/O mapping - confirm */
- .phys_io = S3C2410_PA_UART,
-
- .io_pg_offst = (((u32)S3C24XX_VA_UART) >> 18) & 0xfffc,
- /* 0x100 bytes above the start of SDRAM; presumably where the boot loader leaves its parameters - confirm */
- .boot_params = S3C2410_SDRAM_PA + 0x100,
-
-
- /* Board callbacks, invoked at various stages of start_kernel */
- .init_irq = s3c24xx_init_irq,
- /* called after the kernel page tables exist, to map the board I/O ports */
- .map_io = tq2440_map_io,
-
- .init_machine = tq2440_machine_init,
-
- .timer = &s3c24xx_timer,
-
- MACHINE_END
这是一个struct machine_desc结构,在后面的C代码(start_kernel开始执行的代码)会使用该变量对象。在tqs3c2440开发板中,__lookup_machine_type函数就是返回该对象的指针。
这里涉及很多函数指针,它们都是在start_kernel函数里在各种阶段进行初始化的回调函数。如map_io指向的tq2440_map_io就是在建立好内核页表后,再调用它来针对开发板的各种IO端口来建立相关的映射和页表。
至于__lookup_machine_type的代码就不作详细分析,请对比__lookup_processor_type来自行分析。代码如下:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- @ __lookup_machine_type: find the machine descriptor whose type word
- @ equals the machine number passed by the boot loader.
- @
- @ In:   r1 = machine number, lr = return address
- @ Out:  r5 = address of the matching descriptor as seen with the MMU
- @            off, or 0 if none matches
- @ Uses: r3, r4, r6
- @
- @ Uses the literal table at label 3 that follows
- @ __lookup_processor_type; its words are link-time addresses and are
- @ rebased to run-time addresses before use.
- __lookup_machine_type:
- @ r3 = run-time address of label 3 (backward reference)
- adr r3, 3b
- @ r4 = "." (link-time address of 3:), r5 = __arch_info_begin, r6 = __arch_info_end
- ldmia r3, {r4, r5, r6}
-
- sub r3, r3, r4 @ get offset between virt&phys
-
- add r5, r5, r3 @ convert virt addresses to
-
- add r6, r6, r3 @ physical address space
- @ Scan loop: r5 = current descriptor, r6 = end of the descriptor array
- 1: ldr r3, [r5, #MACHINFO_TYPE] @ get machine type
-
- teq r3, r1 @ matches loader number?
-
- beq 2f @ found
-
- add r5, r5, #SIZEOF_MACHINE_DESC @ next machine_desc
-
- cmp r5, r6
-
- blo 1b
- @ reached the end without a match: report failure with r5 = 0
- mov r5, #0 @ unknown machine
-
- 2: mov pc, lr
-
- ENDPROC(__lookup_machine_type)
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- @ __lookup_machine_type: find the machine descriptor whose type word
- @ equals the machine number passed by the boot loader.
- @
- @ In:   r1 = machine number, lr = return address
- @ Out:  r5 = address of the matching descriptor as seen with the MMU
- @            off, or 0 if none matches
- @ Uses: r3, r4, r6
- @
- @ Uses the literal table at label 3 that follows
- @ __lookup_processor_type; its words are link-time addresses and are
- @ rebased to run-time addresses before use.
- __lookup_machine_type:
- @ r3 = run-time address of label 3 (backward reference)
- adr r3, 3b
- @ r4 = "." (link-time address of 3:), r5 = __arch_info_begin, r6 = __arch_info_end
- ldmia r3, {r4, r5, r6}
-
- sub r3, r3, r4 @ get offset between virt&phys
-
- add r5, r5, r3 @ convert virt addresses to
-
- add r6, r6, r3 @ physical address space
- @ Scan loop: r5 = current descriptor, r6 = end of the descriptor array
- 1: ldr r3, [r5, #MACHINFO_TYPE] @ get machine type
-
- teq r3, r1 @ matches loader number?
-
- beq 2f @ found
-
- add r5, r5, #SIZEOF_MACHINE_DESC @ next machine_desc
-
- cmp r5, r6
-
- blo 1b
- @ reached the end without a match: report failure with r5 = 0
- mov r5, #0 @ unknown machine
-
- 2: mov pc, lr
-
- ENDPROC(__lookup_machine_type)
5. 为kernel建立临时页表
前面提及到,kernel里面的所有符号在链接时,都使用了虚拟地址值。在完成基本的初始化后,kernel代码将跳到第一个C语言函数start_kernel来执行,在那个时候,这些虚拟地址必须能够对应到它所存放的真正内存位置,否则运行将会出错。为此,CPU必须开启MMU,但在开启MMU前,必须为虚拟地址到物理地址的映射建立相应的页表。在开启MMU后,kernel并不马上将PC值指向start_kernel,而是要做一些C语言运行期的设置,如堆栈,重定义等工作后才跳到start_kernel去执行。在此过程中,PC值还是物理地址,因此还需要为这段内存空间建立va = pa的内存映射关系。当然,本函数建立的所有页表都会在将来由paging_init销毁再重建,这是临时过渡性的映射关系和页表。
在介绍__create_page_tables前,先认识一个macro pgtbl,它将KERNEL_RAM_PADDR - 0x4000的值赋给rd寄存器,从下面的使用中可以看出,该值是页表在物理内存的基址,也即页表放在kernel开始地址下的16K的地方。
-
-
- @ pgtbl rd: load rd with KERNEL_RAM_PADDR - 0x4000, i.e. the address
- @ 16K below the kernel load address. __create_page_tables uses this
- @ value as the base of the 16K page table it clears and fills.
- .macro pgtbl, rd
-
- ldr \rd, =(KERNEL_RAM_PADDR - 0x4000)
-
- .endm
-
-
- @ pgtbl rd: load rd with KERNEL_RAM_PADDR - 0x4000, i.e. the address
- @ 16K below the kernel load address. __create_page_tables uses this
- @ value as the base of the 16K page table it clears and fills.
- .macro pgtbl, rd
-
- ldr \rd, =(KERNEL_RAM_PADDR - 0x4000)
-
- .endm
-
-
-
-
-
-
-
-
-
-
-
-
-
- @ __create_page_tables: build the temporary first-level page table that
- @ is in force when the MMU is switched on. Every entry written here is
- @ a 1MB section descriptor; index = address >> 20, 4 bytes per entry,
- @ hence the ">> 18" and "lsl #2" scalings below.
- @
- @ In:   r10 = procinfo pointer (supplies the section flags)
- @       lr  = return address
- @ Out:  r4  = address of the page table (KERNEL_RAM_PADDR - 0x4000)
- @ Uses: r0, r3, r6, r7
- __create_page_tables:
- pgtbl r4 @ page table address
-
- @ Clear the whole 16K table, four words per loop iteration.
- mov r0, r4
- mov r3, #0
- add r6, r0, #0x4000
- 1: str r3, [r0], #4
- str r3, [r0], #4
- str r3, [r0], #4
- str r3, [r0], #4
- teq r0, r6
- bne 1b
-
- ldr r7, [r10, #PROCINFO_MM_MMUFLAGS] @ mm_mmuflags
-
- @ Identity-map (va == pa) the 1MB section this code is executing from,
- @ so that instruction fetch keeps working across the MMU enable while
- @ pc still holds a physical address. The current pc gives the section.
- mov r6, pc
- mov r6, r6, lsr #20 @ start of kernel section
- orr r3, r7, r6, lsl #20 @ flags + kernel base
- str r3, [r4, r6, lsl #2] @ identity mapping
-
- @ Map the kernel image at its link-time (virtual) address range
- @ KERNEL_START .. KERNEL_END-1, one section per iteration. The first
- @ entry reuses the descriptor built above; each further entry adds 1MB
- @ to the physical base held in r3.
- add r0, r4, #(KERNEL_START & 0xff000000) >> 18
- str r3, [r0, #(KERNEL_START & 0x00f00000) >> 18]!
- ldr r6, =(KERNEL_END - 1)
- add r0, r0, #4
- add r6, r4, r6, lsr #18 @ entry covering the last kernel byte
- 1: cmp r0, r6
- add r3, r3, #1 << 20
- strls r3, [r0], #4
- bls 1b
-
- #ifdef CONFIG_XIP_KERNEL
- @ (empty in this listing)
- #endif
-
- @ Map the first 1MB of RAM (PHYS_OFFSET) at virtual PAGE_OFFSET.
- @ NOTE(review): presumably needed so the boot parameters near the
- @ start of RAM stay reachable - confirm.
- add r0, r4, #PAGE_OFFSET >> 18
- orr r6, r7, #(PHYS_OFFSET & 0xff000000)
- .if (PHYS_OFFSET & 0x00f00000)
- orr r6, r6, #(PHYS_OFFSET & 0x00f00000)
- .endif
- str r6, [r0]
-
- #ifdef CONFIG_DEBUG_LL
- @ (empty in this listing)
- #if defined(CONFIG_ARCH_NETWINDER) || defined(CONFIG_ARCH_CATS)
-
- #endif
-
- #ifdef CONFIG_ARCH_RPC
-
- #endif
-
- #endif
-
- mov pc, lr
- ENDPROC(__create_page_tables)
-
-
-
-
-
-
-
-
-
-
-
-
-
- __create_page_tables:
-
-
-
-
- pgtbl r4 @ page table address
-
-
-
- mov r0, r4
- mov r3, #0
- add r6, r0, #0x4000
-
- 1: str r3, [r0], #4
- str r3, [r0], #4
- str r3, [r0], #4
- str r3, [r0], #4
- teq r0, r6
- bne 1b
-
-
-
-
-
-
-
- ldr r7, [r10, #PROCINFO_MM_MMUFLAGS] @ mm_mmuflags
-
-
-
-
-
-