最近在忙一款PPC处理器的芯片验证和内核移植工作,导致arm-linux启动分析最后一部一直没有写,今天将arm-linux start_kernel之前的最后一部分分析记录下。之前2篇文章链接如下:
http://blog.csdn.net/skyflying2012/article/details/41344377
http://blog.csdn.net/skyflying2012/article/details/41447843
kernel版本号:3.4.55
之前分析到__create_page_tables在内核代码区TEXT_OFF下部的16KB区域内进行页表的配置,完成turn_mmu_on的平映射以及kernel image的线性映射。接下来就需要开启MMU,让整个CPU进入虚拟地址运行的新阶段。head.S中stext最后一段代码如下:
/*
* The following calls CPU specific code in a position independent
* manner. See arch/arm/mm/proc-*.S for details. r10 = base of
* xxx_proc_info structure selected by __lookup_processor_type
* above. On return, the CPU will be ready for the MMU to be
* turned on, and r0 will hold the CPU control register value.
*/
ldr r13, =__mmap_switched @ address to jump to after
@ mmu has been enabled
adr lr, BSYM(1f) @ return (PIC) address
mov r8, r4 @ set TTBR1 to swapper_pg_dir
ARM( add pc, r10, #PROCINFO_INITFUNC )
THUMB( add r12, r10, #PROCINFO_INITFUNC )
THUMB( mov pc, r12 )
1: b __enable_mmu
ENDPROC(stext)
看注释也可以明白接下来要完成的2件工作:执行CPU特定处理代码,开启MMU。
在第一篇分析中我们知道r10中存储着本CPU的proc_info_list首地址。到这里需要再详细解释下内核的proc
info机制。在kernel image中定义有一个.proc.info.init的段。在arch/arm/kernel/vmlinux.lds.S中,如下:
#define PROC_INFO \ . = ALIGN(4); \ VMLINUX_SYMBOL(__proc_info_begin) = .; \ *(.proc.info.init) \ VMLINUX_SYMBOL(__proc_info_end) = .;
__proc_info_begin和__pro_info_end分别代表该段的头尾。.proc.info.init段中存储的数据是在arch/arm/mm/proc-xxx.S中定义的。到底使用哪个proc-xxx.S则由处理器的指令集版本号决定。
以我的cortex-A8处理器为例,是armv7指令集,根据arch/arm/mm/Makefile。
obj-$(CONFIG_CPU_V6) += proc-v6.o
obj-$(CONFIG_CPU_V6K) += proc-v6.o
obj-$(CONFIG_CPU_V7) += proc-v7.o
编译的是proc-v7.S,在该文件中有如下一段汇编:
.section ".rodata"
string cpu_arch_name, "armv7"
string cpu_elf_name, "v7"
.align
//接下来定义的数据都在.proc.info.init段中
.section ".proc.info.init", #alloc, #execinstr
/* * Standard v7 proc info content */
//定义了宏定义__v7_proc,这个宏定义非常重要!
.macro __v7_proc initfunc, mm_mmuflags = 0, io_mmuflags = 0, hwcaps = 0
ALT_SMP(.long PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_SECT_AP_READ | \
PMD_SECT_AF | PMD_FLAGS_SMP | \mm_mmuflags)
ALT_UP(.long PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_SECT_AP_READ | \
PMD_SECT_AF | PMD_FLAGS_UP | \mm_mmuflags)
.long PMD_TYPE_SECT | PMD_SECT_AP_WRITE | \
PMD_SECT_AP_READ | PMD_SECT_AF | \io_mmuflags
W(b) \initfunc
.long cpu_arch_name
.long cpu_elf_name
.long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB | HWCAP_FAST_MULT | \
HWCAP_EDSP | HWCAP_TLS | \hwcaps
.long cpu_v7_name
.long v7_processor_functions
.long v7wbi_tlb_fns
.long v6_user_fns
.long v7_cache_fns
.endm
//没有选择LPAE
#ifndef CONFIG_ARM_LPAE
/* * ARM Ltd. Cortex A5 processor. */
.type __v7_ca5mp_proc_info, #object
__v7_ca5mp_proc_info:
.long 0x410fc050
.long 0xff0ffff0
__v7_proc __v7_ca5mp_setup
.size __v7_ca5mp_proc_info, . - __v7_ca5mp_proc_info
/* * ARM Ltd. Cortex A9 processor. */
.type __v7_ca9mp_proc_info, #object
__v7_ca9mp_proc_info:
.long 0x410fc090
.long 0xff0ffff0
__v7_proc __v7_ca9mp_setup
.size __v7_ca9mp_proc_info, . - __v7_ca9mp_proc_info
#endif /* CONFIG_ARM_LPAE */
/* * ARM Ltd. Cortex A7 processor. */
.type __v7_ca7mp_proc_info, #object
__v7_ca7mp_proc_info:
.long 0x410fc070
.long 0xff0ffff0
__v7_proc __v7_ca7mp_setup, hwcaps = HWCAP_IDIV
.size __v7_ca7mp_proc_info, . - __v7_ca7mp_proc_info
/* * ARM Ltd. Cortex A15 processor. */
.type __v7_ca15mp_proc_info, #object
__v7_ca15mp_proc_info:
.long 0x410fc0f0
.long 0xff0ffff0
__v7_proc __v7_ca15mp_setup, hwcaps = HWCAP_IDIV
.size __v7_ca15mp_proc_info, . - __v7_ca15mp_proc_info
/* * Match any ARMv7 processor core. */
.type __v7_proc_info, #object
__v7_proc_info:
.long 0x000f0000 @ Required ID value
.long 0x000f0000 @ Mask for ID
__v7_proc __v7_setup
.size __v7_proc_info, . - __v7_proc_info
这段汇编看出,指定.proc.info.init段中存储的是一些结构体,定义了V7指令集特定处理器的属性和处理函数。在C文件中我们找到了这些结构体的定义,在arch/arm/include/asm/proc-info.h中:
/* * Note! struct processor is always defined if we're * using MULTI_CPU, otherwise this entry is unused, * but still exists. * * NOTE! The following structure is defined by assembly * language, NOT C code. For more information, check: * arch/arm/mm/proc-*.S and arch/arm/kernel/head.S */
struct proc_info_list {
unsigned int cpu_val;
unsigned int cpu_mask;
unsigned long __cpu_mm_mmu_flags; /* used by head.S */
unsigned long __cpu_io_mmu_flags; /* used by head.S */
unsigned long __cpu_flush; /* used by head.S */
const char *arch_name;
const char *elf_name;
unsigned int elf_hwcap;
const char *cpu_name;
struct processor *proc;
struct cpu_tlb_fns *tlb;
struct cpu_user_fns *user;
struct cpu_cache_fns *cache;
};
proc-v7.S中定义的__v7_ca5mp_proc_info __v7_ca9mp_proc_info __v7_ca7mp_proc_info __v7_ca15mp_proc_info __v7_proc_info都是proc_info_list结构体,分别对应A5 A9 A7
A15以及其他v7指令集处理器,这些结构体定义时都使用了__v7_proc宏定义来完成成员变量赋值。
为了验证这些proc_info_list结构体都在.proc.info.init段中,我对编译生成的vmlinux的.proc.info.init段进行反汇编。如下:
#arm-linux-objdump -dS --section=.proc.info.init vmlinux
vmlinux: file format elf32-littlearm
Disassembly of section .init.proc.info:
c03d98cc <__proc_info_begin>:
c03d98cc: 410fc050 .word 0x410fc050
c03d98d0: ff0ffff0 .word 0xff0ffff0
c03d98d4: 00000c0e .word 0x00000c0e
c03d98d8: 00000c02 .word 0x00000c02
*/
.type __v7_ca5mp_proc_info, #object
__v7_ca5mp_proc_info:
.long 0x410fc050
.long 0xff0ffff0
__v7_proc __v7_ca5mp_setup
c03d98dc: eafff872 b c03d7aac <__v7_ca5mp_setup>
c03d98e0: c02f07a4 .word 0xc02f07a4
c03d98e4: c02f07aa .word 0xc02f07aa
c03d98e8: 00008097 .word 0x00008097
c03d98ec: c0017b98 .word 0xc0017b98
c03d98f0: c03db2c0 .word 0xc03db2c0
c03d98f4: c03c52c8 .word 0xc03c52c8
c03d98f8: c03db2b8 .word 0xc03db2b8
c03d98fc: c03db290 .word 0xc03db290
c03d9900 <__v7_ca9mp_proc_info>:
c03d9900: 410fc090 ff0ffff0 00000c0e 00000c02 ...A............
*/
.type __v7_ca9mp_proc_info, #object
__v7_ca9mp_proc_info:
.long 0x410fc090
.long 0xff0ffff0
__v7_proc __v7_ca9mp_setup
c03d9910: eafff865 c02f07a4 c02f07aa 00008097 e...../.../.....
c03d9920: c0017b98 c03db2c0 c03c52c8 c03db2b8 .{....=..R<...=.
c03d9930: c03db290 ..=.
c03d9934 <__v7_ca7mp_proc_info>:
c03d9934: 410fc070 ff0ffff0 00000c0e 00000c02 p..A............
*/
.type __v7_ca7mp_proc_info, #object
__v7_ca7mp_proc_info:
.long 0x410fc070
.long 0xff0ffff0
__v7_proc __v7_ca7mp_setup, hwcaps = HWCAP_IDIV
c03d9944: eafff85a c02f07a4 c02f07aa 00068097 Z...../.../.....
c03d9954: c0017b98 c03db2c0 c03c52c8 c03db2b8 .{....=..R<...=.
c03d9964: c03db290 ..=.
c03d9968 <__v7_ca15mp_proc_info>:
c03d9968: 410fc0f0 ff0ffff0 00000c0e 00000c02 ...A............
*/
.type __v7_ca15mp_proc_info, #object
__v7_ca15mp_proc_info:
.long 0x410fc0f0
.long 0xff0ffff0
__v7_proc __v7_ca15mp_setup, hwcaps = HWCAP_IDIV
c03d9978: eafff84d c02f07a4 c02f07aa 00068097 M...../.../.....
c03d9988: c0017b98 c03db2c0 c03c52c8 c03db2b8 .{....=..R<...=.
c03d9998: c03db290 ..=.
c03d999c <__v7_proc_info>:
c03d999c: 000f0000 000f0000 00000c0e 00000c02 ................
*/
.type __v7_proc_info, #object
__v7_proc_info:
.long 0x000f0000 @ Required ID value
.long 0x000f0000 @ Mask for ID
__v7_proc __v7_setup
c03d99ac: eafff841 c02f07a4 c02f07aa 00008097 A...../.../.....
c03d99bc: c0017b98 c03db2c0 c03c52c8 c03db2b8 .{....=..R<...=.
c03d99cc: c03db290 ..=.
很显然,跟我们分析的一样,.proc.info.init段中存储的就是proc-v7.S中定义的proc_info_list结构体。
第一篇分析中指出__lookup_processor_type函数即遍历.proc.info.init段,根据CP15
c0寄存器读到的处理器版本号进行匹配。将匹配到的proc_info_list结构体存在r10中。
根据实际的运行情况,我的A8处理器遍历后匹配到的是__v7_proc_info,因此r10寄存器存储的是__v7_proc_info结构体首地址。
.proc.info.init段先解释到这里,回到stext中,r13中存下__mmap_switched地址,需要注意的是该地址是链接地址,第二篇说过现在整个kernel image还是运行在物理地址上,因此该地址跳转必须等到开启MMU之后了。接下来对于ARM指令(不是THUMB指令),跳转到__v7_proc_info->__cpu_flush执行。
proc-v7.S中分析过,__v7_proc_info中使用__v7_proc宏定义定义了其中的一些成员变量,其中__cpu_flush=__v7_setup.__v7_setup在proc-v7.S中定义如下:
__v7_setup:
adr r12, __v7_setup_stack @ the local stack
stmia r12, {r0-r5, r7, r9, r11, lr}
bl v7_flush_dcache_all
ldmia r12, {r0-r5, r7, r9, r11, lr}
mrc p15, 0, r0, c0, c0, 0 @ read main ID register
and r10, r0, #0xff000000 @ ARM?
teq r10, #0x41000000
bne 3f
and r5, r0, #0x00f00000 @ variant
and r6, r0, #0x0000000f @ revision
orr r6, r6, r5, lsr #20-4 @ combine variant and revision
ubfx r0, r0, #4, #12 @ primary part number
/* Cortex-A8 Errata */
ldr r10, =0x00000c08 @ Cortex-A8 primary part number
teq r0, r10
bne 2f
#ifdef CONFIG_ARM_ERRATA_430973
teq r5, #0x00100000 @ only present in r1p*
mrceq p15, 0, r10, c1, c0, 1 @ read aux control register
orreq r10, r10, #(1 << 6) @ set IBE to 1
mcreq p15, 0, r10, c1, c0, 1 @ write aux control register
#endif
#ifdef CONFIG_ARM_ERRATA_458693
teq r6, #0x20 @ only present in r2p0
mrceq p15, 0, r10, c1, c0, 1 @ read aux control register
orreq r10, r10, #(1 << 5) @ set L1NEON to 1
orreq r10, r10, #(1 << 9) @ set PLDNOP to 1
mcreq p15, 0, r10, c1, c0, 1 @ write aux control register
#endif
#ifdef CONFIG_ARM_ERRATA_460075
teq r6, #0x20 @ only present in r2p0
mrceq p15, 1, r10, c9, c0, 2 @ read L2 cache aux ctrl register
tsteq r10, #1 << 22
orreq r10, r10, #(1 << 22) @ set the Write Allocate disable bit
mcreq p15, 1, r10, c9, c0, 2 @ write the L2 cache aux ctrl register
#endif
b 3f
/* Cortex-A9 Errata */
2: ldr r10, =0x00000c09 @ Cortex-A9 primary part number
teq r0, r10
bne 3f
#ifdef CONFIG_ARM_ERRATA_742230
cmp r6, #0x22 @ only present up to r2p2
mrcle p15, 0, r10, c15, c0, 1 @ read diagnostic register
orrle r10, r10, #1 << 4 @ set bit #4
mcrle p15, 0, r10, c15, c0, 1 @ write diagnostic register
#endif
#ifdef CONFIG_ARM_ERRATA_742231
teq r6, #0x20 @ present in r2p0
teqne r6, #0x21 @ present in r2p1
teqne r6, #0x22 @ present in r2p2
mrceq p15, 0, r10, c15, c0, 1 @ read diagnostic register
orreq r10, r10, #1 << 12 @ set bit #12
orreq r10, r10, #1 << 22 @ set bit #22
mcreq p15, 0, r10, c15, c0, 1 @ write diagnostic register
#endif
#ifdef CONFIG_ARM_ERRATA_743622
teq r5, #0x00200000 @ only present in r2p*
mrceq p15, 0, r10, c15, c0, 1 @ read diagnostic register
orreq r10, r10, #1 << 6 @ set bit #6
mcreq p15, 0, r10, c15, c0, 1 @ write diagnostic register
#endif
#if defined(CONFIG_ARM_ERRATA_751472) && defined(CONFIG_SMP)
ALT_SMP(cmp r6, #0x30) @ present prior to r3p0
ALT_UP_B(1f)
mrclt p15, 0, r10, c15, c0, 1 @ read diagnostic register
orrlt r10, r10, #1 << 11 @ set bit #11
mcrlt p15, 0, r10, c15, c0, 1 @ write diagnostic register
1:
#endif
3: mov r10, #0
mcr p15, 0, r10, c7, c5, 0 @ I+BTB cache invalidate
dsb
#ifdef CONFIG_MMU
mcr p15, 0, r10, c8, c7, 0 @ invalidate I + D TLBs
v7_ttb_setup r10, r4, r8, r5 @ TTBCR, TTBRx setup
ldr r5, =PRRR @ PRRR
ldr r6, =NMRR @ NMRR
mcr p15, 0, r5, c10, c2, 0 @ write PRRR
mcr p15, 0, r6, c10, c2, 1 @ write NMRR
#endif
#ifndef CONFIG_ARM_THUMBEE
mrc p15, 0, r0, c0, c1, 0 @ read ID_PFR0 for ThumbEE
and r0, r0, #(0xf << 12) @ ThumbEE enabled field
teq r0, #(1 << 12) @ check if ThumbEE is present
bne 1f
mov r5, #0
mcr p14, 6, r5, c1, c0, 0 @ Initialize TEEHBR to 0
mrc p14, 6, r0, c0, c0, 0 @ load TEECR
orr r0, r0, #1 @ set the 1st bit in order to
mcr p14, 6, r0, c0, c0, 0 @ stop userspace TEEHBR access
1:
#endif
adr r5, v7_crval
ldmia r5, {r5, r6}
#ifdef CONFIG_CPU_ENDIAN_BE8
orr r6, r6, #1 << 25 @ big-endian page tables
#endif
#ifdef CONFIG_SWP_EMULATE
orr r5, r5, #(1 << 10) @ set SW bit in "clear"
bic r6, r6, #(1 << 10) @ clear it in "mmuset"
#endif
mrc p15, 0, r0, c1, c0, 0 @ read control register
bic r0, r0, r5 @ clear bits them
orr r0, r0, r6 @ set them
THUMB( orr r0, r0, #1 << 30 ) @ Thumb exceptions
mov pc, lr @ return to head.S:__ret
ENDPROC(__v7_setup)
初看到这段汇编肯定头大,但是仔细过一遍就会好些,大部分都是一些特定处理器的条件编译并且还有一些跳转label,很多代码可以忽略,对于A8处理器有用的部分我加粗标注出来了。完成的工作如下:
(1)刷新cache,v7_flush_dcache_all
(2)无效掉cache
(3)无效掉指令和数据TLB
(4)从CP15 c0寄存器读出到r0,进行处理,做为下一步写入CP15 c0的值
说实话这部分汇编内核开发者不需要修改,除非是你所用的处理器做了处理器核级的修改,我们只需要了解即可。这里最关心的是r0的数据,因为接下来写如CP15的c0寄存器启动MMU,值就是来自r0.
r0从CP15 c0获取后,首先清掉r5的置位,然后置位r6的置位,r5 r6来自于v7_crval前8个字节,v7_crval在proc-v7-2level.S和proc-v7-3level.S中分别由定义,到底使用哪个是在proc-info.S中条件编译指定的。
#ifdef CONFIG_ARM_LPAE
#include "proc-v7-3level.S"
#else
#include "proc-v7-2level.S"
#endif
我的A8处理器选择使用proc-v7-2level.S,v7_crval定义如下:
v7_crval: crval clear=0x0120c302, mmuset=0x10c03c7d, ucset=0x00c01c7c
要明白这里定义的v7_crval值含义就需要看CP15 c0寄存器的定义,限于篇幅这里不再详细解释了,感兴趣的朋友可以看我的另一篇说明ARM CP15协处理器的博文:
http://blog.csdn.net/skyflying2012/article/details/25823967
__v7_setup执行完毕,回到stext,接下来跳转到__enable_mmu,也在head.S中,如下:
/* * Setup common bits before finally enabling the MMU. Essentially * this is just loading the page table pointer and domain access * registers. * * r0 = cp#15 control register * r1 = machine ID * r2 = atags or dtb pointer * r4 = page table pointer * r9 = processor ID * r13 = *virtual* address to jump to upon completion */
__enable_mmu:
/* * turn on L2 cache */
mov r5, #0x82
mcr p15, 1, r5, c9, c0, 2
mrc p15, 0, r5, c1, c0, 1
orr r5, r5, #CR_L2
mcr p15, 0, r5, c1, c0, 1
#if defined(CONFIG_ALIGNMENT_TRAP) && __LINUX_ARM_ARCH__ < 6
orr r0, r0, #CR_A
#else
bic r0, r0, #CR_A
#endif
#ifdef CONFIG_CPU_DCACHE_DISABLE
bic r0, r0, #CR_C
#endif
#ifdef CONFIG_CPU_BPREDICT_DISABLE
bic r0, r0, #CR_Z
#endif
#ifdef CONFIG_CPU_ICACHE_DISABLE
bic r0, r0, #CR_I
#endif
#ifdef CONFIG_ARM_LPAE
mov r5, #0
mcrr p15, 0, r4, r5, c2 @ load TTBR0
#else
mov r5, #(domain_val(DOMAIN_USER, DOMAIN_MANAGER) | \
domain_val(DOMAIN_KERNEL, DOMAIN_MANAGER) | \
domain_val(DOMAIN_TABLE, DOMAIN_MANAGER) | \
domain_val(DOMAIN_IO, DOMAIN_CLIENT))
mcr p15, 0, r5, c3, c0, 0 @ load domain access register
mcr p15, 0, r4, c2, c0, 0 @ load page table pointer
#endif
b __turn_mmu_on
ENDPROC(__enable_mmu)
该函数根据内核配置对CP15和r0寄存器进行配置,工作如下:
(1)操作CP15的c1寄存器打开L2 cache
(2)根据内核配置,关闭指令或者数据cache
(3)配置CP15 c3寄存器,配置ARM域的访问权限
(4)配置CP15 c2寄存器,指定内存页表地址
需要注意的是前面__v7_setup中只是无效掉cache,无效是cache操作一种,表示cache中数据无效了,下次由cache读取数据,则需要cache从内存中重新获取数据。这是保证cache数据一致性的手段。
接下来再次跳转到__turn_mmu_on执行,在head.S中如下:
/*
* Enable the MMU. This completely changes the structure of the visible
* memory space. You will not be able to trace execution through this.
* If you have an enquiry about this, *please* check the linux-arm-kernel
* mailing list archives BEFORE sending another post to the list.
*
* r0 = cp#15 control register
* r1 = machine ID
* r2 = atags or dtb pointer
* r9 = processor ID
* r13 = *virtual* address to jump to upon completion
*
* other registers depend on the function called upon completion
*/
.align 5
.pushsection .idmap.text, "ax"
ENTRY(__turn_mmu_on)
mov r0, r0
instr_sync
mcr p15, 0, r0, c1, c0, 0 @ write control reg
mrc p15, 0, r3, c0, c0, 0 @ read id reg
instr_sync
mov r3, r3
mov r3, r13
mov pc, r3
__turn_mmu_on_end:
ENDPROC(__turn_mmu_on)
.popsection
该函数将r0值写入CP15的c0寄存器,查看CP15说明,c0寄存器的第0位表征MMU的开启,刚才__v7_setup中v7_crval给出的mmuset值已经将第0位置1了,因此写入c0后MMU就开启了!
在第二篇启动分析文章中讲到create pgtable时,对__turn_mmu_on所在的1M地址空间做了平映射,到现在这个阶段就看出其作用了。
CPU执行完成“mcr p15, 0, r0, c1, c0,0”指令后开启MMU,接下来CPU取指地址是当前pc+4,由于做了平映射,虽然接下来是虚拟地址,但是该虚拟地址跟之前的物理地址是完全一致的,pc+4可以取到mrc p15, 0, r3, c0, c0, 0指令,并不会导致CPU取指上的问题,
可以想象如果做的不是平映射,CPU取pc+4指令,该虚拟地址不是映射到mrc p15, 0, r3,c0, c0, 0指令所在物理地址,CPU接下来的执行就不可预测了。
开启MMU后,修改PC值为r13执行,也就是__mmap_switched,该函数已经是运行在MMU之上的虚拟地址了,因此不需要位置无关代码,如下:
/* * The following fragment of code is executed with the MMU on in MMU mode, * and uses absolute addresses; this is not position independent. * * r0 = cp#15 control register * r1 = machine ID * r2 = atags/dtb pointer * r9 = processor ID */
__INIT
__mmap_switched:
adr r3, __mmap_switched_data
//拷贝data段
ldmia r3!, {r4, r5, r6, r7}
cmp r4, r5 @ Copy data segment if needed
1: cmpne r5, r6
ldrne fp, [r4], #4
strne fp, [r5], #4
bne 1b
//清空bss
mov fp, #0 @ Clear BSS (and zero fp)
1: cmp r6, r7
strcc fp, [r6],#4
bcc 1b
//加载r3之后的5个int到r4-sp中
ARM( ldmia r3, {r4, r5, r6, r7, sp})
THUMB( ldmia r3, {r4, r5, r6, r7} )
THUMB( ldr sp, [r3, #16] )
//保存数据到全局变量中
str r9, [r4] @ Save processor ID
str r1, [r5] @ Save machine type
str r2, [r6] @ Save atags pointer
bic r4, r0, #CR_A @ Clear 'A' bit
stmia r7, {r0, r4} @ Save control register values
b start_kernel
ENDPROC(__mmap_switched)
.align 2
.type __mmap_switched_data, %object
__mmap_switched_data:
.long __data_loc @ r4
.long _sdata @ r5
.long __bss_start @ r6
.long _end @ r7
.long processor_id @ r4
.long __machine_arch_type @ r5
.long __atags_pointer @ r6
.long cr_alignment @ r7
.long init_thread_union + THREAD_START_SP @ sp
.size __mmap_switched_data, . - __mmap_switched_data
__mmap_switched完成的工作如下:
(1)如果有必要则进行data段的拷贝搬移
(2)清空bss段
(3)保存处理器ID atag地址等到指定全局变量中,设置SP,跳转到start_kernel。
看到start_kernel说明我们的分析马上就要结束了,不过这里还是有2个地方需要思考下:
(1)全局变量赋值后,processor_id = r9,__machine_arch_type = r1, __atags_pointer = r2.
start_kernel中对atag等进行分析处理时直接对这些变量进行操作就可以。
(2)sp = init_thread_union + THREAD_START_SP,设置栈指针,为接下来的C函数运行做准备。对于arm-linux来说,start_kernel之前全部都是汇编代码。
init_thread_union代表的是内核第一个进程,pid = 0,该进程是内核人为造出来的,而不是fork出来的,在arch/arm/kernel/init_task.c中定义了该进程,
/*
* Initial thread structure.
*
* We need to make sure that this is 8192-byte aligned due to the
* way process stacks are handled. This is done by making sure
* the linker maps this in the .text segment right after head.S,
* and making head.S ensure the proper alignment.
*
* The things we do for performance..
*/
union thread_union init_thread_union __init_task_data =
{ INIT_THREAD_INFO(init_task) };
thread_union定义如下:
union thread_union {
struct thread_info thread_info;
unsigned long stack[THREAD_SIZE/sizeof(long)];
};
其中定义了THREAD_SIZE(8KB)的静态栈空间,__mmap_switched将sp设置在了内核第一进程的栈顶部,栈向下生长,因此接下来start_kernel就运行在该内核栈。
直到rest_init中调用kernel_thread创建进程kernel_init。kernel_init的pid = 1.
__mmap_switched最后跳转到start_kernel开始进入C函数运行环境,这时整个kernel image已经运行在虚拟地址之上,运行地址 链接地址保持了一致,内核运行进入了新时代!
arm-linux启动到start_kernel的汇编运行过程就分析到这。
这段时间做PPC处理器的内核移植,对PPC的启动过程也学习了一番,有时间也将PPC-linux启动到start_kernel的过程加以对比分析!