- bootloader在跳转到kernel前,需要确保如下设置:
MMU = off, D-cache = off, I-cache = on or off
x0 = physical address to the FDT blob
- kernel的入口在arch/arm64/kernel/head.S中。
- stext。
/*
 * stext - kernel entry point, reached from the bootloader.
 *
 * In:  x0 = physical address of the FDT blob.
 * Register roles established below (taken from the per-call comments):
 *   x21 = FDT pointer            w20 = cpu_boot_mode
 *   x22 = MIDR_EL1 value         x23 = matching cpu_table entry
 *   x24 = PHYS_OFFSET            x28 = PHYS_OFFSET - PAGE_OFFSET
 *   x25 = TTBR0, x26 = TTBR1     x27 = __switch_data
 * Does not return: it branches to the CPU setup routine with lr set to
 * __enable_mmu.
 */
ENTRY(stext)
	mov	x21, x0				// x21=FDT: keep the device-tree address passed in x0
	bl	el2_setup			// Drop to EL1, w20=cpu_boot_mode (entered from EL2 or non-secure EL1)
	/*
	 * Background: ARMv8-A defines four exception levels. EL0 is
	 * unprivileged; EL1, EL2 and EL3 are privileged. Applications run at
	 * EL0, a guest OS (Linux, Windows, ...) at EL1, an optional hypervisor
	 * at EL2 and an optional Secure Monitor at EL3. Calls into EL1, EL2
	 * and EL3 are made with svc, hvc and smc respectively.
	 */
	bl	__calc_phys_offset		// x24=PHYS_OFFSET, x28=PHYS_OFFSET-PAGE_OFFSET
	bl	set_cpu_boot_mode_flag		// record the boot mode in the global __boot_cpu_mode
	mrs	x22, midr_el1			// x22=cpuid (MRS: Move to Register from System register)
	mov	x0, x22				// pass the CPU id to lookup_processor_type
	bl	lookup_processor_type		// find the cpu_table entry for this CPU
	mov	x23, x0				// x23=current cpu_table
	/*
	 * __error_p may end up out of range for cbz if text areas are
	 * aligned up to section sizes.
	 */
	cbnz	x23, 1f				// invalid processor (x23=0)? non-zero: continue at 1
	b	__error_p
1:
	bl	__vet_fdt			// validate the device-tree pointer in x21
	bl	__create_page_tables		// x25=TTBR0, x26=TTBR1: build the temporary page tables
	/*
	 * The following calls CPU specific code in a position independent
	 * manner. See arch/arm64/mm/proc.S for details. x23 = base of
	 * cpu_info structure selected by lookup_processor_type above.
	 * On return, the CPU will be ready for the MMU to be turned on and
	 * the TCR will have been set.
	 */
	ldr	x27, __switch_data		// address to jump to after MMU has been enabled
						// (consumed by __enable_mmu)
	/*
	 * adrp produces a PC-relative, page-aligned address. The PC still
	 * holds a physical address here, so lr receives the page-aligned
	 * physical address of __enable_mmu; the add supplies the offset
	 * within the page.
	 */
	adrp	lr, __enable_mmu		// return (PIC) address
	add	lr, lr, #:lo12:__enable_mmu
	/*
	 * x23 points at the cpu_table entry and CPU_INFO_SETUP is
	 * offsetof(struct cpu_info, cpu_setup), so x12 = __cpu_setup.
	 */
	ldr	x12, [x23, #CPU_INFO_SETUP]
	add	x12, x12, x28			// __virt_to_phys
	/*
	 * Jump to __cpu_setup. Because lr = __enable_mmu, the return from
	 * __cpu_setup continues execution at __enable_mmu.
	 */
	br	x12				// initialise processor
ENDPROC(stext)
- el2_setup。
/*
 * If we're fortunate enough to boot at EL2, ensure that the world is
 * sane before dropping to EL1.
 *
 * Returns either BOOT_CPU_MODE_EL1 or BOOT_CPU_MODE_EL2 in x20 if
 * booted in EL1 or EL2 respectively.
 *
 * Clobbers x0 and x1. When entered at EL2 the routine leaves through eret
 * (to the caller's lr, at EL1) instead of ret.
 */
ENTRY(el2_setup)
	/*
	 * The current exception level is part of PSTATE; CurrentEL is the
	 * special register that exposes that field.
	 * Background: MRS = Move to Register from System register (read),
	 * MSR = Move to System register from Register (write). They are how
	 * status registers (PSTATE on AArch64, CPSR on AArch32) are accessed.
	 */
	mrs	x0, CurrentEL
	cmp	x0, #CurrentEL_EL2		// #define CurrentEL_EL2 (2 << 2)
	b.ne	1f				// not at EL2: take the EL1 path at label 1
	mrs	x0, sctlr_el2			// read the EL2 System Control Register
CPU_BE( orr x0, x0, #(1 << 25) )		// Set the EE bit for EL2 (big-endian)
CPU_LE( bic x0, x0, #(1 << 25) )		// Clear the EE bit for EL2 (little-endian)
	msr	sctlr_el2, x0			// write it back
	b	2f

1:	mrs	x0, sctlr_el1			// read the EL1 System Control Register
CPU_BE( orr x0, x0, #(3 << 24) )		// Set the EE and E0E bits for EL1 (big-endian)
CPU_LE( bic x0, x0, #(3 << 24) )		// Clear the EE and E0E bits for EL1 (little-endian)
	msr	sctlr_el1, x0			// write it back
	mov	w20, #BOOT_CPU_MODE_EL1		// This cpu booted in EL1; #define BOOT_CPU_MODE_EL1 (0xe11)
	isb					// instruction synchronization barrier
	ret					// booted at EL1: nothing more to do

	/* Hyp configuration. */
2:	mov	x0, #(1 << 31)			// 64-bit EL1
	msr	hcr_el2, x0			// Hypervisor Configuration Register: controls virtualization
						// settings and trapping of exceptions to EL2

	/* Generic timers. */
	mrs	x0, cnthctl_el2			// Counter-timer Hyp Control register
	orr	x0, x0, #3			// Enable EL1 physical timers
	msr	cnthctl_el2, x0
	msr	cntvoff_el2, xzr		// Clear virtual offset (Counter-timer Virtual Offset
						// register); xzr always reads as zero

#ifdef CONFIG_ARM_GIC_V3
	/* GICv3 system register access */
	mrs	x0, id_aa64pfr0_el1		// AArch64 Processor Feature Register 0
	ubfx	x0, x0, #24, #4			// unsigned bit-field extract: bits [27:24]
	cmp	x0, #1				// 1 = GICv3/GICv4 supported, per the original note
	b.ne	3f				// not supported: skip to label 3

	mrs_s	x0, ICC_SRE_EL2			// Interrupt Controller System Register Enable register (EL2)
	orr	x0, x0, #ICC_SRE_EL2_SRE	// Set ICC_SRE_EL2.SRE==1: access the GIC via system registers
	orr	x0, x0, #ICC_SRE_EL2_ENABLE	// Set ICC_SRE_EL2.Enable==1: EL1 may access the GIC directly
	msr_s	ICC_SRE_EL2, x0
	isb					// Make sure SRE is now set
	msr_s	ICH_HCR_EL2, xzr		// Reset ICH_HCR_EL2 to defaults

3:
#endif

	/* Populate ID registers. */
	mrs	x0, midr_el1			// Main ID Register: describes the PE (Processing Element)
	mrs	x1, mpidr_el1			// Multiprocessor Affinity Register
	msr	vpidr_el2, x0			// mirror both into the virtualization ID registers
	msr	vmpidr_el2, x1

	/* sctlr_el1 */
	mov	x0, #0x0800			// Set/clear RES{1,0} bits
CPU_BE( movk x0, #0x33d0, lsl #16 )		// Set EE and E0E on BE systems   -> 0x33d00800
CPU_LE( movk x0, #0x30d0, lsl #16 )		// Clear EE and E0E on LE systems -> 0x30d00800
	msr	sctlr_el1, x0			// the two values differ only in bits 24 and 25 (E0E, EE)

	/* Coprocessor traps. */
	mov	x0, #0x33ff
	msr	cptr_el2, x0			// Disable copro. traps to EL2 (Architectural Feature Trap Register)

#ifdef CONFIG_COMPAT				// 32-bit applications on a 64-bit kernel
	msr	hstr_el2, xzr			// Disable CP15 traps to EL2
#endif

	/* Stage-2 translation */
	msr	vttbr_el2, xzr			// Virtualization Translation Table Base Register

	/* Hypervisor stub */
	adrp	x0, __hyp_stub_vectors		// PC-relative, page-aligned address of __hyp_stub_vectors
	add	x0, x0, #:lo12:__hyp_stub_vectors	// add the offset within the page
	msr	vbar_el2, x0			// EL2 exception vector base address

	/* spsr */
	mov	x0, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\
		      PSR_MODE_EL1h)
	msr	spsr_el2, x0			// Saved Program Status Register (EL2): state to restore on eret
	msr	elr_el2, lr			// Exception Link Register (EL2): resume at the caller's lr
	mov	w20, #BOOT_CPU_MODE_EL2		// This CPU booted in EL2
	/*
	 * eret returns from an exception: PSTATE is loaded from spsr_el2
	 * (EL1h, set above) and the PC from elr_el2, so execution continues
	 * in the caller at EL1.
	 */
	eret
ENDPROC(el2_setup)
- __calc_phys_offset。
/*
 * Calculate the start of physical memory.
 *
 * Out: x28 = PHYS_OFFSET - PAGE_OFFSET (physical minus virtual offset)
 *      x24 = PHYS_OFFSET
 * Clobbers x0, x1, x2.
 */
__calc_phys_offset:
adr x0, 1f // PC-relative address of label 1; the PC holds a physical address here, so x0 is the physical address of label 1
ldp x1, x2, [x0] // x1 = first 8 bytes at label 1 (its link-time virtual address), x2 = next 8 bytes (PAGE_OFFSET)
sub x28, x0, x1 // x28 = PHYS_OFFSET - PAGE_OFFSET: physical address minus virtual address of the same location
add x24, x2, x28 // x24 = PHYS_OFFSET: PAGE_OFFSET converted to a physical address
ret
ENDPROC(__calc_phys_offset)
.align 3
1: .quad . // one 8-byte quad word; "." is the current (link-time, virtual) address
.quad PAGE_OFFSET // PAGE_OFFSET - the virtual address of the start of the kernel image; per the original note it is defined in arch/arm64/include/asm/memory.h and equals 0xffffffc000000000 with CONFIG_ARM64_VA_BITS=39
- set_cpu_boot_mode_flag。
/*
 * Sets the __boot_cpu_mode flag depending on the CPU boot mode passed
 * in x20. See arch/arm64/include/asm/virt.h for more info.
 *
 * In:  w20 = boot mode, x28 = virtual-to-physical offset
 * Clobbers x1 and the condition flags.
 */
ENTRY(set_cpu_boot_mode_flag)
ldr x1, =__boot_cpu_mode // Compute __boot_cpu_mode: load its (virtual) address
add x1, x1, x28 // convert the virtual address to a physical one
cmp w20, #BOOT_CPU_MODE_EL2 // did this CPU boot in EL2?
b.ne 1f
add x1, x1, #4 // EL2 boot: use the 32-bit slot 4 bytes further on
1: str w20, [x1] // store the boot mode into the selected slot of __boot_cpu_mode
dmb sy // Data Memory Barrier; sy = full-system scope
dc ivac, x1 // Invalidate potentially stale cache line
ret
ENDPROC(set_cpu_boot_mode_flag)
- lookup_processor_type。
/*
 * One entry of the CPU table walked by lookup_processor_type: an entry
 * matches when (cpu id & cpu_id_mask) == cpu_id_val.
 */
struct cpu_info {
	unsigned int	cpu_id_val;		/* expected value after masking */
	unsigned int	cpu_id_mask;		/* bits of the CPU id that are compared */
	const char	*cpu_name;		/* printable name of the CPU family */
	unsigned long	(*cpu_setup)(void);	/* CPU-specific setup routine */
};

/* The table is terminated by an all-zero entry (cpu_id_val == 0). */
struct cpu_info cpu_table[] = {
	{ .cpu_id_val = 0x000f0000, .cpu_id_mask = 0x000f0000,
	  .cpu_name = "AArch64 Processor", .cpu_setup = __cpu_setup },
	{ 0 },
};
/*
 * This function gets the processor ID in w0 and searches the cpu_table[] for
 * a match. It returns a pointer to the struct cpu_info it found. The
 * cpu_table[] must end with an empty (all zeros) structure.
 *
 * This routine can be called via C code and it needs to work with the MMU
 * both disabled and enabled (the offset is calculated automatically).
 *
 * In:  w0 = processor ID
 * Out: x0 = address of the matching entry, or 0 if none matched
 * Clobbers x1, x2, x3, w5, w6 and the condition flags.
 */
ENTRY(lookup_processor_type)
	adr	x1, __lookup_processor_type_data	// run-time address of the data block (physical while the MMU is off)
	ldp	x2, x3, [x1]			// x2 = link-time VA of the data block, x3 = VA of cpu_table
	sub	x1, x1, x2			// get offset between VA and PA
	add	x3, x3, x1			// convert VA to PA: run-time address of cpu_table
1:
	ldp	w5, w6, [x3]			// load cpu_id_val and cpu_id_mask (w registers are the 32-bit views)
	cbz	w5, 2f				// end of list? cpu_id_val == 0 marks the terminator
	and	w6, w6, w0			// mask the CPU id; bits [19:16] are the Architecture
						// field, 0b1111 on ARM64 per the original note
	cmp	w5, w6				// masked id == cpu_id_val (0x000f0000 for the only entry)?
	b.eq	3f				// match: return this entry
	add	x3, x3, #CPU_INFO_SZ		// advance to the next entry
	b	1b
2:
	mov	x3, #0				// unknown processor
3:
	mov	x0, x3				// return the entry address (physical when called with the MMU off)
	ret
ENDPROC(lookup_processor_type)

	.align	3
	.type	__lookup_processor_type_data, %object
__lookup_processor_type_data:
	.quad	.				// link-time address of this block
	.quad	cpu_table			// link-time address of cpu_table
	.size	__lookup_processor_type_data, . - __lookup_processor_type_data
- __vet_fdt。
/*
 * Determine validity of the x21 FDT pointer.
 * The dtb must be 8-byte aligned and live in the first 512M of memory.
 *
 * In:  x21 = physical address of the FDT, x24 = PHYS_OFFSET
 * Out: x21 unchanged if valid, 0 otherwise
 * Clobbers x0 and the condition flags.
 */
__vet_fdt:
	tst	x21, #0x7			// 8-byte aligned?
	b.ne	1f				// no: reject
	cmp	x21, x24			// FDT address vs. start of physical memory
	b.lt	1f				// below PHYS_OFFSET: reject
	mov	x0, #(1 << 29)			// 0x20000000 = 512MB
	add	x0, x0, x24			// x0 = PHYS_OFFSET + 512MB
	cmp	x21, x0				// inside the first 512MB of memory?
	b.ge	1f				// no: reject
	ret
1:
	mov	x21, #0				// invalid: zero the FDT pointer
	ret
ENDPROC(__vet_fdt)
- __create_page_tables。
/*
* Setup the initial page tables. We only setup the barest amount which is
* required to get the kernel running. The following sections are required:
* - identity mapping to enable the MMU (low address, TTBR0)
* - first few MB of the kernel linear mapping to jump to once the MMU has
* been enabled, including the FDT blob (TTBR1)
* - pgd entry for fixed mappings (TTBR1)
*/
// 四级页表时:PGD对应Level 0 translation table、PUD对应Level 1 translation table、PMD对应Level 2 translation table、PTE(page table entry)对应Level 3 translation table
// 本文以4K页、39位虚拟地址空间为例(下图给出的是48位、四级页表的完整划分;39位虚拟地址时没有L0这一级,PGD索引即图中的[38:30])
// TTBR0存储User Space所在的页表,TTBR1存储Kernel Space的页表。
/*
Translation table lookup with 4KB pages:
+--------+--------+--------+--------+--------+--------+--------+--------+
|63 56|55 48|47 40|39 32|31 24|23 16|15 8|7 0|
+--------+--------+--------+--------+--------+--------+--------+--------+
| | | | | |
| | | | | v
| | | | | [11:0] in-page offset
| | | | +-> [20:12] L3 index
| | | +-----------> [29:21] L2 index
| | +---------------------> [38:30] L1 index
| +-------------------------------> [47:39] L0 index
+-------------------------------------------------> [63] TTBR0/1
48bit的地址被分成9+9+9+9+12
PGD(Level 0)、PUD(Level 1)、PMD(Level 2)、PTE(Level 3)的translation table中的entry都是512项,每个entry是8byte,所以这些translation table都是4KB,刚好是一页。
*/
/*
 * In:  x21 = FDT physical address, x24 = PHYS_OFFSET,
 *      x28 = PHYS_OFFSET - PAGE_OFFSET
 * Out: x25 = idmap_pg_dir, x26 = swapper_pg_dir (physical addresses);
 *      x21 is zeroed if the FDT lies outside the first 512MB
 * Uses x27 to hold lr across the nested calls; clobbers x0, x1, x3, x5,
 * x6, x7 and the condition flags.
 * NOTE: the macros pgtbl, create_pgd_entry and create_block_map must be
 * defined before this point when the file is assembled.
 */
__create_page_tables:
	pgtbl	x25, x26, x28			// idmap_pg_dir and swapper_pg_dir addresses (physical)
	mov	x27, lr				// save lr: the bl instructions below overwrite it

	/*
	 * Invalidate the idmap and swapper page tables to avoid potential
	 * dirty cache lines being evicted.
	 */
	mov	x0, x25				// start of the range to invalidate
	add	x1, x26, #SWAPPER_DIR_SIZE	// end of the range
	bl	__inval_cache_range

	/*
	 * Clear the idmap and swapper page tables.
	 */
	mov	x0, x25
	add	x6, x26, #SWAPPER_DIR_SIZE
1:	stp	xzr, xzr, [x0], #16
	stp	xzr, xzr, [x0], #16
	stp	xzr, xzr, [x0], #16
	stp	xzr, xzr, [x0], #16
	cmp	x0, x6
	b.lo	1b				// 64 bytes per pass until the end of the swapper tables

	/*
	 * NOTE(review): the original note reports 0xffffffc000080340 for
	 * MM_MMUFLAGS from a disassembly; that looks like an address (e.g. of
	 * the literal pool slot) rather than attribute bits - verify.
	 */
	ldr	x7, =MM_MMUFLAGS		// flags used for every block entry below

	/*
	 * Create the identity mapping: map the whole kernel image
	 * (KERNEL_START to KERNEL_END) so that virtual address == physical
	 * address.
	 */
	mov	x0, x25				// idmap_pg_dir
	ldr	x3, =KERNEL_START		// KERNEL_START = PAGE_OFFSET + TEXT_OFFSET
						// (0xffffffc000000000 + 0x00080000 in the annotated config)
	add	x3, x3, x28			// __pa(KERNEL_START)
	create_pgd_entry x0, x3, x5, x6		// pgd entry for x3; x0 = table base, x5/x6 = scratch
	ldr	x6, =KERNEL_END			// #define KERNEL_END _end (_end comes from vmlinux.lds.S)
	mov	x5, x3				// __pa(KERNEL_START): first address of the range
	add	x6, x6, x28			// __pa(KERNEL_END): last address of the range
	create_block_map x0, x7, x3, x5, x6	// 2MB blocks: x0 = table, x7 = flags, x3 = phys, x5..x6 = range

	/*
	 * Map the kernel image (starting with PHYS_OFFSET).
	 */
	mov	x0, x26				// swapper_pg_dir (address space of the swapper/idle task)
	mov	x5, #PAGE_OFFSET		// 0xffffffc000000000 in the annotated config
	create_pgd_entry x0, x5, x3, x6		// pgd entry for x5; x0 = table base, x3/x6 = scratch
	ldr	x6, =KERNEL_END			// #define KERNEL_END _end (_end comes from vmlinux.lds.S)
	mov	x3, x24				// phys offset
	create_block_map x0, x7, x3, x5, x6	// 2MB blocks: x0 = table, x7 = flags, x3 = phys, x5..x6 = range

	/*
	 * Map the FDT blob (maximum 2MB; must be within 512MB of
	 * PHYS_OFFSET).
	 */
	mov	x3, x21				// FDT phys address
	and	x3, x3, #~((1 << 21) - 1)	// 2MB aligned (x3 &= 0xffffffffffe00000)
	mov	x6, #PAGE_OFFSET		// 0xffffffc000000000 in the annotated config
	sub	x5, x3, x24			// subtract PHYS_OFFSET: offset of the FDT from the start of memory
	tst	x5, #~((1 << 29) - 1)		// within 512MB?
	csel	x21, xzr, x21, ne		// zero the FDT pointer when it is out of range
	b.ne	1f				// out of range: skip the mapping
	add	x5, x5, x6			// __va(FDT blob)
	add	x6, x5, #1 << 21		// 2MB for the FDT blob (the author notes a dtb is usually
						// only a few hundred KB)
	sub	x6, x6, #1			// inclusive range
	create_block_map x0, x7, x3, x5, x6	// 2MB blocks: x0 = table, x7 = flags, x3 = phys, x5..x6 = range
1:
	/*
	 * Since the page tables have been populated with non-cacheable
	 * accesses (MMU disabled), invalidate the idmap and swapper page
	 * tables again to remove any speculatively loaded cache lines.
	 */
	mov	x0, x25
	add	x1, x26, #SWAPPER_DIR_SIZE
	bl	__inval_cache_range

	mov	lr, x27				// restore lr
	ret
ENDPROC(__create_page_tables)
	.ltorg
/*
 * Macro to load the physical addresses of the initial page tables.
 *
 *	ttb0:		receives the physical address of idmap_pg_dir
 *	ttb1:		receives the physical address of swapper_pg_dir
 *	virt_to_phys:	register holding the virtual-to-physical offset
 *
 * Invoked as "pgtbl x25, x26, x28". Per the original note both symbols are
 * defined in kernel/vmlinux.lds.S, after the bss section and page aligned.
 * NOTE(review): the note gives each table a size of 3 * 4KB (PGD/PUD/PMD
 * for swapper_pg_dir) - confirm against the linker script in use.
 */
	.macro	pgtbl, ttb0, ttb1, virt_to_phys
	ldr	\ttb1, =swapper_pg_dir		// virtual address of swapper_pg_dir
	ldr	\ttb0, =idmap_pg_dir		// virtual address of idmap_pg_dir
	add	\ttb1, \ttb1, \virt_to_phys	// convert to a physical address
	add	\ttb0, \ttb0, \virt_to_phys	// convert to a physical address
	.endm
/*
 * Macro to create a table entry to the next page.
 *
 *	tbl:	page table address
 *	virt:	virtual address
 *	shift:	#imm page table shift
 *	ptrs:	#imm pointers per table page
 *
 * Preserves:	virt
 * Corrupts:	tmp1, tmp2
 * Returns:	tbl -> next level table page address
 */
	.macro	create_table_entry, tbl, virt, shift, ptrs, tmp1, tmp2
	lsr	\tmp1, \virt, #\shift		// e.g. virt >> 30 when shift is PGDIR_SHIFT = 30
	and	\tmp1, \tmp1, #\ptrs - 1	// table index: with 512 entries the mask is 0x1ff,
						// i.e. bits [38:30] of virt for a shift of 30
	add	\tmp2, \tbl, #PAGE_SIZE		// the next-level table is the page right after tbl
	orr	\tmp2, \tmp2, #PMD_TYPE_TABLE	// address of next table and entry type
	str	\tmp2, [\tbl, \tmp1, lsl #3]	// write the 8-byte entry at tbl + index * 8
	add	\tbl, \tbl, #PAGE_SIZE		// next level table page
	.endm
/*
 * Macro to populate the PGD (and possibily PUD) for the corresponding
 * block entry in the next level (tbl) for the given virtual address.
 *
 * Preserves:	tbl, next, virt
 * Corrupts:	tmp1, tmp2
 *
 * NOTE(review): create_table_entry advances tbl to the next-level table
 * page, so "Preserves: tbl" above looks inaccurate - confirm.
 *
 * Values in the annotated configuration (4KB pages, 39-bit VA, 3 levels):
 *	PGDIR_SHIFT  = (PAGE_SHIFT - 3) * CONFIG_PGTABLE_LEVELS + 3
 *		     = (12 - 3) * 3 + 3 = 30
 *	PTRS_PER_PGD = 1 << (VA_BITS - PGDIR_SHIFT) = 1 << (39 - 30) = 0x200
 *	SWAPPER_PGTABLE_LEVELS = CONFIG_PGTABLE_LEVELS - 1 = 2, so the second
 *	create_table_entry below is not assembled.
 */
	.macro	create_pgd_entry, tbl, virt, tmp1, tmp2
	create_table_entry \tbl, \virt, PGDIR_SHIFT, PTRS_PER_PGD, \tmp1, \tmp2
#if SWAPPER_PGTABLE_LEVELS == 3
	create_table_entry \tbl, \virt, TABLE_SHIFT, PTRS_PER_PTE, \tmp1, \tmp2
#endif
	.endm
/*
 * Macro to populate block entries in the page table for the start..end
 * virtual range (inclusive).
 *
 * Preserves:	tbl, flags
 * Corrupts:	phys, start, end, pstate
 *
 * The inline values assume 2MB blocks (BLOCK_SHIFT = 21, BLOCK_SIZE =
 * 0x200000) and 512 entries per table, as in the annotated configuration.
 */
	.macro	create_block_map, tbl, flags, phys, start, end
	lsr	\phys, \phys, #BLOCK_SHIFT		// phys >>= 21: drop the offset within the block
	lsr	\start, \start, #BLOCK_SHIFT		// start >>= 21
	and	\start, \start, #PTRS_PER_PTE - 1	// table index: bits [29:21] of the original start
	orr	\phys, \flags, \phys, lsl #BLOCK_SHIFT	// table entry: flags | (phys << 21)
	lsr	\end, \end, #BLOCK_SHIFT		// end >>= 21
	and	\end, \end, #PTRS_PER_PTE - 1		// table end index: bits [29:21] of the original end
9999:	str	\phys, [\tbl, \start, lsl #3]		// store the entry at tbl + index * 8
	add	\start, \start, #1			// next entry
	add	\phys, \phys, #BLOCK_SIZE		// next block (phys += 0x200000)
	cmp	\start, \end
	b.ls	9999b					// repeat while index <= end index (unsigned)
	.endm