接前一篇文章:Linux内核内存管理源码分析之init-mm.c(1)
本文内容参考:https://www.cnblogs.com/mysky007/p/12317831.html
上回说到了swapper_pg_dir和init_top_gpt。再次给出内核源码中init_top_gpt的说明,在Documentation/admin-guide/kdump/vmcoreinfo.rst中:
init_top_pgt
------------
Used to walk through the whole page table and convert virtual addresses
to physical addresses. The init_top_pgt is somewhat similar to
swapper_pg_dir, but it is only used in x86_64.
由以上介绍可以看到,init_top_pgt是x86架构所独有的。
和用户态页表不同,在系统初始化的时候,就要创建内核页表了。swapper_pg_dir指向内核最顶级的目录pgd,也就是内核页表的根。
前文已经指出了swapper_pg_dir是宏定义,本文来看更多相关内容。在arch/x86/include/asm/pgtable_64.h中,代码如下:
extern p4d_t level4_kernel_pgt[512];
extern p4d_t level4_ident_pgt[512];
extern pud_t level3_kernel_pgt[512];
extern pud_t level3_ident_pgt[512];
extern pmd_t level2_kernel_pgt[512];
extern pmd_t level2_fixmap_pgt[512];
extern pmd_t level2_ident_pgt[512];
extern pte_t level1_fixmap_pgt[512 * FIXMAP_PMD_NUM];
extern pgd_t init_top_pgt[];
#define swapper_pg_dir init_top_pgt
上述代码中,除了swapper_pg_dir 指向内核最顶级的目录pgd外,同时出现的还有几个页表目录。其中:
XXX_ident_pgt —— 对应的是直接映射区;
XXX_kernel_pgt —— 对应的是内核代码区;
XXX_fixmap_pgt —— 对应的是固定映射区。
那么init_top_gpt以及XXX_ident_pgt、XXX_kernel_pgt、XXX_fixmap_pgt这些变量都是在哪里初始化的呢?是在arch/x86/kernel/head_64.S中:
__INITDATA
.balign 4
SYM_DATA_START_PTI_ALIGNED(early_top_pgt)
.fill 512,8,0
.fill PTI_USER_PGD_FILL,8,0
SYM_DATA_END(early_top_pgt)
SYM_DATA_START_PAGE_ALIGNED(early_dynamic_pgts)
.fill 512*EARLY_DYNAMIC_PAGE_TABLES,8,0
SYM_DATA_END(early_dynamic_pgts)
SYM_DATA(early_recursion_flag, .long 0)
.data
#if defined(CONFIG_XEN_PV) || defined(CONFIG_PVH)
SYM_DATA_START_PTI_ALIGNED(init_top_pgt)
.quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
.org init_top_pgt + L4_PAGE_OFFSET*8, 0
.quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
.org init_top_pgt + L4_START_KERNEL*8, 0
/* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
.quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
.fill PTI_USER_PGD_FILL,8,0
SYM_DATA_END(init_top_pgt)
SYM_DATA_START_PAGE_ALIGNED(level3_ident_pgt)
.quad level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
.fill 511, 8, 0
SYM_DATA_END(level3_ident_pgt)
SYM_DATA_START_PAGE_ALIGNED(level2_ident_pgt)
/*
* Since I easily can, map the first 1G.
* Don't set NX because code runs from these pages.
*
* Note: This sets _PAGE_GLOBAL despite whether
* the CPU supports it or it is enabled. But,
* the CPU should ignore the bit.
*/
PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD)
SYM_DATA_END(level2_ident_pgt)
#else
SYM_DATA_START_PTI_ALIGNED(init_top_pgt)
.fill 512,8,0
.fill PTI_USER_PGD_FILL,8,0
SYM_DATA_END(init_top_pgt)
#endif
#ifdef CONFIG_X86_5LEVEL
SYM_DATA_START_PAGE_ALIGNED(level4_kernel_pgt)
.fill 511,8,0
.quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
SYM_DATA_END(level4_kernel_pgt)
#endif
SYM_DATA_START_PAGE_ALIGNED(level3_kernel_pgt)
.fill L3_START_KERNEL,8,0
/* (2^48-(2*1024*1024*1024)-((2^39)*511))/(2^30) = 510 */
.quad level2_kernel_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
.quad level2_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
SYM_DATA_END(level3_kernel_pgt)
SYM_DATA_START_PAGE_ALIGNED(level2_kernel_pgt)
/*
* Kernel high mapping.
*
* The kernel code+data+bss must be located below KERNEL_IMAGE_SIZE in
* virtual address space, which is 1 GiB if RANDOMIZE_BASE is enabled,
* 512 MiB otherwise.
*
* (NOTE: after that starts the module area, see MODULES_VADDR.)
*
* This table is eventually used by the kernel during normal runtime.
* Care must be taken to clear out undesired bits later, like _PAGE_RW
* or _PAGE_GLOBAL in some cases.
*/
PMDS(0, __PAGE_KERNEL_LARGE_EXEC, KERNEL_IMAGE_SIZE/PMD_SIZE)
SYM_DATA_END(level2_kernel_pgt)
SYM_DATA_START_PAGE_ALIGNED(level2_fixmap_pgt)
.fill (512 - 4 - FIXMAP_PMD_NUM),8,0
pgtno = 0
.rept (FIXMAP_PMD_NUM)
.quad level1_fixmap_pgt + (pgtno << PAGE_SHIFT) - __START_KERNEL_map \
+ _PAGE_TABLE_NOENC;
pgtno = pgtno + 1
.endr
/* 6 MB reserved space + a 2MB hole */
.fill 4,8,0
SYM_DATA_END(level2_fixmap_pgt)
SYM_DATA_START_PAGE_ALIGNED(level1_fixmap_pgt)
.rept (FIXMAP_PMD_NUM)
.fill 512,8,0
.endr
SYM_DATA_END(level1_fixmap_pgt)
#undef PMDS
.data
.align 16
SYM_DATA(early_gdt_descr, .word GDT_ENTRIES*8-1)
SYM_DATA_LOCAL(early_gdt_descr_base, .quad INIT_PER_CPU_VAR(gdt_page))
.align 16
/* This must match the first entry in level2_kernel_pgt */
SYM_DATA(phys_base, .quad 0x0)
EXPORT_SYMBOL(phys_base)
如下图所示:
内核页表顶级目录 init_top_pgt包括3项:
(1) level3_ident_pgt
即直接映射区页表的三级目录。因为level3_ident_pgt是定义在虚拟地址的内核代码段里的(写代码的时候,使用的都是虚拟地址,此时谁也不无法知道将来加载的物理地址是多少),而 __START_KERNEL_map正是虚拟地址空间的内核代码段的起始地址,因此level3_ident_pgt减去__START_KERNEL_map才转换为物理地址。
(2)PGD_PAGE_OFFSET
__PAGE_OFFSET_BASE 是虚拟地址空间里面内核的起始地址。第二项也指向 level3_ident_pgt,直接映射区。
(3)level3_kernel_pgt
内核代码区。