分配每个进程的pgd。
mm_alloc:fs/exec.c: bprm->mm = mm = mm_alloc(); //exec加载新二进制程序
mm_init :kernel/fork.c:
mm_alloc_pgd
pgd_alloc
pgd_alloc函数: arch/arm64/mm/pgd.c
/*
 * Allocate a top-level page table (pgd) and return a pointer to it.
 *
 * @mm is accepted but not referenced in this body. The allocation
 * source depends on whether one pgd table is exactly one page.
 */
pgd_t *pgd_alloc(struct mm_struct *mm)
{
/* A pgd table that fills exactly one page is taken as a whole page. */
if (PGD_SIZE == PAGE_SIZE)
return (pgd_t *)__get_free_page(PGALLOC_GFP);
else
/* Otherwise the table comes from the dedicated pgd_cache. */
return kmem_cache_alloc(pgd_cache, PGALLOC_GFP);
}
/* Size in bytes of one pgd table: number of entries times the size of one entry. */
#define PGD_SIZE (PTRS_PER_PGD * sizeof(pgd_t))
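上述PGD_SIZE由PTRS_PER_PGD决定,而PTRS_PER_PGD取决于VA_BITS和页大小(不是PA_BITS)。在VA_BITS为48bit、页大小为64KB的配置下:PTRS_PER_PGD 64 * sizeof(pgd_t) 8 = 512,不等于PAGE_SIZE。因此pgd_alloc会走else分支,用kmem_cache_alloc接口从pgd_cache(由kmem_cache_create创建的slub缓存)中分配小块内存。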
返回pgd表虚拟地址。
pgd_cache 这个kmem_cache对象的初始化在arch/arm64/mm/pgd.c#L31:
/*
 * Create pgd_cache, the cache that pgd tables are allocated from when
 * a pgd table is not exactly one page in size.
 */
void __init pgd_cache_init(void)
{
/* No cache is created when a pgd table is exactly one page. */
if (PGD_SIZE == PAGE_SIZE)
return;
#ifdef CONFIG_ARM64_PA_BITS_52
/*
* With 52-bit physical addresses, the architecture requires the
* top-level table to be aligned to at least 64 bytes.
*/
BUILD_BUG_ON(PGD_SIZE < 64);
#endif
/*
* Naturally aligned pgds required by the architecture.
*/
/* PGD_SIZE is passed twice: as the object size and as the alignment. */
pgd_cache = kmem_cache_create("pgd_cache", PGD_SIZE, PGD_SIZE,
SLAB_PANIC, NULL);
}
因此在该arm64平台下分配的pgd表大小为512字节,其中包含64个pgd_t项。
__schedule
context_switch
switch_mm_irqs_off(switch_mm) arg:next
check_and_switch_context
cpu_switch_mm
/*
 * Switch to the page tables rooted at @pgd on behalf of address space @mm.
 *
 * @pgd is a virtual address; it is converted to a physical address
 * before being handed to cpu_do_switch_mm().
 */
static inline void cpu_switch_mm(pgd_t *pgd, struct mm_struct *mm)
{
/* Switching to swapper_pg_dir through this path is treated as a bug. */
BUG_ON(pgd == swapper_pg_dir);
/* NOTE(review): presumably parks TTBR0 on a reserved table first - helper not shown here, confirm. */
cpu_set_reserved_ttbr0();
cpu_do_switch_mm(virt_to_phys(pgd),mm);
}
/*
 * cpu_do_switch_mm(pgd_phys, mm)
 *
 * Going by the C call site cpu_do_switch_mm(virt_to_phys(pgd), mm):
 * x0 holds the physical address of the new pgd and x1 the mm pointer.
 * Writes the ASID into TTBR1_EL1 and the new table base into TTBR0_EL1.
 */
ENTRY(cpu_do_switch_mm)
mrs x2, ttbr1_el1 // read ttbr1_el1 (the register for the kernel address space, i.e. addresses with the top bits set) into x2
mmid x1, x1 // get mm->context.id
phys_to_ttbr x3, x0 // x3 = x0 in TTBR form (macro not shown here)
#ifdef CONFIG_ARM64_SW_TTBR0_PAN
bfi x3, x1, #48, #16 // set the ASID field in TTBR0
#endif
bfi x2, x1, #48, #16 // set the ASID
msr ttbr1_el1, x2 // in TTBR1 (since TCR.A1 is set)
isb
msr ttbr0_el1, x3 // now update TTBR0 (the user-space ttbr0 register)
isb
b post_ttbr_update_workaround // Back to C code...
ENDPROC(cpu_do_switch_mm)
arch/arm64/include/asm/pgtable-types.h
/*
 * Raw value types for each page-table level. All of them are u64.
 */
typedef u64 pteval_t;
typedef u64 pmdval_t;
typedef u64 pudval_t;
typedef u64 pgdval_t;

/*
 * These are used to make use of C type-checking..
 *
 * Each level's raw value is wrapped in its own single-member struct, so
 * the levels are distinct types. For every level X, X_val(x) unwraps the
 * raw value and __X(x) wraps a raw value into the struct type.
 */
typedef struct { pteval_t pte; } pte_t;
#define pte_val(x) ((x).pte)
#define __pte(x) ((pte_t) { (x) } )

/* pmd_t only exists as its own type with more than 2 page-table levels. */
#if CONFIG_PGTABLE_LEVELS > 2
typedef struct { pmdval_t pmd; } pmd_t;
#define pmd_val(x) ((x).pmd)
#define __pmd(x) ((pmd_t) { (x) } )
#endif

/* pud_t only exists as its own type with more than 3 page-table levels. */
#if CONFIG_PGTABLE_LEVELS > 3
typedef struct { pudval_t pud; } pud_t;
#define pud_val(x) ((x).pud)
#define __pud(x) ((pud_t) { (x) } )
#endif

typedef struct { pgdval_t pgd; } pgd_t;
#define pgd_val(x) ((x).pgd)
#define __pgd(x) ((pgd_t) { (x) } )

typedef struct { pteval_t pgprot; } pgprot_t;
#define pgprot_val(x) ((x).pgprot)
#define __pgprot(x) ((pgprot_t) { (x) } )
PGD:
/* Index of the pgd entry covering addr: the address bits from PGDIR_SHIFT up, wrapped to the table size. */
#define pgd_index(addr) (((addr) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))

/* Pointer to the entry for addr inside the pgd table that starts at pgd. */
#define pgd_offset_raw(pgd, addr) ((pgd) + pgd_index(addr))
/* Virtual address of the pgd entry for addr in mm's pgd table. */
#define pgd_offset(mm, addr) (pgd_offset_raw((mm)->pgd, (addr)))
/* Same lookup, performed on init_mm. */
#define pgd_offset_k(addr) pgd_offset(&init_mm, addr)
图示:
PUD: pud的转换稍微复杂一点。
/* Index of the pud entry covering addr: the address bits from PUD_SHIFT up, wrapped to the table size. */
#define pud_index(addr) (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))

/*
 * Physical address of the pud entry for addr: the address obtained from
 * the pgd entry *dir via pgd_page_paddr(), plus the byte offset of the
 * entry inside that table.
 */
#define pud_offset_phys(dir, addr) (pgd_page_paddr(READ_ONCE(*(dir))) + pud_index(addr) * sizeof(pud_t))

/*
 * dir is the virtual address of a pgd_t entry. Yields the virtual
 * address of the pud entry for addr.
 */
#define pud_offset(dir, addr) ((pud_t *)__va(pud_offset_phys((dir), (addr))))

/*
 * pgd_page_paddr() works out the physical address of the pud table from
 * the pgd_t. Open question from the notes: why take such a long detour?
 * NOTE(review): presumably because the pgd entry records a physical
 * address, so the result has to go through __va() before the kernel can
 * dereference it - confirm.
 */
static inline phys_addr_t pgd_page_paddr(pgd_t pgd)
{
return __pgd_to_phys(pgd);
}#define pgd_val(x) ((x).pgd)
#define __pte(x) ((pte_t) { (x) } )//将pgd转换成pte,先取出pgd_t中包含的物理地址,转换成pte_t结构体。
static inline pte_t pgd_pte(pgd_t pgd)
{
return __pte(pgd_val(pgd));
}#define __pgd_to_phys(pgd) __pte_to_phys(pgd_pte(pgd)) //返回pgd_t 中的内容
#define __pte_to_phys(pte) (pte_val(pte) & PTE_ADDR_MASK) //pte.pte
PMD: 同理。
/* Index of the pmd entry covering addr: the address bits from PMD_SHIFT up, wrapped to the table size. */
#define pmd_index(addr) (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))

/*
 * Physical address of the pmd entry for addr: the address obtained from
 * the pud entry *dir via pud_page_paddr(), plus the byte offset of the
 * entry inside that table.
 */
#define pmd_offset_phys(dir, addr) (pud_page_paddr(READ_ONCE(*(dir))) + pmd_index(addr) * sizeof(pmd_t))
/* Virtual address of the pmd entry for addr. */
#define pmd_offset(dir, addr) ((pmd_t *)__va(pmd_offset_phys((dir), (addr))))