Figure 1:Vmalloc Areas
Figure 2:将非连续3个物理pages映射到连续虚拟地址
注:图片来自《Professional Linux Kernel Architecture》
函数调用关系:
1) vmap_page_range(unsigned long start, unsigned long end,
pgprot_t prot, struct page **pages)
2) static int vmap_page_range_noflush(unsigned long start, unsigned long end,
pgprot_t prot, struct page **pages)
3) static int vmap_pud_range(pgd_t *pgd, unsigned long addr,
unsigned long end, pgprot_t prot, struct page **pages, int *nr)
4) static int vmap_pmd_range(pud_t *pud, unsigned long addr,
unsigned long end, pgprot_t prot, struct page **pages, int *nr)
//以上两个函数(vmap_pud_range、vmap_pmd_range)实际上只是逐级向下调用,因为ARM920T只支持2级页表映射
5) static int vmap_pte_range(pmd_t *pmd, unsigned long addr,
unsigned long end, pgprot_t prot, struct page **pages, int *nr)
//实际填充一级二级页表的函数
6) flush_cache_vmap(start, end);
FIXME:以ARM920T为例,small page(4KiB)映射,MMU只支持2级页表映射,PGD &PTE.
kernel:2.6.35
#ifndef VMALLOC_START
/* Alignment unit and gap placed above high_memory: 8 MiB. */
#define VMALLOC_OFFSET	(8 << 20)
/*
 * First address of the vmalloc area: high_memory advanced by
 * VMALLOC_OFFSET and then rounded down to a VMALLOC_OFFSET boundary.
 */
#define VMALLOC_START \
	(((unsigned long)high_memory + VMALLOC_OFFSET) & ~(VMALLOC_OFFSET - 1))
#endif
/*
 * Map the page frames listed in @pages into the kernel virtual range
 * [start, end) and then flush the cache for that range.
 *
 * @start, @end: virtual addresses of the range; the article notes they lie
 *               between VMALLOC_START and VMALLOC_END.
 * @prot:        protection/flag bits placed in the page table entries.
 * @pages:       array of page pointers (the physical frames need not be
 *               contiguous).
 *
 * The cache flush is issued regardless of the mapping result.
 * Returns the value produced by vmap_page_range_noflush().
 */
static int vmap_page_range(unsigned long start, unsigned long end,
			   pgprot_t prot, struct page **pages)
{
	const int mapped = vmap_page_range_noflush(start, end, prot, pages);

	flush_cache_vmap(start, end);
	return mapped;
}
/*
 * Populate the kernel page tables for [start, end) from @pages without
 * flushing the cache afterwards.
 *
 * Walks the top-level (pgd) entries covering the range and delegates each
 * sub-range to vmap_pud_range().
 *
 * Returns the number of pages mapped (the running index @nr) on success,
 * or the first non-zero error reported by vmap_pud_range().
 * An empty or inverted range (start >= end) trips BUG_ON().
 */
static int vmap_page_range_noflush(unsigned long start, unsigned long end,
				   pgprot_t prot, struct page **pages)
{
	unsigned long cur = start;
	unsigned long stop;
	pgd_t *pgd;
	int mapped = 0;
	int rc;

	BUG_ON(cur >= end);

	/*
	 * Kernel pgd entry for the first address. Per the original
	 * annotation, on ARM this is pgd base + (addr >> 21): each entry
	 * spans two consecutive 1 MiB first-level descriptors.
	 */
	for (pgd = pgd_offset_k(cur); ; pgd++, cur = stop) {
		stop = pgd_addr_end(cur, end);
		rc = vmap_pud_range(pgd, cur, stop, prot, pages, &mapped);
		if (rc)
			return rc;
		if (stop == end)
			break;
	}

	return mapped;
}
/*
 * Map [addr, end) beneath one pgd entry by walking its pud entries and
 * handing each sub-range to vmap_pmd_range().
 *
 * @nr is the running index into @pages shared by all levels of the walk.
 *
 * Returns 0 on success, -ENOMEM if the pud cannot be obtained or if
 * vmap_pmd_range() reports any failure.
 */
static int vmap_pud_range(pgd_t *pgd, unsigned long addr,
		unsigned long end, pgprot_t prot, struct page **pages, int *nr)
{
	unsigned long stop;
	/*
	 * Per the original annotation: with 4level-fixup.h in effect
	 * (pud_t defined as pgd_t) this yields the pgd address itself.
	 */
	pud_t *pud = pud_alloc(&init_mm, pgd, addr);

	if (!pud)
		return -ENOMEM;

	for (;;) {
		stop = pud_addr_end(addr, end);
		if (vmap_pmd_range(pud, addr, stop, prot, pages, nr) != 0)
			return -ENOMEM;
		pud++;
		addr = stop;
		if (addr == end)
			return 0;
	}
}
/*
 * Map [addr, end) beneath one pud entry by walking its pmd entries and
 * handing each sub-range to vmap_pte_range().
 *
 * @nr is the running index into @pages shared by all levels of the walk.
 *
 * Returns 0 on success, -ENOMEM if the pmd cannot be obtained or if
 * vmap_pte_range() reports any failure.
 */
static int vmap_pmd_range(pud_t *pud, unsigned long addr,
		unsigned long end, pgprot_t prot, struct page **pages, int *nr)
{
	unsigned long stop;
	/*
	 * Per the original annotation: as with pud_alloc(), only two page
	 * table levels exist here, so this yields the pud address itself.
	 */
	pmd_t *pmd = pmd_alloc(&init_mm, pud, addr);

	if (!pmd)
		return -ENOMEM;

	for (;;) {
		stop = pmd_addr_end(addr, end);
		if (vmap_pte_range(pmd, addr, stop, prot, pages, nr) != 0)
			return -ENOMEM;
		pmd++;
		addr = stop;
		if (addr == end)
			return 0;
	}
}
/*
 * Core of the vmap walk: fill the pte entries for [addr, end) beneath
 * @pmd, one page per PAGE_SIZE step, taking pages from @pages.
 *
 * @nr is a running index into @pages that lets the higher-level callers
 * keep track of how far the mapping has progressed; it is advanced once
 * per entry written.
 *
 * Returns 0 on success, -ENOMEM if the pte table cannot be obtained or a
 * page pointer is NULL, -EBUSY if a pte slot is already in use (the last
 * two cases also raise a WARN_ON).
 */
static int vmap_pte_range(pmd_t *pmd, unsigned long addr,
		unsigned long end, pgprot_t prot, struct page **pages, int *nr)
{
	/*
	 * Obtain the second-level table for this pmd; when the pmd is not
	 * yet populated, pte_alloc_kernel allocates the table and installs
	 * it in *pmd before returning the slot for addr.
	 */
	pte_t *slot = pte_alloc_kernel(pmd, addr);

	if (!slot)
		return -ENOMEM;

	for (;;) {
		struct page *frame = pages[*nr];

		if (WARN_ON(!pte_none(*slot)))
			return -EBUSY;
		if (WARN_ON(!frame))
			return -ENOMEM;

		/* Write the second-level entry: this page is now mapped. */
		set_pte_at(&init_mm, addr, slot, mk_pte(frame, prot));
		(*nr)++;

		slot++;
		addr += PAGE_SIZE;
		if (addr == end)
			return 0;
	}
}
/*
 * Yield the kernel pte slot for @address beneath @pmd.
 *
 * When *@pmd is not populated, !pmd_present(*(pmd)) is true and
 * __pte_alloc_kernel() is called to allocate and install a pte table
 * first; if that call returns non-zero the macro evaluates to NULL.
 * Otherwise it evaluates to pte_offset_kernel(pmd, address).
 *
 * Note: @pmd and @address are evaluated more than once.
 */
#define pte_alloc_kernel(pmd, address)					\
	((unlikely(!pmd_present(*(pmd))) && __pte_alloc_kernel(pmd, address)) ? \
		NULL : pte_offset_kernel(pmd, address))
/*
 * Allocate a pte table and install it in *pmd if *pmd is still not present.
 *
 * The presence check is repeated under init_mm.page_table_lock; if *pmd was
 * populated in the meantime, the freshly allocated table is freed instead.
 *
 * Returns 0 on success (whether or not the new table was the one installed),
 * -ENOMEM if the table could not be allocated.
 */
int __pte_alloc_kernel(pmd_t *pmd, unsigned long address)
{
pte_t *new = pte_alloc_one_kernel(&init_mm, address);
// Per the original annotation: one page obtained from the buddy allocator.
if (!new)
return -ENOMEM;
smp_wmb(); /* See comment in __pte_alloc */
spin_lock(&init_mm.page_table_lock);
if (!pmd_present(*pmd)) { /* Has another populated it ? */
pmd_populate_kernel(&init_mm, pmd, new);
// Store the base address of the new pte table in the first-level entry.
// Ownership has moved to the page table, so clear 'new' to skip the free below.
new = NULL;
}
spin_unlock(&init_mm.page_table_lock);
if (new)
pte_free_kernel(&init_mm, new);
return 0;
// 0 is false in the && of pte_alloc_kernel, so that macro goes on to
// pte_offset_kernel() rather than yielding NULL.
}
Author :woodpecker <[email protected]>