其实这个函数自带的注释已经非常详细,这里在细细品味之后,分享一下自己的理解。对应的内核版本是 4.14.129(ARM64)。
/*
* vm_normal_page -- This function gets the "struct page" associated with a pte.
*
* "Special" mappings do not wish to be associated with a "struct page" (either
* it doesn't exist, or it exists but they don't want to touch it). In this
* case, NULL is returned here. "Normal" mappings do have a struct page.
*
* There are 2 broad cases. Firstly, an architecture may define a pte_special()
* pte bit, in which case this function is trivial. Secondly, an architecture
* may not have a spare pte bit, which requires a more complicated scheme,
* described below.
*
* A raw VM_PFNMAP mapping (ie. one that is not COWed) is always considered a
* special mapping (even if there are underlying and valid "struct pages").
* COWed pages of a VM_PFNMAP are always normal.
*
* The way we recognize COWed pages within VM_PFNMAP mappings is through the
* rules set up by "remap_pfn_range()": the vma will have the VM_PFNMAP bit
* set, and the vm_pgoff will point to the first PFN mapped: thus every special
* mapping will always honor the rule
*
* pfn_of_page == vma->vm_pgoff + ((addr - vma->vm_start) >> PAGE_SHIFT)
*
* And for normal mappings this is false.
*
* This restricts such mappings to be a linear translation from virtual address
* to pfn. To get around this restriction, we allow arbitrary mappings so long
* as the vma is not a COW mapping; in that case, we know that all ptes are
* special (because none can have been COWed).
*
*
* In order to support COW of arbitrary special mappings, we have VM_MIXEDMAP.
*
* VM_MIXEDMAP mappings can likewise contain memory with or without "struct
* page" backing, however the difference is that _all_ pages with a struct
* page (that is, those where pfn_valid is true) are refcounted and considered
* normal pages by the VM. The disadvantage is that pages are refcounted
* (which can be slower and simply not an option for some PFNMAP users). The
* advantage is that we don't have to follow the strict linearity rule of
* PFNMAP mappings in order to support COWable mappings.
*
*/
/*
 * Fold the architecture's __HAVE_ARCH_PTE_SPECIAL into a 0/1 constant so
 * that it can be tested with an ordinary C `if` rather than an #ifdef.
 */
#ifdef __HAVE_ARCH_PTE_SPECIAL // author's note: defined by default in the configuration discussed here (TODO confirm)
# define HAVE_PTE_SPECIAL 1
#else
# define HAVE_PTE_SPECIAL 0
#endif
/*
 * _vm_normal_page - resolve a pte to its "struct page", or NULL if the
 * mapping is treated as special.
 *
 * @vma:  vma that maps @addr; vm_flags, vm_ops, vm_start and vm_pgoff are
 *        consulted.
 * @addr: virtual address mapped by @pte.
 * @pte:  page table entry to resolve; its pfn is taken with pte_pfn().
 * @with_public_device: when true, a device public page is returned to the
 *        caller instead of NULL.
 *
 * Returns pfn_to_page(pfn) for a normal mapping. Returns NULL for special
 * mappings, for the zero pfn, and (after print_bad_pte()) for ptes that
 * fit none of the recognised cases.
 */
struct page *_vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
pte_t pte, bool with_public_device)
{
unsigned long pfn = pte_pfn(pte);
if (HAVE_PTE_SPECIAL) {
/* Common case: the pte is not marked special, only the pfn range check remains. */
if (likely(!pte_special(pte)))
goto check_pfn;
/*
 * From here on the pte carries the special bit.
 *
 * NOTE(review): per the original annotations, special ptes are used for
 * the zero page and for raw-pfn mappings (VM_PFNMAP / VM_MIXEDMAP) that
 * should not be handled through struct page. The annotations also state
 * that remap_pfn_range() sets the special bit when it fills ptes for a
 * VM_PFNMAP vma, and that vm_insert_mixed()/vm_insert_pfn() are used by
 * drivers that populate the vma from their own ->fault() handler. None
 * of this is visible in this function - verify against those helpers.
 */
/*
 * If the vma provides a find_special_page() hook, let it do the lookup.
 * NOTE(review): the original annotation says this is normally paired with
 * VM_MIXEDMAP and has a single driver user in this kernel version - confirm.
 */
if (vma->vm_ops && vma->vm_ops->find_special_page)
return vma->vm_ops->find_special_page(vma, addr);
/*
 * Special pte inside a VM_PFNMAP or VM_MIXEDMAP vma: report no struct page.
 * NOTE(review): the original annotation adds that a write fault on such a
 * shared page can only make the pte writable, not copy it, and points at
 * wp_pfn_shared() - confirm there.
 */
if (vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP))
return NULL;
/* The zero pfn is never handed out as a normal page. */
if (is_zero_pfn(pfn))
return NULL;
/*
 * Device public pages are special pages (they are ZONE_DEVICE
 * pages but different from persistent memory). They behave
 * almost like normal pages. The difference is that they are
 * not on the lru and thus should never be involved with any-
 * thing that involves lru manipulation (mlock, numa balancing,
 * ...).
 *
 * This is why we still want to return NULL for such page from
 * vm_normal_page() so that we do not have to special case all
 * call site of vm_normal_page().
 */
if (likely(pfn <= highest_memmap_pfn)) {
struct page *page = pfn_to_page(pfn);
if (is_device_public_page(page)) {
if (with_public_device)
return page;
return NULL;
}
}
/* A special pte that matched none of the cases above is reported as bad. */
print_bad_pte(vma, addr, pte, NULL);
return NULL;
}
/* !HAVE_PTE_SPECIAL case follows: */
if (unlikely(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP))) {
if (vma->vm_flags & VM_MIXEDMAP) {
/*
 * VM_MIXEDMAP: a pfn that fails pfn_valid() has no struct page to return.
 * A valid one jumps straight to "out", skipping the zero-pfn and
 * highest_memmap_pfn checks below.
 * NOTE(review): the original annotation equates "valid pfn" with memory
 * known to memblock - confirm for the target architecture.
 */
if (!pfn_valid(pfn))
return NULL;
goto out;
} else {
/*
 * VM_PFNMAP only: the pte is special when its pfn obeys the linear rule
 * pfn == vm_pgoff + page offset of addr within the vma. A pfn that breaks
 * the rule is only treated as normal when the vma is a COW mapping.
 */
unsigned long off;
off = (addr - vma->vm_start) >> PAGE_SHIFT;
if (pfn == vma->vm_pgoff + off)
return NULL;
if (!is_cow_mapping(vma->vm_flags))
return NULL;
}
}
/* Without a special bit, the zero pfn has to be filtered out explicitly. */
if (is_zero_pfn(pfn))
return NULL;
check_pfn:
/* A pfn above highest_memmap_pfn is reported as a bad pte. */
if (unlikely(pfn > highest_memmap_pfn)) {
print_bad_pte(vma, addr, pte, NULL);
return NULL;
}
/*
 * NOTE! We still have PageReserved() pages in the page tables.
 * eg. VDSO mappings can cause them to exist.
 */
out:
return pfn_to_page(pfn);
}
函数最后一步是通过 pfn_to_page(pfn) 把 pfn 转换成 struct page。它的实现取决于内存模型,这里只分析 CONFIG_SPARSEMEM_VMEMMAP 的情况:
#elif defined(CONFIG_SPARSEMEM_VMEMMAP)
/* memmap is virtually contiguous. */
/* pfn -> struct page is a plain array index into vmemmap. */
#define __pfn_to_page(pfn) (vmemmap + (pfn))
/*
 * vmemmap is VMEMMAP_START viewed as a struct page pointer and moved back
 * by (memstart_addr >> PAGE_SHIFT) entries, so that vmemmap + pfn equals
 * (struct page *)VMEMMAP_START + (pfn - (memstart_addr >> PAGE_SHIFT)).
 */
#define vmemmap ((struct page *)VMEMMAP_START - (memstart_addr >> PAGE_SHIFT))
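把 vmemmap 的定义代入 __pfn_to_page 之后,可以等价地写成下面的公式,这样更容易理解:
__pfn_to_page(pfn) = ((struct page *)VMEMMAP_START + ((pfn)-(memstart_addr >> PAGE_SHIFT)))
其中:
(struct page *)VMEMMAP_START 可以看作一个 struct page 数组 page[] 的起始地址,其中 page[0] 对应的 pfn 就是 (memstart_addr >> PAGE_SHIFT)。
(memstart_addr >> PAGE_SHIFT) 则是内存起始物理地址对应的 pfn,因此 (pfn - (memstart_addr >> PAGE_SHIFT)) 就是该 pfn 在 page[] 数组中的下标。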