vm_normal_page深入理解

这个函数自带的注释其实已经非常详细,这里在细细品味之后分享一下自己的理解。对应的内核版本是4.14.129(ARM64)。

/*
 * vm_normal_page -- This function gets the "struct page" associated with a pte.
 *
 * "Special" mappings do not wish to be associated with a "struct page" (either
 * it doesn't exist, or it exists but they don't want to touch it). In this
 * case, NULL is returned here. "Normal" mappings do have a struct page.
 *
 * There are 2 broad cases. Firstly, an architecture may define a pte_special()
 * pte bit, in which case this function is trivial. Secondly, an architecture
 * may not have a spare pte bit, which requires a more complicated scheme,
 * described below.
 *
 * A raw VM_PFNMAP mapping (ie. one that is not COWed) is always considered a
 * special mapping (even if there are underlying and valid "struct pages").
 * COWed pages of a VM_PFNMAP are always normal.
 *
 * The way we recognize COWed pages within VM_PFNMAP mappings is through the
 * rules set up by "remap_pfn_range()": the vma will have the VM_PFNMAP bit
 * set, and the vm_pgoff will point to the first PFN mapped: thus every special
 * mapping will always honor the rule
 *
 *    pfn_of_page == vma->vm_pgoff + ((addr - vma->vm_start) >> PAGE_SHIFT)
 *
 * And for normal mappings this is false.
 *
 * This restricts such mappings to be a linear translation from virtual address
 * to pfn. To get around this restriction, we allow arbitrary mappings so long
 * as the vma is not a COW mapping; in that case, we know that all ptes are
 * special (because none can have been COWed).
 *
 *
 * In order to support COW of arbitrary special mappings, we have VM_MIXEDMAP.
 *
 * VM_MIXEDMAP mappings can likewise contain memory with or without "struct
 * page" backing, however the difference is that _all_ pages with a struct
 * page (that is, those where pfn_valid is true) are refcounted and considered
 * normal pages by the VM. The disadvantage is that pages are refcounted
 * (which can be slower and simply not an option for some PFNMAP users). The
 * advantage is that we don't have to follow the strict linearity rule of
 * PFNMAP mappings in order to support COWable mappings.
 *
 */
/*
 * Turn the architecture's __HAVE_ARCH_PTE_SPECIAL into a 0/1 constant so it
 * can be tested with a plain C "if" instead of an #ifdef.
 * NOTE(review): per the article author, __HAVE_ARCH_PTE_SPECIAL is defined by
 * default on the ARM64 configuration discussed here -- confirm for your arch.
 */
#ifdef __HAVE_ARCH_PTE_SPECIAL
# define HAVE_PTE_SPECIAL 1
#else
# define HAVE_PTE_SPECIAL 0
#endif
/*
 * _vm_normal_page - return the "struct page" behind @pte, or NULL when the
 * mapping is special and must not be handled as a normal page.
 *
 * @vma:  VMA that contains @addr; its vm_flags, vm_pgoff, vm_start and vm_ops
 *        are consulted
 * @addr: virtual address mapped by @pte
 * @pte:  page table entry value to classify
 * @with_public_device: when true, a device public page is returned to the
 *        caller instead of NULL
 *
 * Returns pfn_to_page(pfn) for normal mappings. For a special pte it returns
 * the result of vma->vm_ops->find_special_page() when that hook exists, the
 * device public page when @with_public_device is set, and NULL otherwise.
 * A pfn above highest_memmap_pfn is reported through print_bad_pte() and
 * yields NULL.
 */
struct page *_vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
                 pte_t pte, bool with_public_device)
{
    unsigned long pfn = pte_pfn(pte);

    if (HAVE_PTE_SPECIAL) {        
        /* Normal pages do not carry the special pte bit. */
        if (likely(!pte_special(pte)))
            goto check_pfn; 

        /*
         * From here on the pte is special: it is either the zero page or a
         * raw physical mapping (VM_PFNMAP / VM_MIXEDMAP). The VM does not
         * want to touch a struct page for these, so NULL is normally
         * returned.
         *
         * NOTE(review): per the article author, vm_insert_mixed() /
         * vm_insert_pfn() are the VM_MIXEDMAP path, where a driver may
         * handle the page fault itself through vma->vm_ops->fault() and
         * insert the pages it allocated into the VMA -- not visible in this
         * function, confirm against the callers.
         *
         * NOTE(review): per the article author, remap_pfn_range() is the
         * VM_PFNMAP path and its innermost helper also sets the special pte
         * bit, filling the pte directly from a physical address without
         * involving struct page -- confirm against remap_pfn_range().
         */

        /*
         * Let the driver translate the address itself when it provides the
         * hook.
         * NOTE(review): per the article author, this is typically combined
         * with VM_MIXEDMAP and only one driver uses it in this kernel
         * version -- confirm.
         */
        if (vma->vm_ops && vma->vm_ops->find_special_page)
            return vma->vm_ops->find_special_page(vma, addr);
        /*
         * Special pte inside a VM_PFNMAP or VM_MIXEDMAP vma: the pte was
         * filled from a raw pfn, so no struct page is handed out.
         * NOTE(review): per the article author, when such a page is shared,
         * COW handling can only grant write permission and cannot really
         * copy it; see wp_pfn_shared() -- confirm.
         */
        if (vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP))
            return NULL;
        /* The zero page is special as well: no struct page is returned. */
        if (is_zero_pfn(pfn))
            return NULL;

        /*
         * Device public pages are special pages (they are ZONE_DEVICE
         * pages but different from persistent memory). They behave
         * almost like normal pages. The difference is that they are
         * not on the lru and thus should never be involved with
         * anything that involves lru manipulation (mlock, numa
         * balancing, ...).
         *
         * This is why we still want to return NULL for such a page from
         * vm_normal_page() so that we do not have to special case every
         * call site of vm_normal_page().
         */
        if (likely(pfn <= highest_memmap_pfn)) {
            struct page *page = pfn_to_page(pfn);

            if (is_device_public_page(page)) {
                if (with_public_device)
                    return page;
                return NULL;
            }
        }
        /* A special pte that matched none of the cases above is reported. */
        print_bad_pte(vma, addr, pte, NULL);
        return NULL;
    }

    /* !HAVE_PTE_SPECIAL case follows: */

    if (unlikely(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP))) {
        if (vma->vm_flags & VM_MIXEDMAP) {
            /*
             * VM_MIXEDMAP: only a pfn for which pfn_valid() is true is
             * treated as having a struct page; anything else is special.
             * A valid pfn jumps straight to "out", skipping the zero-pfn
             * and highest_memmap_pfn checks below.
             * NOTE(review): the article author adds that any pfn belonging
             * to memblock memory counts as valid -- confirm.
             */
            if (!pfn_valid(pfn))
                return NULL;
            goto out;
        } else {
            /*
             * VM_PFNMAP without VM_MIXEDMAP: the pte is special when its
             * pfn matches the linear rule vm_pgoff + page offset of addr
             * within the vma, or when the vma is not a COW mapping.
             * Otherwise execution falls through to the normal-page checks.
             */
            unsigned long off;
            off = (addr - vma->vm_start) >> PAGE_SHIFT;
            if (pfn == vma->vm_pgoff + off)
                return NULL;
            if (!is_cow_mapping(vma->vm_flags))
                return NULL;
        }
    }

    if (is_zero_pfn(pfn))
        return NULL;
check_pfn:
    /* A pfn above highest_memmap_pfn is reported as a bad pte. */
    if (unlikely(pfn > highest_memmap_pfn)) {
        print_bad_pte(vma, addr, pte, NULL);
        return NULL;
    }

    /*
     * NOTE! We still have PageReserved() pages in the page tables.
     * eg. VDSO mappings can cause them to exist.
     */
out:
    return pfn_to_page(pfn);
}

只分析CONFIG_SPARSEMEM_VMEMMAP情况:
#elif defined(CONFIG_SPARSEMEM_VMEMMAP)

/*
 * memmap is virtually contiguous: the pfn is used directly as an index
 * from the vmemmap base pointer.
 */
#define __pfn_to_page(pfn)    (vmemmap + (pfn))

/*
 * vmemmap is the struct page pointer at VMEMMAP_START moved back by
 * (memstart_addr >> PAGE_SHIFT) entries, so that
 * vmemmap + (memstart_addr >> PAGE_SHIFT) == (struct page *)VMEMMAP_START.
 */
#define vmemmap            ((struct page *)VMEMMAP_START - (memstart_addr >> PAGE_SHIFT))

其实把上面两个宏合并展开成下面这个等价的公式,会更容易理解:

__pfn_to_page(pfn) =  ((struct page *)VMEMMAP_START + ((pfn)-(memstart_addr >> PAGE_SHIFT)))

其中:

(struct page *)VMEMMAP_START 可以看作struct page数组page[]的起始地址,其中page[0]对应的pfn就是(memstart_addr >> PAGE_SHIFT)。

(memstart_addr >> PAGE_SHIFT)则是内存起始物理地址对应的pfn。

你可能感兴趣的:(kernel分析,内存管理)