物理内存以页面为单位进行管理,这些物理内存页称为物理页面或者页帧。操作系统为了管理这些物理页面(页帧),需要按照物理地址给每个页帧编号,这个编号称为页帧号(Page Frame Number,PFN)。页帧号是与物理内存中的一个页帧(page frame)相关联的唯一标识符。
两者之间的关系:
物理页帧号(PFN)和物理地址之间存在一种简单的对应关系:页帧号相当于物理地址以页为单位的索引,将页帧号左移 PAGE_SHIFT 位即得到该页帧的起始物理地址,将物理地址右移 PAGE_SHIFT 位(丢弃页内偏移)即得到它所在页帧的页帧号。
物理页帧号是一个无符号整数,用于标识物理内存页。它代表了物理页在系统中的位置或索引。每个物理页都有一个唯一的物理页帧号。
物理地址是实际的硬件地址,用于访问系统的物理内存。它表示内存中特定位置的物理存储单元。
其计算公式:
// linux-4.19.90/include/linux/pfn.h
/*
 * PAGE_SHIFT determines the page size: 1 << 12 = 4096 bytes per page.
 * NOTE(review): on x86 this definition appears to come from
 * arch/x86/include/asm/page_types.h rather than pfn.h -- confirm.
 */
#define PAGE_SHIFT 12
/* PFN -> physical address: cast to phys_addr_t first, then shift left by PAGE_SHIFT. */
#define PFN_PHYS(x) ((phys_addr_t)(x) << PAGE_SHIFT)
/* Physical address -> PFN: shift right by PAGE_SHIFT, dropping the in-page offset bits. */
#define PHYS_PFN(x) ((unsigned long)((x) >> PAGE_SHIFT))
// linux-4.19.90/include/asm-generic/memory_model.h
/*
* Convert a physical address to a Page Frame Number and back.
*
* Both macros are thin aliases: __phys_to_pfn() expands to PHYS_PFN() and
* __pfn_to_phys() expands to PFN_PHYS().
*/
#define __phys_to_pfn(paddr) PHYS_PFN(paddr)
#define __pfn_to_phys(pfn) PFN_PHYS(pfn)
(1)
// linux-4.19.90/include/asm-generic/memory_model.h
/*
* supports 3 memory models.
*
* Each branch below provides the same pair of conversions between a page
* frame number and its struct page pointer:
*   __pfn_to_page(pfn)  - PFN to struct page pointer
*   __page_to_pfn(page) - struct page pointer to PFN
*/
#if defined(CONFIG_FLATMEM)
/*
* FLATMEM: a single mem_map array; the index is the PFN minus
* ARCH_PFN_OFFSET.
*/
#define __pfn_to_page(pfn) (mem_map + ((pfn) - ARCH_PFN_OFFSET))
#define __page_to_pfn(page) ((unsigned long)((page) - mem_map) + \
ARCH_PFN_OFFSET)
#elif defined(CONFIG_DISCONTIGMEM)
/*
* DISCONTIGMEM: each node has its own node_mem_map. The PFN is first mapped
* to a node id, then to an offset local to that node's map.
*/
#define __pfn_to_page(pfn) \
({ unsigned long __pfn = (pfn); \
unsigned long __nid = arch_pfn_to_nid(__pfn); \
NODE_DATA(__nid)->node_mem_map + arch_local_page_offset(__pfn, __nid);\
})
/*
* Reverse direction: index of the page within its node's map, plus the
* node's node_start_pfn.
*/
#define __page_to_pfn(pg) \
({ const struct page *__pg = (pg); \
struct pglist_data *__pgdat = NODE_DATA(page_to_nid(__pg)); \
(unsigned long)(__pg - __pgdat->node_mem_map) + \
__pgdat->node_start_pfn; \
})
#elif defined(CONFIG_SPARSEMEM_VMEMMAP)
/*
* memmap is virtually contiguous, so the conversion is plain pointer
* arithmetic relative to vmemmap.
*/
#define __pfn_to_page(pfn) (vmemmap + (pfn))
#define __page_to_pfn(page) (unsigned long)((page) - vmemmap)
#elif defined(CONFIG_SPARSEMEM)
/*
* Note: section's mem_map is encoded to reflect its start_pfn.
* section[i].section_mem_map == mem_map's address - start_pfn;
*
* Because start_pfn is already subtracted in the encoded address, the
* macros below add or subtract the full PFN directly.
*/
#define __page_to_pfn(pg) \
({ const struct page *__pg = (pg); \
int __sec = page_to_section(__pg); \
(unsigned long)(__pg - __section_mem_map_addr(__nr_to_section(__sec))); \
})
#define __pfn_to_page(pfn) \
({ unsigned long __pfn = (pfn); \
struct mem_section *__sec = __pfn_to_section(__pfn); \
__section_mem_map_addr(__sec) + __pfn; \
})
#endif /* CONFIG_FLATMEM/DISCONTIGMEM/SPARSEMEM */
/* Public names: aliases for whichever model-specific pair was selected above. */
#define page_to_pfn __page_to_pfn
#define pfn_to_page __pfn_to_page
由于目前大多数服务器和桌面系统都配置了CONFIG_SPARSEMEM_VMEMMAP:
# cat /etc/redhat-release
CentOS Linux release 7.9.2009 (Core)
# cat /boot/config-3.10.0-1160.el7.x86_64 | grep CONFIG_SPARSEMEM_VMEMMAP
CONFIG_SPARSEMEM_VMEMMAP_ENABLE=y
CONFIG_SPARSEMEM_VMEMMAP=y
# cat /boot/config-4.19.90-23.8.v2101.ky10.x86_64 | grep CONFIG_SPARSEMEM_VMEMMAP
CONFIG_SPARSEMEM_VMEMMAP_ENABLE=y
CONFIG_SPARSEMEM_VMEMMAP=y
因此:
/*
 * Simplified view of memory_model.h when CONFIG_SPARSEMEM_VMEMMAP is set:
 * the struct page array is virtually contiguous starting at vmemmap, so both
 * conversions are plain pointer arithmetic.
 */
#if defined(CONFIG_SPARSEMEM_VMEMMAP)
/* memmap is virtually contiguous. */
/* PFN -> struct page pointer: index into the array that starts at vmemmap. */
#define __pfn_to_page(pfn) (vmemmap + (pfn))
/* struct page pointer -> PFN: element distance from vmemmap. */
#define __page_to_pfn(page) (unsigned long)((page) - vmemmap)
#endif /* CONFIG_SPARSEMEM_VMEMMAP */
/* Public names are aliases for the model-specific macros. */
#define page_to_pfn __page_to_pfn
#define pfn_to_page __pfn_to_page
pfn 与 struct page 之间的转换只是相对于常量 vmemmap 的偏移关系,这让 virt_to_page/page_address 成为简单的移位/加法操作。
具体可参考这篇文章:https://xiaolizai.blog.csdn.net/article/details/132079120
这里是x86_64架构:
// linux-4.19.90/arch/x86/include/asm/page_64.h
/*
 * Translate the address x into the value returned as its physical address.
 *
 * Two cases are handled with a single unsigned subtraction:
 *  - x >= __START_KERNEL_map: the subtraction does not wrap, so x > y and
 *    the result is (x - __START_KERNEL_map) + phys_base.
 *  - x <  __START_KERNEL_map: the subtraction wraps around, so x > y is
 *    false and the result is y + (__START_KERNEL_map - PAGE_OFFSET), which
 *    equals x - PAGE_OFFSET.
 */
static inline unsigned long __phys_addr_nodebug(unsigned long x)
{
unsigned long y = x - __START_KERNEL_map;
/* use the carry flag to determine if x was < __START_KERNEL_map */
x = y + ((x > y) ? phys_base : (__START_KERNEL_map - PAGE_OFFSET));
return x;
}
/* __phys_addr() maps straight to the unchecked variant here. */
#define __phys_addr(x) __phys_addr_nodebug(x)
// linux-4.19.90/arch/x86/include/asm/page.h
/* Virtual -> physical: cast x to unsigned long and pass it to __phys_addr(). */
#define __pa(x) __phys_addr((unsigned long)(x))
宏 __pa(x) 将给定的内核虚拟地址 x 转换为物理地址。
// linux-4.19.90/arch/x86/include/asm/page.h
/* Physical -> virtual: add PAGE_OFFSET to x and return the sum as a void pointer. */
#define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET))
宏 __va(x) 将给定的物理地址 x 转换为内核虚拟地址。
/* Kernel virtual address -> PFN: convert to a physical address with __pa(), then shift right by PAGE_SHIFT. */
#define virt_to_pfn(kaddr) (__pa(kaddr) >> PAGE_SHIFT)
/* PFN -> kernel virtual address: shift left by PAGE_SHIFT to get the physical address, then apply __va(). */
#define pfn_to_virt(pfn) __va((pfn) << PAGE_SHIFT)
pfn 与 virt 之间的转换也非常简单:
virt_to_pfn:先调用__pa将内核虚拟地址转化为物理地址,然后再右移12位得到 pfn。
pfn_to_virt:先将pfn左移12位得到物理地址,再调用__va将物理地址转化为内核虚拟地址。
(1)
/*
 * Excerpt of the per-node descriptor; only the PFN-related fields are shown
 * and the "......" lines mark members omitted from the excerpt.
 */
typedef struct pglist_data {
......
unsigned long node_start_pfn; /* first page frame number of this node */
unsigned long node_present_pages; /* total number of physical pages */
unsigned long node_spanned_pages; /* total size of physical page range, including holes */
......
} pg_data_t;
node_start_pfn: 节点的起始页帧号。
node_present_pages: 节点中当前存在可用的物理页的数量。
node_spanned_pages: 节点中总的物理页数,包括空洞。
/*
 * Return the end PFN of a node: node_start_pfn plus node_spanned_pages.
 * Because it is start + length, the value is one past the last PFN spanned
 * by the node (an exclusive upper bound).
 */
static inline unsigned long pgdat_end_pfn(pg_data_t *pgdat)
{
return pgdat->node_start_pfn + pgdat->node_spanned_pages;
}
/* First PFN of node nid, read from that node's pglist_data. */
#define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn)
/* Exclusive end PFN of node nid, computed by pgdat_end_pfn(). */
#define node_end_pfn(nid) pgdat_end_pfn(NODE_DATA(nid))
node_start_pfn 和 node_end_pfn两个宏用来获取指定节点的起始页帧号和结束页帧号(结束页帧号不包含在节点范围内,即节点最后一个页帧号加一)。
内核代码中的使用:
/*
* Scan data sections and all the referenced memory blocks allocated via the
* kernel's standard allocators. This function must be called with the
* scan_mutex held.
*
* Only the per-node struct page scanning part is shown in this excerpt; the
* "......" lines mark code omitted from it.
*/
static void kmemleak_scan(void)
{
......
/*
* Struct page scanning for each node.
*/
/* NOTE(review): presumably holds off memory hotplug during the walk -- confirm. */
get_online_mems();
int i;
for_each_online_node(i) {
/* Walk the half-open PFN range [start_pfn, end_pfn) of this node. */
unsigned long start_pfn = node_start_pfn(i);
unsigned long end_pfn = node_end_pfn(i);
unsigned long pfn;
for (pfn = start_pfn; pfn < end_pfn; pfn++) {
struct page *page;
/* Skip PFNs that pfn_valid() rejects. */
if (!pfn_valid(pfn))
continue;
page = pfn_to_page(pfn);
/* only scan if page is in use */
if (page_count(page) == 0)
continue;
/* The scanned range is the struct page object itself: [page, page + 1). */
scan_block(page, page + 1, NULL);
/* Low six bits are zero on every 64th PFN; call cond_resched() then. */
if (!(pfn & 63))
cond_resched();
}
}
put_online_mems();
......
}
对于每个在线节点:
获取节点的起始页帧号和结束页帧号。
遍历节点内的每个页帧:
如果页帧无效,则跳过。
如果页面的引用计数为 0,则跳过。
扫描该页帧对应的 struct page 结构体本身(范围为 page 到 page + 1),而不是页面中的数据。
每处理 64 个页帧,调用 cond_resched() 给调度器一次调度的机会。
(2)
/*
* These are _only_ used during initialisation, therefore they
* can use __initdata ... They could have names to indicate
* this restriction.
*
* pfn_to_nid(pfn) yields the node id that a page frame number belongs to.
* With CONFIG_NUMA it looks up the struct page for the PFN and reads its
* node id; without CONFIG_NUMA it is the constant 0.
*/
#ifdef CONFIG_NUMA
/* The argument is copied into a local first so it is evaluated only once. */
#define pfn_to_nid(pfn) \
({ \
unsigned long __pfn_to_nid_pfn = (pfn); \
page_to_nid(pfn_to_page(__pfn_to_nid_pfn)); \
})
#else
#define pfn_to_nid(pfn) (0)
#endif
(1)pfn_valid
x86_64 架构没有配置CONFIG_HAVE_ARCH_PFN_VALID:
#ifndef CONFIG_HAVE_ARCH_PFN_VALID
/*
 * Generic pfn_valid(), compiled only when the architecture does not provide
 * its own (CONFIG_HAVE_ARCH_PFN_VALID unset).
 *
 * Returns 0 when the PFN's section number is out of range (>= NR_MEM_SECTIONS);
 * otherwise returns the result of valid_section() for the section that
 * contains the PFN.
 */
static inline int pfn_valid(unsigned long pfn)
{
if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS)
return 0;
return valid_section(__nr_to_section(pfn_to_section_nr(pfn)));
}
#endif
aarch64:
# cat /boot/config-5.4.18-74-generic | grep CONFIG_HAVE_ARCH_PFN_VALID
CONFIG_HAVE_ARCH_PFN_VALID=y
# cat /proc/kallsyms | grep pfn_valid
ffffffc01009fb48 T pfn_valid
#ifdef CONFIG_HAVE_ARCH_PFN_VALID
/*
 * Architecture-provided pfn_valid(), an exported out-of-line function.
 *
 * Returns 0 when the PFN does not survive a round trip through a physical
 * address; otherwise returns the result of memblock_is_map_memory() for
 * that address.
 */
int pfn_valid(unsigned long pfn)
{
phys_addr_t addr = pfn << PAGE_SHIFT;
/* Shifting back must reproduce pfn; a mismatch means high bits were lost in the left shift. */
if ((addr >> PAGE_SHIFT) != pfn)
return 0;
/* NOTE(review): presumably true only for memblock memory that is mapped -- confirm. */
return memblock_is_map_memory(addr);
}
EXPORT_SYMBOL(pfn_valid);
#endif
Linux 4.19.90