Linux源码剖析struct page结构体flags成员

概述

struct page是mm中最核心的结构体之一,可以说整个内存管理就是围绕page展开的。不同场景下page的状态各有不同,page->flags标志位是描述page状态的重要成员,各标志位定义在include/linux/page-flags.h:

/* Bit indices of the individual state flags stored in page->flags. */
enum pageflags {
    PG_locked,      /* Page is locked. Don't touch. */
    // Related to the second-chance algorithm used by page reclaim
    PG_referenced,
    // Page cache contents in memory are consistent with the data on disk
    PG_uptodate,
    // Page is dirty
    PG_dirty,
    // Page is on an LRU list
    PG_lru,
    // Page is on the active LRU list
    PG_active,

    PG_workingset,
    PG_waiters,     /* Page has waiters, check its waitqueue. Must be bit #7 and in the same byte as "PG_locked" */
    // I/O error
    PG_error,
    // Page is slab memory
    PG_slab,
    PG_owner_priv_1,    /* Owner use. If pagecache, fs may use*/
    PG_arch_1,
    // Must not be swapped out
    PG_reserved,
    PG_private,     /* If pagecache, has fs-private data */
    PG_private_2,       /* If pagecache, has fs aux data */
    // Writeback in progress
    PG_writeback,       /* Page is under writeback */
    PG_head,        /* A head page */
    PG_mappedtodisk,    /* Has blocks allocated on-disk */
    // About to be reclaimed; set just before reclaim starts
    PG_reclaim,     /* To be reclaimed asap */
    // Set for anonymous pages and shmem pages
    PG_swapbacked,      /* Page is backed by RAM/swap */
    PG_unevictable,     /* Page is "unevictable"  */
#ifdef CONFIG_MMU
    // Page has been mlock()ed
    PG_mlocked,     /* Page is vma mlocked */
#endif

    /* These two flags exist only with idle page tracking on 64-bit builds. */
#if defined(CONFIG_IDLE_PAGE_TRACKING) && defined(CONFIG_64BIT)
    PG_young,
    PG_idle,
#endif
};

PG_locked

表示page被lock,上锁之后其他等待该lock的调用会睡眠等待,主要是用于page的竞态保护,内核提供lock_page/trylock_page/unlock_page相关函数。

trylock_page:检测PG_locked flag,如果已经设置返回false,否则设置返回true。

Linux源码剖析struct page结构体flags成员_第1张图片

lock_page:先尝试上锁。如果trylock_page返回false,代表PG_locked已经被设置过了,那么进入__lock_page睡眠等待;如果未设置过,那么trylock_page设置标志后直接返回。

Linux源码剖析struct page结构体flags成员_第2张图片

unlock_page:清除PG_locked标志,并唤醒等待的进程。

Linux源码剖析struct page结构体flags成员_第3张图片

PG_dirty

到底哪些类型的页面、在什么场景下会被设置为脏页?

设置:只要页面需要写回磁盘或者交换分区(包括zram压缩方式的page),都会设置PG_dirty,比如要回收的匿名页(在add_to_swap中设置PG_dirty)、shmem页面,或者file-backed的页面。因为如果不设置PG_dirty标志(真正clean的page除外),就无法通过pageout写回磁盘或者交换区(包括zram)。

清除:通常准备写入磁盘或者交换分区前clear,见下面的pageout函数。


/*
 * shrink_page_list() returns the number of reclaimed pages
 *
 * Abridged excerpt: the "..." lines stand for code omitted from the
 * original function, so the braces shown here do not balance.
 */
static unsigned int shrink_page_list(struct list_head *page_list,
				     struct pglist_data *pgdat,
				     struct scan_control *sc,
				     enum ttu_flags ttu_flags,
				     struct reclaim_stat *stat,
				     bool ignore_references)
{
	LIST_HEAD(ret_pages);
	LIST_HEAD(free_pages);
	unsigned int nr_reclaimed = 0;
	unsigned int pgactivate = 0;

	/* Start from zeroed statistics for this call. */
	memset(stat, 0, sizeof(*stat));
	cond_resched();

	/* Process entries until page_list is empty. */
	while (!list_empty(page_list)) {
        ...

		/* A dirty page is handed to pageout(); the switch acts on its result. */
		if (PageDirty(page)) {
            ...
			try_to_unmap_flush_dirty();
			switch (pageout(page, mapping)) {
            ...
		}

	return nr_reclaimed;
}

PG_writeback

表示page正在回写,向swap分区写入和文件系统向磁盘写入时都会设置该标志位。一般调用set_page_writeback函数设置:在向块设备层submit io之前设置,io完成后清除。

swap分区写入场景举例:

设置writeback:

/*
 * Abridged excerpt of the swap write path; the "..." line stands for
 * omitted code (including the declarations of bio and ret).
 */
int __swap_writepage(struct page *page, struct writeback_control *wbc,
        bio_end_io_t end_write_func)
{
    ...
    /* Flag the page as under writeback before the bio is submitted. */
    set_page_writeback(page);
    unlock_page(page);
    submit_bio(bio);
out:
    return ret;
}

IO完成取消writeback标志:

 Linux源码剖析struct page结构体flags成员_第4张图片

PG_reclaim

一般在写回磁盘或者交换分区前设置,回写成功后clear掉该标志,ClearPageReclaim见上面的end_page_writeback。不论是普通文件系统中文件回写磁盘,还是回写交换分区,io完成都会回调该函数;由于回写之前设置了PG_reclaim,回写完成时就会在这里清理掉。

设置代码:


/*
 * pageout is called by shrink_page_list() for each dirty page.
 * Calls ->writepage().
 *
 * Return values, as implemented below:
 *   PAGE_KEEP     - is_page_cache_freeable() failed, the page has no
 *                   mapping and its buffers were not freed, or
 *                   may_write_to_inode() refused the write.
 *   PAGE_ACTIVATE - the mapping has no ->writepage, or ->writepage
 *                   returned AOP_WRITEPAGE_ACTIVATE.
 *   PAGE_SUCCESS  - ->writepage was called and did not return
 *                   AOP_WRITEPAGE_ACTIVATE (this includes negative
 *                   results, after handle_write_error() has run).
 *   PAGE_CLEAN    - an orphaned page had its buffers freed and its dirty
 *                   flag cleared, or clear_page_dirty_for_io() returned 0.
 */
static pageout_t pageout(struct page *page, struct address_space *mapping)
{
	/*
	 * If the page is dirty, only perform writeback if that write
	 * will be non-blocking.  To prevent this allocation from being
	 * stalled by pagecache activity.  But note that there may be
	 * stalls if we need to run get_block().  We could test
	 * PagePrivate for that.
	 *
	 * If this process is currently in __generic_file_write_iter() against
	 * this page's queue, we can perform writeback even if that
	 * will block.
	 *
	 * If the page is swapcache, write it back even if that would
	 * block, for some throttling. This happens by accident, because
	 * swap_backing_dev_info is bust: it doesn't reflect the
	 * congestion state of the swapdevs.  Easy to fix, if needed.
	 */
	if (!is_page_cache_freeable(page))
		return PAGE_KEEP;
	if (!mapping) {
		/*
		 * Some data journaling orphaned pages can have
		 * page->mapping == NULL while being dirty with clean buffers.
		 */
		if (page_has_private(page)) {
			if (try_to_free_buffers(page)) {
				ClearPageDirty(page);
				pr_info("%s: orphaned page\n", __func__);
				return PAGE_CLEAN;
			}
		}
		return PAGE_KEEP;
	}
	/* Without a ->writepage operation the page cannot be written from here. */
	if (mapping->a_ops->writepage == NULL)
		return PAGE_ACTIVATE;
	if (!may_write_to_inode(mapping->host))
		return PAGE_KEEP;

    // About to write back to disk or swap, so clear the page's dirty state
	if (clear_page_dirty_for_io(page)) {
		int res;
		struct writeback_control wbc = {
			.sync_mode = WB_SYNC_NONE,
			.nr_to_write = SWAP_CLUSTER_MAX,
			.range_start = 0,
			.range_end = LLONG_MAX,
			.for_reclaim = 1,
		};

        // Set this flag to indicate that reclaim is about to start
		SetPageReclaim(page);
		res = mapping->a_ops->writepage(page, &wbc);
		if (res < 0)
			handle_write_error(mapping, page, res);
		if (res == AOP_WRITEPAGE_ACTIVATE) {
			/* Undo the reclaim mark before returning PAGE_ACTIVATE. */
			ClearPageReclaim(page);
			return PAGE_ACTIVATE;
		}

		if (!PageWriteback(page)) {
			/* synchronous write or broken a_ops? */
			ClearPageReclaim(page);
		}
		trace_mm_vmscan_writepage(page);
		inc_node_page_state(page, NR_VMSCAN_WRITE);
		return PAGE_SUCCESS;
	}

	/* clear_page_dirty_for_io() returned 0: nothing was written. */
	return PAGE_CLEAN;
}

你可能感兴趣的:(内存子系统,linux,运维,服务器)