page = __alloc_pages_slowpath(gfp_mask, order,
        zonelist, high_zoneidx, nodemask,
        preferred_zone, migratetype);
static inline struct page *
__alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
    struct zonelist *zonelist, enum zone_type high_zoneidx,
    nodemask_t *nodemask, struct zone *preferred_zone,
    int migratetype)
{
    const gfp_t wait = gfp_mask & __GFP_WAIT;
    struct page *page = NULL;
    int alloc_flags;
    unsigned long pages_reclaimed = 0;
    unsigned long did_some_progress;
    bool sync_migration = false;
    bool deferred_compaction = false;
    bool contended_compaction = false;

    /*
     * In the slowpath, we sanity check order to avoid ever trying to
     * reclaim >= MAX_ORDER areas which will never succeed. Callers may
     * be using allocators in order of preference for an area that is
     * too large.
     */
    if (order >= MAX_ORDER) { // sanity check on the requested order
        WARN_ON_ONCE(!(gfp_mask & __GFP_NOWARN));
        return NULL;
    }

    /*
     * GFP_THISNODE (meaning __GFP_THISNODE, __GFP_NORETRY and
     * __GFP_NOWARN set) should not cause reclaim since the subsystem
     * (f.e. slab) using GFP_THISNODE may choose to trigger reclaim
     * using a larger set of nodes after it has established that the
     * allowed per node queues are empty and that nodes are
     * over allocated.
     */
    if (IS_ENABLED(CONFIG_NUMA) &&
            (gfp_mask & GFP_THISNODE) == GFP_THISNODE) // GFP_THISNODE allocations must not retry or reclaim
        goto nopage;

restart:
    if (!(gfp_mask & __GFP_NO_KSWAPD)) // wake the kswapd daemons that write pages out in the background
        wake_all_kswapd(order, zonelist, high_zoneidx,
                        zone_idx(preferred_zone));

    /*
     * OK, we're below the kswapd watermark and have kicked background
     * reclaim. Now things get more complex, so set up alloc_flags according
     * to how we want to proceed.
     */
    alloc_flags = gfp_to_alloc_flags(gfp_mask); // derive the ALLOC_* flags from the gfp mask

    /*
     * Find the true preferred zone if the allocation is unconstrained by
     * cpusets.
     */
    // pick the most suitable zone, this time without taking cpusets into account
    if (!(alloc_flags & ALLOC_CPUSET) && !nodemask)
        first_zones_zonelist(zonelist, high_zoneidx, NULL,
                    &preferred_zone);

rebalance:
    /* This is the last chance, in general, before the goto nopage. */
    // retry the fast-path allocation, now allowed to go down to the minimum watermark
    page = get_page_from_freelist(gfp_mask, nodemask, order, zonelist,
            high_zoneidx, alloc_flags & ~ALLOC_NO_WATERMARKS,
            preferred_zone, migratetype);
    if (page)
        goto got_pg;

    /* Allocate without watermarks if the context allows */
    if (alloc_flags & ALLOC_NO_WATERMARKS) {
        /*
         * Ignore mempolicies if ALLOC_NO_WATERMARKS on the grounds
         * the allocation is high priority and these type of
         * allocations are system rather than user orientated
         */
        zonelist = node_zonelist(numa_node_id(), gfp_mask); // this returns the local node's fallback zonelist[0]
        // high-priority allocation, mainly allocation with no watermark check;
        // a request that must not fail will get memory from the call below,
        // otherwise the system would crash
        page = __alloc_pages_high_priority(gfp_mask, order,
                zonelist, high_zoneidx, nodemask,
                preferred_zone, migratetype);
        if (page) {
            goto got_pg;
        }
    }

    /* Atomic allocations - we can't balance anything */
    if (!wait) // the caller cannot wait, so fail here: everything below may sleep
        goto nopage;

    /* Avoid recursion of direct reclaim */
    if (current->flags & PF_MEMALLOC) // already allocating on behalf of reclaim itself: fail to avoid recursion and deadlock
        goto nopage;

    /* Avoid allocations with no watermarks from looping endlessly */
    if (test_thread_flag(TIF_MEMDIE) && !(gfp_mask & __GFP_NOFAIL))
        goto nopage;

    /*
     * Try direct compaction. The first pass is asynchronous. Subsequent
     * attempts after direct reclaim are synchronous
     */
    // try direct compaction: asynchronous on the first pass, synchronous on later ones
    page = __alloc_pages_direct_compact(gfp_mask, order,
                    zonelist, high_zoneidx,
                    nodemask,
                    alloc_flags, preferred_zone,
                    migratetype, sync_migration,
                    &contended_compaction,
                    &deferred_compaction,
                    &did_some_progress);
    if (page)
        goto got_pg;
    sync_migration = true; // use synchronous migration from now on

    /*
     * If compaction is deferred for high-order allocations, it is because
     * sync compaction recently failed. If this is the case and the caller
     * requested a movable allocation that does not heavily disrupt the
     * system then fail the allocation instead of entering direct reclaim.
     */
    if ((deferred_compaction || contended_compaction) &&
                        (gfp_mask & __GFP_NO_KSWAPD))
        goto nopage;

    /* Try direct reclaim and then allocating */
    page = __alloc_pages_direct_reclaim(gfp_mask, order,
                    zonelist, high_zoneidx,
                    nodemask,
                    alloc_flags, preferred_zone,
                    migratetype, &did_some_progress);
    if (page)
        goto got_pg;

    /*
     * If we failed to make any progress reclaiming, then we are
     * running out of options and have to consider going OOM
     */
    if (!did_some_progress) { // no pages were reclaimed: the OOM killer is the only option left
        if ((gfp_mask & __GFP_FS) && !(gfp_mask & __GFP_NORETRY)) { // may call into the filesystem and may retry
            if (oom_killer_disabled) // the OOM killer is disabled, so return NULL
                goto nopage;
            /* Coredumps can quickly deplete all memory reserves */
            if ((current->flags & PF_DUMPCORE) &&
                !(gfp_mask & __GFP_NOFAIL))
                goto nopage;
            page = __alloc_pages_may_oom(gfp_mask, order,
                    zonelist, high_zoneidx,
                    nodemask, preferred_zone,
                    migratetype);
            if (page)
                goto got_pg;

            if (!(gfp_mask & __GFP_NOFAIL)) {
                /*
                 * The oom killer is not called for high-order
                 * allocations that may fail, so if no progress
                 * is being made, there are no other options and
                 * retrying is unlikely to help.
                 */
                // the requested order is too large: even killing a process and freeing
                // all of its memory would probably not yield a block this big,
                // so simply report that no memory is available
                if (order > PAGE_ALLOC_COSTLY_ORDER)
                    goto nopage;
                /*
                 * The oom killer is not called for lowmem
                 * allocations to prevent needlessly killing
                 * innocent tasks.
                 */
                // if the highest usable zone is already down in the DMA range, give up:
                // the DMA zone has little memory to begin with
                if (high_zoneidx < ZONE_NORMAL)
                    goto nopage;
            }

            goto restart; // the allocation must not fail, so go back and retry
        }
    }

    // reaching this point means the direct reclaim above made some progress
    /* Check if we should retry the allocation */
    pages_reclaimed += did_some_progress;
    if (should_alloc_retry(gfp_mask, order, did_some_progress,
                        pages_reclaimed)) { // the allocation should be retried
        /* Wait for some write requests to complete then retry */
        wait_iff_congested(preferred_zone, BLK_RW_ASYNC, HZ/50); // back off briefly so more pages can be freed, then retry
        goto rebalance;
    } else { // no more retries
        /*
         * High-order allocations do not necessarily loop after
         * direct reclaim and reclaim/compaction depends on compaction
         * being called after reclaim so call directly if necessary
         */
        // compact memory one more time and see whether the allocation can now succeed
        page = __alloc_pages_direct_compact(gfp_mask, order,
                    zonelist, high_zoneidx,
                    nodemask,
                    alloc_flags, preferred_zone,
                    migratetype, sync_migration,
                    &contended_compaction,
                    &deferred_compaction,
                    &did_some_progress);
        if (page)
            goto got_pg;
    }

nopage:
    warn_alloc_failed(gfp_mask, order, NULL);
    return page;
got_pg:
    if (kmemcheck_enabled)
        kmemcheck_pagealloc_alloc(page, order, gfp_mask);

    return page;
}
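To make the goto-heavy flow above easier to follow, here is a condensed sketch of the decision order. Every helper below is a hypothetical stand-in (not a kernel function), and several guards (PF_MEMALLOC, TIF_MEMDIE, the deferred-compaction bail-out) are omitted; it illustrates the control flow only, under those simplifying assumptions.

#include <stdbool.h>
#include <stddef.h>

struct page;

/* Hypothetical stubs standing in for the kernel helpers used above. */
static void wake_kswapd(void) { }                                        /* background reclaim   */
static struct page *try_freelist(bool ignore_watermarks) { return NULL; }
static struct page *direct_compact(bool sync) { return NULL; }           /* defragmentation      */
static struct page *direct_reclaim(bool *progress) { *progress = false; return NULL; }
static struct page *try_oom_kill(void) { return NULL; }                  /* may kill a task      */
static bool worth_retrying(void) { return false; }                       /* should_alloc_retry() */

struct page *slowpath_sketch(bool can_sleep, bool may_use_reserves, bool nofail)
{
    struct page *page;
    bool progress = false;

    for (;;) {
        wake_kswapd();
        page = try_freelist(false);           /* one more try at the min watermark */
        if (!page && may_use_reserves)
            page = try_freelist(true);        /* dip into the emergency reserves   */
        if (page || !can_sleep)
            return page;                      /* atomic callers stop here          */

        page = direct_compact(false);         /* async compaction first            */
        if (!page)
            page = direct_reclaim(&progress);
        if (page)
            return page;

        if (!progress) {                      /* reclaim achieved nothing          */
            page = try_oom_kill();
            if (page || !nofail)
                return page;
            continue;                         /* __GFP_NOFAIL: back to "restart"   */
        }
        if (worth_retrying())
            continue;                         /* back off, then "rebalance"        */
        return direct_compact(true);          /* final synchronous compaction      */
    }
}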
/*
 * This is called in the allocator slow-path if the allocation request is of
 * sufficient urgency to ignore watermarks and take other desperate measures
 */
static inline struct page *
__alloc_pages_high_priority(gfp_t gfp_mask, unsigned int order,
    struct zonelist *zonelist, enum zone_type high_zoneidx,
    nodemask_t *nodemask, struct zone *preferred_zone,
    int migratetype)
{
    struct page *page;

    do {
        // allocate with no watermark check at all: spare no effort
        page = get_page_from_freelist(gfp_mask, nodemask, order,
            zonelist, high_zoneidx, ALLOC_NO_WATERMARKS,
            preferred_zone, migratetype);

        if (!page && gfp_mask & __GFP_NOFAIL)
            wait_iff_congested(preferred_zone, BLK_RW_ASYNC,
                            HZ/50); // wait briefly for writeback to complete so pages are freed
    } while (!page && (gfp_mask & __GFP_NOFAIL)); // __GFP_NOFAIL must not fail: loop until a page is obtained

    return page;
}
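For a __GFP_NOFAIL request the only way out of the loop above is a successful allocation. Seen from the caller's side, the flag means the allocation may block for a long time but is never supposed to return NULL. A hypothetical caller (not taken from the kernel source) would look like this:

#include <linux/gfp.h>

/* Hypothetical caller: with __GFP_NOFAIL the allocation may stall,
 * but it is never supposed to return NULL. */
static struct page *grab_page_nofail(void)
{
    return alloc_pages(GFP_NOFS | __GFP_NOFAIL, 0);
}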
#ifdef CONFIG_COMPACTION
/* Try memory compaction for high-order allocations before reclaim */
static struct page *
__alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
    struct zonelist *zonelist, enum zone_type high_zoneidx,
    nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone,
    int migratetype, bool sync_migration,
    bool *contended_compaction, bool *deferred_compaction,
    unsigned long *did_some_progress)
{
    if (!order)
        return NULL;

    if (compaction_deferred(preferred_zone, order)) {
        *deferred_compaction = true;
        return NULL;
    }

    current->flags |= PF_MEMALLOC;
    *did_some_progress = try_to_compact_pages(zonelist, order, gfp_mask,
                        nodemask, sync_migration,
                        contended_compaction);
    current->flags &= ~PF_MEMALLOC;

    if (*did_some_progress != COMPACT_SKIPPED) {
        struct page *page;

        /* Page migration frees to the PCP lists but we want merging */
        drain_pages(get_cpu());
        put_cpu();

        page = get_page_from_freelist(gfp_mask, nodemask,
                order, zonelist, high_zoneidx,
                alloc_flags & ~ALLOC_NO_WATERMARKS,
                preferred_zone, migratetype);
        if (page) {
            preferred_zone->compact_blockskip_flush = false;
            preferred_zone->compact_considered = 0;
            preferred_zone->compact_defer_shift = 0;
            if (order >= preferred_zone->compact_order_failed)
                preferred_zone->compact_order_failed = order + 1;
            count_vm_event(COMPACTSUCCESS);
            return page;
        }

        /*
         * It's bad if compaction run occurs and fails.
         * The most likely reason is that pages exist,
         * but not enough to satisfy watermarks.
         */
        count_vm_event(COMPACTFAIL);

        /*
         * As async compaction considers a subset of pageblocks, only
         * defer if the failure was a sync compaction failure.
         */
        if (sync_migration)
            defer_compaction(preferred_zone, order);

        cond_resched();
    }

    return NULL;
}
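When synchronous compaction fails, defer_compaction() makes later compaction_deferred() checks skip an exponentially growing number of attempts, so repeated hopeless compaction runs do not burn CPU; a successful allocation resets the counters, as seen in the function above. The snippet below is only an illustrative model of that backoff scheme, not the kernel implementation; the struct, field, and constant names are made up for the example.

#include <stdbool.h>

/* Illustrative backoff bookkeeping (names are hypothetical, not kernel fields). */
struct compact_backoff {
    unsigned int considered;   /* attempts seen since the last failure */
    unsigned int defer_shift;  /* current backoff exponent, capped     */
};

#define BACKOFF_MAX_SHIFT 6    /* never skip more than 1 << 6 == 64 attempts in a row */

/* Called on a failed synchronous compaction: widen the skip window. */
static void backoff_defer(struct compact_backoff *b)
{
    b->considered = 0;
    if (b->defer_shift < BACKOFF_MAX_SHIFT)
        b->defer_shift++;
}

/* Called before each attempt: true means "skip compaction this time". */
static bool backoff_deferred(struct compact_backoff *b)
{
    return ++b->considered < (1u << b->defer_shift);
}

/* A successful allocation after compaction clears the counters, mirroring
 * the compact_considered/compact_defer_shift resets in the function above. */
static void backoff_reset(struct compact_backoff *b)
{
    b->considered = 0;
    b->defer_shift = 0;
}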