The buddy-system allocator interfaces fall roughly into two families: the __get_free_pages() family returns the linear address of the first allocated page, while the alloc_pages() family returns the address of the page descriptor (struct page). Whichever family is used, the request eventually goes through alloc_pages() to obtain the pages.

alloc_pages() in turn ends up in the common entry point, __alloc_pages_nodemask().
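For orientation, here is a minimal usage sketch of the two interface families (a hypothetical helper in kernel context, with error handling kept to a minimum):

#include <linux/gfp.h>
#include <linux/kernel.h>
#include <linux/mm.h>

/* Hypothetical helper: allocate a 4-page (order-2) block through each
 * interface family, then free it again. */
static void buddy_interfaces_demo(void)
{
    struct page *page = alloc_pages(GFP_KERNEL, 2);        /* returns the page descriptor */
    unsigned long addr = __get_free_pages(GFP_KERNEL, 2);  /* returns the linear address */

    if (page) {
        pr_info("descriptor %p maps to linear address %p\n",
                page, page_address(page));
        __free_pages(page, 2);
    }
    if (addr)
        free_pages(addr, 2);
}

For a lowmem allocation like this one, page_address() converts between the two views, turning the page descriptor into its kernel linear address.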
/*
 * This is the 'heart' of the zoned buddy allocator.
 */
struct page *
__alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
            struct zonelist *zonelist, nodemask_t *nodemask)
{
    enum zone_type high_zoneidx = gfp_zone(gfp_mask);    /* highest usable zone, derived from gfp_mask */
    struct zone *preferred_zone;
    struct page *page = NULL;
    int migratetype = allocflags_to_migratetype(gfp_mask);    /* migrate type of the allocation, derived from gfp_mask */
    unsigned int cpuset_mems_cookie;

    gfp_mask &= gfp_allowed_mask;

    lockdep_trace_alloc(gfp_mask);

    might_sleep_if(gfp_mask & __GFP_WAIT);    /* with __GFP_WAIT the caller may sleep and be rescheduled */

    if (should_fail_alloc_page(gfp_mask, order))
        return NULL;

    /*
     * Check the zones suitable for the gfp_mask contain at least one
     * valid zone. It's possible to have an empty zonelist as a result
     * of GFP_THISNODE and a memoryless node
     */
    if (unlikely(!zonelist->_zonerefs->zone))
        return NULL;

retry_cpuset:
    cpuset_mems_cookie = get_mems_allowed();

    /* The preferred zone is used for statistics later */
    /* find the first zone in the zonelist whose index is usable for high_zoneidx */
    first_zones_zonelist(zonelist, high_zoneidx,
                nodemask ? : &cpuset_current_mems_allowed,
                &preferred_zone);
    if (!preferred_zone)
        goto out;

    /* First allocation attempt */
    page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask, order,
            zonelist, high_zoneidx, ALLOC_WMARK_LOW|ALLOC_CPUSET,
            preferred_zone, migratetype);
    if (unlikely(!page))
        /*
         * The fast path failed: retry through the slow path, which may
         * wake kswapd, perform reclaim/compaction and so on.
         */
        page = __alloc_pages_slowpath(gfp_mask, order,
                zonelist, high_zoneidx, nodemask,
                preferred_zone, migratetype);

    trace_mm_page_alloc(page, order, gfp_mask, migratetype);

out:
    /*
     * When updating a task's mems_allowed, it is possible to race with
     * parallel threads in such a way that an allocation can fail while
     * the mask is being updated. If a page allocation is about to fail,
     * check if the cpuset changed during allocation and if so, retry.
     */
    if (unlikely(!put_mems_allowed(cpuset_mems_cookie) && !page))
        goto retry_cpuset;

    return page;
}
/*
 * get_page_from_freelist goes through the zonelist trying to allocate
 * a page.
 */
static struct page *
get_page_from_freelist(gfp_t gfp_mask, nodemask_t *nodemask, unsigned int order,
        struct zonelist *zonelist, int high_zoneidx, int alloc_flags,
        struct zone *preferred_zone, int migratetype)
{
    struct zoneref *z;
    struct page *page = NULL;
    int classzone_idx;
    struct zone *zone;
    nodemask_t *allowednodes = NULL;    /* zonelist_cache approximation */
    int zlc_active = 0;                 /* set if using zonelist_cache */
    int did_zlc_setup = 0;              /* just call zlc_setup() one time */

    classzone_idx = zone_idx(preferred_zone);    /* index of the preferred zone */
zonelist_scan:
    /*
     * Scan zonelist, looking for a zone with enough free.
     * See also cpuset_zone_allowed() comment in kernel/cpuset.c.
     *
     * Walk the zones until one with enough free space is found.
     * For example, if high_zoneidx corresponds to ZONE_HIGHMEM the scan
     * order is HIGHMEM --> NORMAL --> DMA; if it corresponds to
     * ZONE_NORMAL the order is NORMAL --> DMA.
     */
    for_each_zone_zonelist_nodemask(zone, z, zonelist,
                        high_zoneidx, nodemask) {
        if (NUMA_BUILD && zlc_active &&
            !zlc_zone_worth_trying(zonelist, z, allowednodes))
                continue;    /* never taken on UMA builds */
        if ((alloc_flags & ALLOC_CPUSET) &&
            !cpuset_zone_allowed_softwall(zone, gfp_mask))
                continue;
        /*
         * When allocating a page cache page for writing, we
         * want to get it from a zone that is within its dirty
         * limit, such that no single zone holds more than its
         * proportional share of globally allowed dirty pages.
         * The dirty limits take into account the zone's
         * lowmem reserves and high watermark so that kswapd
         * should be able to balance it without having to
         * write pages from its LRU list.
         *
         * This may look like it could increase pressure on
         * lower zones by failing allocations in higher zones
         * before they are full. But the pages that do spill
         * over are limited as the lower zones are protected
         * by this very same mechanism. It should not become
         * a practical burden to them.
         *
         * XXX: For now, allow allocations to potentially
         * exceed the per-zone dirty limit in the slowpath
         * (ALLOC_WMARK_LOW unset) before going into reclaim,
         * which is important when on a NUMA setup the allowed
         * zones are together not big enough to reach the
         * global limit. The proper fix for these situations
         * will require awareness of zones in the
         * dirty-throttling and the flusher threads.
         */
        if ((alloc_flags & ALLOC_WMARK_LOW) &&
            (gfp_mask & __GFP_WRITE) && !zone_dirty_ok(zone))
            goto this_zone_full;    /* zone is over its dirty limit, try the next one */

        BUILD_BUG_ON(ALLOC_NO_WATERMARKS < NR_WMARK);
        if (!(alloc_flags & ALLOC_NO_WATERMARKS)) {
            unsigned long mark;
            int ret;

            /*
             * alloc_flags selects which watermark to enforce (min, low
             * or high); the allocation may only proceed if the zone's
             * free pages stay above that mark afterwards.
             */
            mark = zone->watermark[alloc_flags & ALLOC_WMARK_MASK];
            if (zone_watermark_ok(zone, order, mark,
                    classzone_idx, alloc_flags))
                goto try_this_zone;    /* watermark OK, allocate from this zone */

            if (NUMA_BUILD && !did_zlc_setup && nr_online_nodes > 1) {
                /*
                 * we do zlc_setup if there are multiple nodes
                 * and before considering the first zone allowed
                 * by the cpuset.
                 */
                allowednodes = zlc_setup(zonelist, alloc_flags);
                zlc_active = 1;
                did_zlc_setup = 1;
            }

            if (zone_reclaim_mode == 0)    /* watermark too low and zone reclaim disabled */
                goto this_zone_full;

            /*
             * As we may have just activated ZLC, check if the first
             * eligible zone has failed zone_reclaim recently.
             */
            if (NUMA_BUILD && zlc_active &&
                !zlc_zone_worth_trying(zonelist, z, allowednodes))
                continue;

            ret = zone_reclaim(zone, gfp_mask, order);
            switch (ret) {
            case ZONE_RECLAIM_NOSCAN:
                /* did not scan */
                continue;
            case ZONE_RECLAIM_FULL:
                /* scanned but unreclaimable */
                continue;
            default:
                /* did we reclaim enough */
                if (!zone_watermark_ok(zone, order, mark,
                        classzone_idx, alloc_flags))
                    goto this_zone_full;
            }
        }

try_this_zone:    /* this zone has enough free pages above the watermark */
        /* try the per-CPU page lists first, then fall back to the buddy lists */
        page = buffered_rmqueue(preferred_zone, zone, order,
                        gfp_mask, migratetype);
        if (page)
            break;
this_zone_full:
        if (NUMA_BUILD)
            zlc_mark_zone_full(zonelist, z);
    }

    if (unlikely(NUMA_BUILD && page == NULL && zlc_active)) {
        /* Disable zlc cache for second zonelist scan */
        zlc_active = 0;
        goto zonelist_scan;
    }

    if (page)
        /*
         * page->pfmemalloc is set when ALLOC_NO_WATERMARKS was
         * necessary to allocate the page. The expectation is
         * that the caller is taking steps that will free more
         * memory. The caller should avoid the page being used
         * for !PFMEMALLOC purposes.
         */
        page->pfmemalloc = !!(alloc_flags & ALLOC_NO_WATERMARKS);

    return page;
}
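The watermark test at the heart of this loop boils down to a simple inequality. The sketch below is a simplified model of the idea, not the kernel's zone_watermark_ok(): it ignores the lowmem reserve that the real check adds to the mark and the per-order free-list verification it also performs.

#include <stdbool.h>

/*
 * Simplified model: after removing 1 << order pages, the zone must still
 * hold more than 'mark' free pages. The real zone_watermark_ok() also adds
 * zone->lowmem_reserve[classzone_idx] to the mark and verifies that enough
 * blocks of each order remain.
 */
static bool watermark_ok_model(unsigned long free_pages, unsigned long mark,
                               unsigned int order)
{
    return free_pages >= (1UL << order) &&
           free_pages - (1UL << order) > mark;
}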
/*
 * Really, prep_compound_page() should be called from __rmqueue_bulk(). But
 * we cheat by calling it from here, in the order > 0 path. Saves a branch
 * or two.
 */
/*
 * Try the per-CPU page lists (pcp) first; for order > 0 the allocation goes
 * straight to the buddy system.
 */
static inline
struct page *buffered_rmqueue(struct zone *preferred_zone,
            struct zone *zone, int order, gfp_t gfp_flags,
            int migratetype)
{
    unsigned long flags;
    struct page *page;
    int cold = !!(gfp_flags & __GFP_COLD);    /* cold or hot page requested */

again:
    if (likely(order == 0)) {    /* order 0: a single page is requested */
        struct per_cpu_pages *pcp;
        struct list_head *list;

        local_irq_save(flags);
        pcp = &this_cpu_ptr(zone->pageset)->pcp;    /* per-CPU pageset of this zone on the local CPU */
        list = &pcp->lists[migratetype];    /* list matching the requested migrate type */
        if (list_empty(list)) {
            /* list is empty: refill it with 'batch' pages pulled from the buddy system */
            pcp->count += rmqueue_bulk(zone, 0,
                    pcp->batch, list,
                    migratetype, cold);
            if (unlikely(list_empty(list)))
                goto failed;
        }

        if (cold)    /* cold pages are taken from the tail of the list */
            page = list_entry(list->prev, struct page, lru);
        else
            page = list_entry(list->next, struct page, lru);

        list_del(&page->lru);
        pcp->count--;
    } else {    /* order > 0: bypass the pcp lists and allocate directly from the buddy system */
        if (unlikely(gfp_flags & __GFP_NOFAIL)) {
            /*
             * __GFP_NOFAIL is not to be used in new code.
             *
             * All __GFP_NOFAIL callers should be fixed so that they
             * properly detect and handle allocation failures.
             *
             * We most definitely don't want callers attempting to
             * allocate greater than order-1 page units with
             * __GFP_NOFAIL.
             */
            WARN_ON_ONCE(order > 1);
        }
        spin_lock_irqsave(&zone->lock, flags);
        page = __rmqueue(zone, order, migratetype);
        spin_unlock(&zone->lock);    /* drop only the spinlock; interrupts stay off until the statistics below are updated */
        if (!page)
            goto failed;
        /* 1 << order pages have been allocated: update the zone's free-page counter */
        __mod_zone_page_state(zone, NR_FREE_PAGES, -(1 << order));
    }

    __count_zone_vm_events(PGALLOC, zone, 1 << order);
    zone_statistics(preferred_zone, zone, gfp_flags);
    local_irq_restore(flags);    /* re-enable interrupts */

    VM_BUG_ON(bad_range(zone, page));
    if (prep_new_page(page, order, gfp_flags))
        /* sanity checks and final preparation; if the page turns out to be
         * corrupted, retry and allocate a different page */
        goto again;
    return page;

failed:
    local_irq_restore(flags);
    return NULL;
}
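The cold flag above is driven by __GFP_COLD, which exists in kernels of this vintage (it was removed in later releases). A caller that knows it will not touch the memory soon can hint for a cache-cold page, and buffered_rmqueue() will then take it from the tail of the per-CPU list; the helper name below is purely illustrative:

#include <linux/gfp.h>

/* Illustrative helper: request one cache-cold page, e.g. for a receive
 * buffer that the CPU will not read immediately. */
static struct page *grab_cold_page(void)
{
    return alloc_pages(GFP_KERNEL | __GFP_COLD, 0);
}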
/*
 * Obtain a specified number of elements from the buddy allocator, all under
 * a single hold of the lock, for efficiency. Add them to the supplied list.
 * Returns the number of new pages which were placed at *list.
 */
/*
 * Each element is a block of 1 << order pages, but this function is only
 * called from the pcp refill path, where order is 0, so the return value is
 * simply the number of pages added to the caller's per-CPU list.
 */
static int rmqueue_bulk(struct zone *zone, unsigned int order,
            unsigned long count, struct list_head *list,
            int migratetype, int cold)
{
    int mt = migratetype, i;

    spin_lock(&zone->lock);    /* interrupts are already off in the caller; take the zone spinlock before touching the zone */
    for (i = 0; i < count; ++i) {    /* repeat 'count' times, pulling blocks from the buddy system */
        struct page *page = __rmqueue(zone, order, migratetype);    /* take a block from the buddy system */
        if (unlikely(page == NULL))
            break;

        /*
         * Split buddy pages returned by expand() are received here
         * in physical page order. The page is added to the callers and
         * list and the list head then moves forward. From the callers
         * perspective, the linked list is ordered by page number in
         * some conditions. This is useful for IO devices that can
         * merge IO requests if the physical pages are ordered
         * properly.
         */
        /* place the page at the head or the tail of the per-CPU list, as requested */
        if (likely(cold == 0))
            list_add(&page->lru, list);
        else
            list_add_tail(&page->lru, list);
        if (IS_ENABLED(CONFIG_CMA)) {
            mt = get_pageblock_migratetype(page);
            if (!is_migrate_cma(mt) && mt != MIGRATE_ISOLATE)
                mt = migratetype;
        }
        set_page_private(page, mt);    /* record the page's migrate type in page->private */
        list = &page->lru;
    }
    __mod_zone_page_state(zone, NR_FREE_PAGES, -(i << order));    /* decrease the zone's free-page count */
    spin_unlock(&zone->lock);    /* release the zone's spinlock */
    return i;
}
/*
 * Do the hard work of removing an element from the buddy allocator.
 * Call me with the zone->lock already held.
 */
static struct page *__rmqueue(struct zone *zone, unsigned int order,
                        int migratetype)
{
    struct page *page;

retry_reserve:
    /* scan orders upwards starting at 'order', preferring the free lists
     * of the requested migrate type */
    page = __rmqueue_smallest(zone, order, migratetype);

    /*
     * Fall back to the other migrate types when both of the following hold:
     * the fast path above found no page, and we are not already allocating
     * from MIGRATE_RESERVE. The reserve lists are the last resort; if even
     * they cannot satisfy the request, the zone really has no usable memory.
     */
    if (unlikely(!page) && migratetype != MIGRATE_RESERVE) {
        /* scan orders downwards and steal pages from the fallback migrate types */
        page = __rmqueue_fallback(zone, order, migratetype);

        /*
         * Use MIGRATE_RESERVE rather than fail an allocation. goto
         * is used because __rmqueue_smallest is an inline function
         * and we want just one call site
         */
        if (!page) {
            /* the fallback lists failed too: switch to the reserve lists and retry */
            migratetype = MIGRATE_RESERVE;
            goto retry_reserve;
        }
    }

    trace_mm_page_alloc_zone_locked(page, order, migratetype);
    return page;
}
Allocating pages from the free lists of the requested migrate type

__rmqueue_smallest() scans the orders upwards, starting from the requested order, and allocates preferentially from the free lists of the requested migrate type.
/*
 * Go through the free lists for the given migratetype and remove
 * the smallest available page from the freelists
 */
/*
 * Scan upwards from the given order; once a block is found, return its first
 * page and hand the leftover 2^h - 2^k pages back to the free lists.
 */
static inline
struct page *__rmqueue_smallest(struct zone *zone, unsigned int order,
                        int migratetype)
{
    unsigned int current_order;
    struct free_area *area;
    struct page *page;

    /* Find a page of the appropriate size in the preferred list */
    for (current_order = order; current_order < MAX_ORDER; ++current_order) {
        area = &(zone->free_area[current_order]);
        if (list_empty(&area->free_list[migratetype]))
            continue;

        /* the list is not empty: take its first entry */
        page = list_entry(area->free_list[migratetype].next,
                            struct page, lru);
        list_del(&page->lru);
        rmv_page_order(page);
        area->nr_free--;
        /* if current_order > order, return the leftover 2^h - 2^k pages
         * to the lower-order free lists */
        expand(zone, page, order, current_order, area, migratetype);
        return page;
    }

    return NULL;
}
/*
 * The order of subdivision here is critical for the IO subsystem.
 * Please do not alter this order without good reasons and regression
 * testing. Specifically, as large blocks of memory are subdivided,
 * the order in which smaller blocks are delivered depends on the order
 * they're subdivided in this function. This is the primary factor
 * influencing the order in which pages are delivered to the IO
 * subsystem according to empirical testing, and this is also justified
 * by considering the behavior of a buddy system containing a single
 * large block of memory acted on by a series of small allocations.
 * This behavior is a critical factor in sglist merging's success.
 *
 * -- wli
 */
/*
 * This function handles the case where the allocator has carved a 2^low
 * chunk out of a 2^high block: the remainder of the block has to be put
 * back on the buddy free lists.
 */
static inline void expand(struct zone *zone, struct page *page,
    int low, int high, struct free_area *area,
    int migratetype)
{
    unsigned long size = 1 << high;

    while (high > low) {
        area--;        /* move to the free_area one order lower */
        high--;        /* drop one order */
        size >>= 1;
        VM_BUG_ON(bad_range(zone, &page[size]));

#ifdef CONFIG_DEBUG_PAGEALLOC
        if (high < debug_guardpage_minorder()) {
            /*
             * Mark as guard pages (or page), that will allow to
             * merge back to allocator when buddy will be freed.
             * Corresponding page table entries will not be touched,
             * pages will stay not present in virtual address space
             */
            INIT_LIST_HEAD(&page[size].lru);
            set_page_guard_flag(&page[size]);
            set_page_private(&page[size], high);
            /* Guard pages are not available for any usage */
            __mod_zone_page_state(zone, NR_FREE_PAGES, -(1 << high));
            continue;
        }
#endif
        list_add(&page[size].lru, &area->free_list[migratetype]);    /* put the upper half on the matching free list */
        area->nr_free++;    /* one more free block at this order */
        set_page_order(&page[size], high);    /* record the block's order in page->private */
    }
}
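As a concrete example of the split: an order-2 request (low = 2) satisfied from an order-5 block (high = 5, 32 pages) hands back an order-4, an order-3 and an order-2 buddy (16 + 8 + 4 pages) and keeps the first 4 pages for the caller. The following standalone sketch (ordinary user-space C, not kernel code) replays that arithmetic:

#include <stdio.h>

/* Simulate expand(): split a 2^high block down to a 2^low allocation and
 * print which buddy goes back to which free list. */
static void expand_model(unsigned int low, unsigned int high)
{
    unsigned long size = 1UL << high;

    while (high > low) {
        high--;
        size >>= 1;
        /* the upper half (pages [size, 2*size)) goes back as an
         * order-'high' buddy, mirroring list_add(&page[size].lru, ...) */
        printf("free buddy at offset %lu, order %u (%lu pages)\n",
               size, high, size);
    }
    printf("caller receives pages [0, %lu)\n", 1UL << low);
}

int main(void)
{
    expand_model(2, 5);    /* order-2 request carved from an order-5 block */
    return 0;
}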
Allocating pages from the fallback lists

/* Remove an element from the buddy allocator from the fallback list */
static inline struct page *
__rmqueue_fallback(struct zone *zone, int order, int start_migratetype)
{
    struct free_area *area;
    int current_order;
    struct page *page;
    int migratetype, i;

    /* Find the largest possible block of pages in the other list */
    /*
     * Search from the highest order downwards so that large blocks are
     * stolen from the other migrate types; splitting large blocks first
     * keeps fragmentation down.
     */
    for (current_order = MAX_ORDER-1; current_order >= order;
                        --current_order) {
        for (i = 0;; i++) {
            migratetype = fallbacks[start_migratetype][i];    /* next migrate type to fall back to */

            /*
             * MIGRATE_RESERVE is not handled here; if this function
             * returns NULL, the caller allocates from MIGRATE_RESERVE
             * directly.
             */
            /* MIGRATE_RESERVE handled later if necessary */
            if (migratetype == MIGRATE_RESERVE)
                break;

            area = &(zone->free_area[current_order]);
            if (list_empty(&area->free_list[migratetype]))    /* nothing of this order and type */
                continue;

            /* first block of this order and migrate type */
            page = list_entry(area->free_list[migratetype].next,
                    struct page, lru);
            area->nr_free--;

            /*
             * If breaking a large block of pages, move all free
             * pages to the preferred allocation list. If falling
             * back for a reclaimable kernel allocation, be more
             * aggressive about taking ownership of free pages
             *
             * On the other hand, never change migration
             * type of MIGRATE_CMA pageblocks nor move CMA
             * pages on different free lists. We don't
             * want unmovable pages to be allocated from
             * MIGRATE_CMA areas.
             */
            if (!is_migrate_cma(migratetype) &&
                (unlikely(current_order >= pageblock_order / 2) ||
                    /* a large block is being split: claim the whole
                     * pageblock for the requested type to limit
                     * fragmentation */
                 start_migratetype == MIGRATE_RECLAIMABLE ||
                    /* reclaimable allocations come in bursts: claim the
                     * whole block so the other lists are not chopped up */
                 page_group_by_mobility_disabled)) {
                    /* grouping by mobility is disabled: always claim
                     * the split block */
                int pages;

                /* move the free pages of this pageblock onto the lists of start_migratetype */
                pages = move_freepages_block(zone, page,
                                start_migratetype);

                /* Claim the whole block if over half of it is free */
                /*
                 * 'pages' is the number of pages actually moved; if most
                 * of the block was free, change the migrate type of the
                 * whole pageblock.
                 */
                if (pages >= (1 << (pageblock_order-1)) ||
                        page_group_by_mobility_disabled)
                    set_pageblock_migratetype(page,
                                start_migratetype);    /* retag the pageblock */

                migratetype = start_migratetype;
            }

            /* Remove the page from the freelists */
            list_del(&page->lru);
            rmv_page_order(page);

            /* Take ownership for orders >= pageblock_order */
            /* for orders >= pageblock_order, retag every pageblock covered */
            if (current_order >= pageblock_order &&
                !is_migrate_cma(migratetype))
                change_pageblock_range(page, current_order,
                            start_migratetype);

            /* split the block and give the remainder back to the free lists */
            expand(zone, page, order, current_order, area,
                   is_migrate_cma(migratetype)
                 ? migratetype : start_migratetype);

            trace_mm_page_alloc_extfrag(page, order, current_order,
                start_migratetype, migratetype);

            return page;
        }
    }

    return NULL;
}
The fallback table

/*
 * This array describes the order lists are fallen back to when
 * the free lists for the desirable migrate type are depleted
 */
/*
 * When the free lists of the requested type are empty, this table dictates
 * which migrate type is tried next, and in what order.
 */
static int fallbacks[MIGRATE_TYPES][MIGRATE_TYPES-1] = {
    [MIGRATE_UNMOVABLE]   = { MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE,   MIGRATE_RESERVE },
    [MIGRATE_RECLAIMABLE] = { MIGRATE_UNMOVABLE,   MIGRATE_MOVABLE,   MIGRATE_RESERVE },
    [MIGRATE_MOVABLE]     = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_RESERVE },
    [MIGRATE_RESERVE]     = { MIGRATE_RESERVE,     MIGRATE_RESERVE,   MIGRATE_RESERVE }, /* Never used */
};
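To see the table in action: an allocation for MIGRATE_UNMOVABLE whose own lists are empty tries MIGRATE_RECLAIMABLE, then MIGRATE_MOVABLE, and finally bails out so that __rmqueue() retries with MIGRATE_RESERVE. The small user-space sketch below replays that lookup; the enum and table are an illustrative copy for a configuration without CMA or page isolation, not the kernel's definitions:

#include <stdio.h>

enum migratetype { MIGRATE_UNMOVABLE, MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE,
                   MIGRATE_RESERVE, MIGRATE_TYPES };

static const char *names[MIGRATE_TYPES] = {
    "UNMOVABLE", "RECLAIMABLE", "MOVABLE", "RESERVE",
};

static const int fallbacks[MIGRATE_TYPES][MIGRATE_TYPES - 1] = {
    [MIGRATE_UNMOVABLE]   = { MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE,   MIGRATE_RESERVE },
    [MIGRATE_RECLAIMABLE] = { MIGRATE_UNMOVABLE,   MIGRATE_MOVABLE,   MIGRATE_RESERVE },
    [MIGRATE_MOVABLE]     = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_RESERVE },
    [MIGRATE_RESERVE]     = { MIGRATE_RESERVE,     MIGRATE_RESERVE,   MIGRATE_RESERVE },
};

int main(void)
{
    int start = MIGRATE_UNMOVABLE, i;

    printf("fallback order for %s:", names[start]);
    /* walk the row exactly as the inner loop of __rmqueue_fallback() does */
    for (i = 0; fallbacks[start][i] != MIGRATE_RESERVE; i++)
        printf(" %s", names[fallbacks[start][i]]);
    printf(" (then MIGRATE_RESERVE via __rmqueue)\n");
    return 0;
}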
Moving pages to the buddy lists of the requested migrate type

move_freepages_block() moves the free pages of a given range onto the buddy lists of the requested migrate type; in effect it only changes the pages' type, but it does so by physically moving them between free lists. It does the same job as move_freepages() (shown below), except that it requires the range to be aligned to a whole pageblock.
static int move_freepages_block(struct zone *zone, struct page *page,
                int migratetype)
{
    unsigned long start_pfn, end_pfn;
    struct page *start_page, *end_page;

    /* Align the range to a pageblock; pageblock_nr_pages is
     * 1 << pageblock_order, and pageblock_order is MAX_ORDER - 1 when
     * huge pages are not configured. */
    start_pfn = page_to_pfn(page);
    start_pfn = start_pfn & ~(pageblock_nr_pages-1);
    start_page = pfn_to_page(start_pfn);
    end_page = start_page + pageblock_nr_pages - 1;
    end_pfn = start_pfn + pageblock_nr_pages - 1;

    /* Do not cross zone boundaries */
    if (start_pfn < zone->zone_start_pfn)
        start_page = page;
    /* check the end boundary as well */
    if (end_pfn >= zone->zone_start_pfn + zone->spanned_pages)
        return 0;

    /* do the actual move */
    return move_freepages(zone, start_page, end_page, migratetype);
}
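The alignment step is easy to check by hand. Assuming pageblock_nr_pages is 1024 (i.e. pageblock_order = 10; the actual value depends on the configuration), a page frame number is rounded down to the start of its pageblock like this:

#include <stdio.h>

#define PAGEBLOCK_NR_PAGES 1024UL    /* assumed value, configuration dependent */

int main(void)
{
    unsigned long pfn = 70000;    /* arbitrary page frame number */
    unsigned long start_pfn = pfn & ~(PAGEBLOCK_NR_PAGES - 1);
    unsigned long end_pfn = start_pfn + PAGEBLOCK_NR_PAGES - 1;

    /* 70000 rounds down to 69632 (68 * 1024); the block ends at 70655 */
    printf("pfn %lu lies in pageblock [%lu, %lu]\n", pfn, start_pfn, end_pfn);
    return 0;
}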
Moving the pages of a range to the free lists of the requested type (only the pages' migrate type really changes)

/*
 * Move the free pages in a range to the free lists of the requested type.
 * Note that start_page and end_pages are not aligned on a pageblock
 * boundary. If alignment is required, use move_freepages_block()
 */
static int move_freepages(struct zone *zone,
              struct page *start_page, struct page *end_page,
              int migratetype)
{
    struct page *page;
    unsigned long order;
    int pages_moved = 0;

#ifndef CONFIG_HOLES_IN_ZONE
    /*
     * page_zone is not safe to call in this context when
     * CONFIG_HOLES_IN_ZONE is set. This bug check is probably redundant
     * anyway as we check zone boundaries in move_freepages_block().
     * Remove at a later date when no bug reports exist related to
     * grouping pages by mobility
     */
    BUG_ON(page_zone(start_page) != page_zone(end_page));
#endif

    for (page = start_page; page <= end_page;) {
        /* Make sure we are not inadvertently changing nodes */
        VM_BUG_ON(page_to_nid(page) != zone_to_nid(zone));

        if (!pfn_valid_within(page_to_pfn(page))) {
            page++;
            continue;
        }

        if (!PageBuddy(page)) {
            page++;
            continue;
        }

        order = page_order(page);
        /* remove the block from its current free list; note that this is
         * a whole buddy block of 1 << order pages, not a single page */
        list_del(&page->lru);
        /* add it to the free list of the same order under the requested migrate type */
        list_add(&page->lru,
            &zone->free_area[order].free_list[migratetype]);
        page += 1 << order;        /* advance past the block just moved */
        pages_moved += 1 << order; /* account the pages moved */
    }

    return pages_moved;
}
The slow path: waiting and reclaim allowed

When the fast path cannot deliver a page and the caller is allowed to wait, allocation falls back to the slow path below, where memory reclaim is permitted.
static inline struct page *
__alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
    struct zonelist *zonelist, enum zone_type high_zoneidx,
    nodemask_t *nodemask, struct zone *preferred_zone,
    int migratetype)
{
    const gfp_t wait = gfp_mask & __GFP_WAIT;
    struct page *page = NULL;
    int alloc_flags;
    unsigned long pages_reclaimed = 0;
    unsigned long did_some_progress;
    bool sync_migration = false;
    bool deferred_compaction = false;
    bool contended_compaction = false;

    /*
     * In the slowpath, we sanity check order to avoid ever trying to
     * reclaim >= MAX_ORDER areas which will never succeed. Callers may
     * be using allocators in order of preference for an area that is
     * too large.
     */
    if (order >= MAX_ORDER) {
        WARN_ON_ONCE(!(gfp_mask & __GFP_NOWARN));
        return NULL;
    }

    /*
     * GFP_THISNODE (meaning __GFP_THISNODE, __GFP_NORETRY and
     * __GFP_NOWARN set) should not cause reclaim since the subsystem
     * (f.e. slab) using GFP_THISNODE may choose to trigger reclaim
     * using a larger set of nodes after it has established that the
     * allowed per node queues are empty and that nodes are
     * over allocated.
     */
    /*
     * The caller passed GFP_THISNODE, which forbids reclaim here; after a
     * GFP_THISNODE allocation fails, the caller is expected to retry with
     * different flags.
     */
    if (NUMA_BUILD && (gfp_mask & GFP_THISNODE) == GFP_THISNODE)
        goto nopage;

restart:
    if (!(gfp_mask & __GFP_NO_KSWAPD))    /* unless the caller forbade it, wake kswapd for background reclaim */
        wake_all_kswapd(order, zonelist, high_zoneidx,
                        zone_idx(preferred_zone));

    /*
     * OK, we're below the kswapd watermark and have kicked background
     * reclaim. Now things get more complex, so set up alloc_flags according
     * to how we want to proceed.
     */
    alloc_flags = gfp_to_alloc_flags(gfp_mask);    /* derive internal flags, mainly the watermark to use */

    /*
     * Find the true preferred zone if the allocation is unconstrained by
     * cpusets.
     */
    if (!(alloc_flags & ALLOC_CPUSET) && !nodemask)
        first_zones_zonelist(zonelist, high_zoneidx, NULL,
                    &preferred_zone);

rebalance:
    /* This is the last chance, in general, before the goto nopage. */
    /*
     * Compared with the fast path, a lower watermark is selected here, and
     * the allocation is retried once more before any reclaim is started.
     * ALLOC_NO_WATERMARKS is cleared for this attempt whether or not the
     * context would allow it.
     */
    page = get_page_from_freelist(gfp_mask, nodemask, order, zonelist,
            high_zoneidx, alloc_flags & ~ALLOC_NO_WATERMARKS,
            preferred_zone, migratetype);
    if (page)
        goto got_pg;

    /* Allocate without watermarks if the context allows */
    /*
     * Some contexts, such as the reclaim threads or a task that is being
     * killed, are allowed to ignore the watermarks completely.
     */
    if (alloc_flags & ALLOC_NO_WATERMARKS) {
        /*
         * Ignore mempolicies if ALLOC_NO_WATERMARKS on the grounds
         * the allocation is high priority and these type of
         * allocations are system rather than user orientated
         */
        zonelist = node_zonelist(numa_node_id(), gfp_mask);

        page = __alloc_pages_high_priority(gfp_mask, order,
                zonelist, high_zoneidx, nodemask,
                preferred_zone, migratetype);
        if (page) {
            /* got memory even while ignoring the watermarks */
            goto got_pg;
        }
    }

    /* Atomic allocations - we can't balance anything */
    if (!wait)
        goto nopage;

    /* Avoid recursion of direct reclaim */
    /* the caller is itself the reclaim path; recursing into reclaim would deadlock */
    if (current->flags & PF_MEMALLOC)
        goto nopage;

    /* Avoid allocations with no watermarks from looping endlessly */