1. Memory allocation functions
The relevant code is as follows:
#define alloc_pages(gfp_mask, order) alloc_pages_node(numa_node_id(), gfp_mask, order)
#define alloc_page_vma(gfp_mask, vma, addr) alloc_pages(gfp_mask, 0)
#define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0)
#define __get_free_page(gfp_mask) __get_free_pages((gfp_mask),0)
#define __get_dma_pages(gfp_mask, order) __get_free_pages((gfp_mask) | GFP_DMA,(order))
#define pfn_to_page(pfn) (mem_map + ((pfn) - PHYS_PFN_OFFSET))
#define page_to_pfn(page) ((unsigned long)((page) - mem_map) + PHYS_PFN_OFFSET)
#define pfn_valid(pfn) ((pfn) >= PHYS_PFN_OFFSET && (pfn) < (PHYS_PFN_OFFSET + max_mapnr))
#define phys_to_page(phys) (pfn_to_page(phys >> PAGE_SHIFT))
#define page_to_phys(page) (page_to_pfn(page) << PAGE_SHIFT)
#define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
#define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
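A minimal sketch (not from the source) of how these conversion macros fit together; it assumes kernel context, a lowmem page, and GFP_KERNEL, and the helper name is hypothetical:

#include <linux/mm.h>
#include <linux/gfp.h>

static void demo_page_conversions(void)	/* hypothetical helper for illustration */
{
	struct page *page = alloc_page(GFP_KERNEL);

	if (page) {
		unsigned long pfn = page_to_pfn(page);	/* page frame number */
		phys_addr_t phys = page_to_phys(page);	/* physical address */
		void *vaddr = page_address(page);	/* kernel virtual address (lowmem) */

		/* the conversions round-trip for a valid lowmem page */
		BUG_ON(!pfn_valid(pfn));
		BUG_ON(virt_to_page(vaddr) != page);
		BUG_ON(phys_to_page(phys) != page);

		__free_page(page);
	}
}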
1) The implementation of __get_free_pages is as follows; it returns the kernel virtual address of the allocated pages:
unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order)
{
	struct page *page;

	/*
	 * __get_free_pages() returns a 32-bit address, which cannot represent
	 * a highmem page
	 */
	VM_BUG_ON((gfp_mask & __GFP_HIGHMEM) != 0);

	page = alloc_pages(gfp_mask, order);
	if (!page)
		return 0;
	return (unsigned long) page_address(page);
}
/**
 * page_address - get the mapped virtual address of a page
 * @page: &struct page to get the virtual address of
 *
 * Returns the page's virtual address.
 */
void *page_address(struct page *page)
{
	unsigned long flags;
	void *ret;
	struct page_address_slot *pas;

	if (!PageHighMem(page))
		return lowmem_page_address(page);

	pas = page_slot(page);
	ret = NULL;
	spin_lock_irqsave(&pas->lock, flags);
	if (!list_empty(&pas->lh)) {
		struct page_address_map *pam;

		list_for_each_entry(pam, &pas->lh, list) {
			if (pam->page == page) {
				ret = pam->virtual;
				goto done;
			}
		}
	}
done:
	spin_unlock_irqrestore(&pas->lock, flags);
	return ret;
}
static __always_inline void *lowmem_page_address(struct page *page)
{
	return __va(PFN_PHYS(page_to_pfn(page)));
}
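A hedged usage sketch (not from the source) of allocating and freeing two lowmem pages via __get_free_pages; the GFP_KERNEL flag, the order, and the helper name are illustrative assumptions:

#include <linux/gfp.h>

static int demo_get_free_pages(void)	/* hypothetical helper for illustration */
{
	unsigned long addr;

	addr = __get_free_pages(GFP_KERNEL, 1);	/* order 1 => 2^1 = 2 contiguous pages */
	if (!addr)
		return -ENOMEM;

	/* ... use the buffer at the returned kernel virtual address ... */

	free_pages(addr, 1);	/* the order must match the one used at allocation */
	return 0;
}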
2) alloc_pages_node
static inline struct page *alloc_pages_node(int nid, gfp_t gfp_mask,
						unsigned int order)
{
	/* Unknown node is current node */
	if (nid < 0)
		nid = numa_node_id();

	return __alloc_pages(gfp_mask, order, node_zonelist(nid, gfp_mask));
}
The parameter nid is the ID of the NUMA node to allocate from;
the parameter gfp_mask holds the GFP_ allocation flags;
the parameter order gives the allocation size (2^order pages).
The return value is a pointer to the struct page of the first page (more than one page may be returned); NULL is returned on failure. A usage sketch follows below.
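As mentioned above, a brief usage sketch (not from the source; the order, the node choice, and the helper name are illustrative assumptions):

#include <linux/gfp.h>
#include <linux/mm.h>

static int demo_alloc_on_node(void)	/* hypothetical helper for illustration */
{
	/* allocate 2^2 = 4 contiguous pages on the current NUMA node */
	struct page *pages = alloc_pages_node(numa_node_id(), GFP_KERNEL, 2);
	void *vaddr;

	if (!pages)
		return -ENOMEM;

	vaddr = page_address(pages);	/* virtual address of the first page */
	/* ... use the 4-page buffer ... */

	__free_pages(pages, 2);
	return 0;
}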
static inline struct page *
__alloc_pages(gfp_t gfp_mask, unsigned int order,
		struct zonelist *zonelist)
{
	return __alloc_pages_nodemask(gfp_mask, order, zonelist, NULL);
}
/*
 * This is the 'heart' of the zoned buddy allocator.
 */
struct page *
__alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
			struct zonelist *zonelist, nodemask_t *nodemask)
{
	enum zone_type high_zoneidx = gfp_zone(gfp_mask);
	struct zone *preferred_zone;
	struct page *page;
	int migratetype = allocflags_to_migratetype(gfp_mask);

	gfp_mask &= gfp_allowed_mask;

	lockdep_trace_alloc(gfp_mask);

	might_sleep_if(gfp_mask & __GFP_WAIT);

	if (should_fail_alloc_page(gfp_mask, order))
		return NULL;

	/*
	 * Check the zones suitable for the gfp_mask contain at least one
	 * valid zone. It's possible to have an empty zonelist as a result
	 * of GFP_THISNODE and a memoryless node
	 */
	if (unlikely(!zonelist->_zonerefs->zone))
		return NULL;

	get_mems_allowed();
	/* The preferred zone is used for statistics later */
	first_zones_zonelist(zonelist, high_zoneidx,
				nodemask ? : &cpuset_current_mems_allowed,
				&preferred_zone);
	if (!preferred_zone) {
		put_mems_allowed();
		return NULL;
	}

	/* First allocation attempt */
	page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask, order,
			zonelist, high_zoneidx, ALLOC_WMARK_LOW|ALLOC_CPUSET,
			preferred_zone, migratetype);
	if (unlikely(!page))
		page = __alloc_pages_slowpath(gfp_mask, order,
				zonelist, high_zoneidx, nodemask,
				preferred_zone, migratetype);
	put_mems_allowed();

	trace_mm_page_alloc(page, order, gfp_mask, migratetype);
	return page;
}
The main call flow from here is:
get_page_from_freelist ->
    buffered_rmqueue
3) buffered_rmqueue
Obtains a block of 2^order physically contiguous pages from the given zone and returns the page descriptor of the block's first page frame.
static inline
struct page *buffered_rmqueue(struct zone *preferred_zone,
			struct zone *zone, int order, gfp_t gfp_flags,
			int migratetype)
{
	unsigned long flags;
	struct page *page;
	int cold = !!(gfp_flags & __GFP_COLD);

again:
	if (likely(order == 0)) {
		/* a single page (2^0): allocate from the current CPU's per-CPU page cache */
		struct per_cpu_pages *pcp;
		struct list_head *list;

		local_irq_save(flags);
		/* per-CPU page cache descriptor of this zone for the current processor */
		pcp = &this_cpu_ptr(zone->pageset)->pcp;
		list = &pcp->lists[migratetype];
		if (list_empty(list)) {
			/* the per-CPU cache is empty: refill it with 'batch'
			 * free pages taken from the buddy system */
			pcp->count += rmqueue_bulk(zone, 0,
					pcp->batch, list,
					migratetype, cold);
			if (unlikely(list_empty(list)))
				goto failed;
		}

		/* take the struct page at the head (or, for cold pages, the
		 * tail) of the pcp list via its lru member */
		if (cold)
			page = list_entry(list->prev, struct page, lru);
		else
			page = list_entry(list->next, struct page, lru);

		/* the page has been handed out, so remove it from the
		 * per-CPU cache list and decrement the cached-page count */
		list_del(&page->lru);
		pcp->count--;
	} else {
		/* allocate a block of 2^order pages */
		if (unlikely(gfp_flags & __GFP_NOFAIL)) {
			/*
			 * __GFP_NOFAIL is not to be used in new code.
			 *
			 * All __GFP_NOFAIL callers should be fixed so that they
			 * properly detect and handle allocation failures.
			 *
			 * We most definitely don't want callers attempting to
			 * allocate greater than order-1 page units with
			 * __GFP_NOFAIL.
			 */
			WARN_ON_ONCE(order > 1);
		}
		spin_lock_irqsave(&zone->lock, flags);
		page = __rmqueue(zone, order, migratetype);	/* allocate from the buddy lists */
		spin_unlock(&zone->lock);
		if (!page)
			goto failed;
		__mod_zone_page_state(zone, NR_FREE_PAGES, -(1 << order));
	}

	__count_zone_vm_events(PGALLOC, zone, 1 << order);
	zone_statistics(preferred_zone, zone, gfp_flags);
	local_irq_restore(flags);

	VM_BUG_ON(bad_range(zone, page));
	if (prep_new_page(page, order, gfp_flags))
		goto again;
	return page;	/* struct page of the first page of the allocated block */

failed:
	local_irq_restore(flags);
	return NULL;
}
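For reference, the per-CPU cache that the order-0 fast path above draws from looks roughly like this in kernels of this era (a simplified view, not the authoritative definition, which lives in include/linux/mmzone.h):

struct per_cpu_pages {
	int count;		/* number of pages currently in the lists */
	int high;		/* high watermark: drain back to the buddy system when reached */
	int batch;		/* chunk size used when refilling from or draining to the buddy system */

	/* one list per migrate type kept in the per-CPU cache */
	struct list_head lists[MIGRATE_PCPTYPES];
};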
4) rmqueue_bulk
Performs multiple (count) allocations from the buddy allocator in a single pass.
/*
 * Obtain a specified number of elements from the buddy allocator, all under
 * a single hold of the lock, for efficiency. Add them to the supplied list.
 * Returns the number of new pages which were placed at *list.
 */
static int rmqueue_bulk(struct zone *zone, unsigned int order,
			unsigned long count, struct list_head *list,
			int migratetype, int cold)
{
	int i;

	spin_lock(&zone->lock);
	for (i = 0; i < count; ++i) {
		struct page *page = __rmqueue(zone, order, migratetype);
		if (unlikely(page == NULL))
			break;

		/*
		 * Split buddy pages returned by expand() are received here
		 * in physical page order. The page is added to the callers and
		 * list and the list head then moves forward. From the callers
		 * perspective, the linked list is ordered by page number in
		 * some conditions. This is useful for IO devices that can
		 * merge IO requests if the physical pages are ordered
		 * properly.
		 */
		if (likely(cold == 0))
			list_add(&page->lru, list);
		else
			list_add_tail(&page->lru, list);
		set_page_private(page, migratetype);
		list = &page->lru;
	}
	__mod_zone_page_state(zone, NR_FREE_PAGES, -(i << order));
	spin_unlock(&zone->lock);
	return i;
}
5) __rmqueue
Performs a single allocation of one 2^order block.
/*
 * Do the hard work of removing an element from the buddy allocator.
 * Call me with the zone->lock already held.
 */
static struct page *__rmqueue(struct zone *zone, unsigned int order,
						int migratetype)
{
	struct page *page;

retry_reserve:
	page = __rmqueue_smallest(zone, order, migratetype);

	if (unlikely(!page) && migratetype != MIGRATE_RESERVE) {
		page = __rmqueue_fallback(zone, order, migratetype);

		/*
		 * Use MIGRATE_RESERVE rather than fail an allocation. goto
		 * is used because __rmqueue_smallest is an inline function
		 * and we want just one call site
		 */
		if (!page) {
			migratetype = MIGRATE_RESERVE;
			goto retry_reserve;
		}
	}

	trace_mm_page_alloc_zone_locked(page, order, migratetype);
	return page;
}
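__rmqueue_smallest, which is not reproduced in full here, walks zone->free_area[] from the requested order upwards, takes the first non-empty free list it finds, and gives the unused tail of the larger block back to lower orders via expand(). A simplified sketch of that search (a hypothetical rmqueue_smallest_sketch, not the verbatim kernel function):

static struct page *rmqueue_smallest_sketch(struct zone *zone,
					unsigned int order, int migratetype)
{
	unsigned int current_order;

	/* search from the requested order upwards for a non-empty free list */
	for (current_order = order; current_order < MAX_ORDER; ++current_order) {
		struct free_area *area = &zone->free_area[current_order];
		struct page *page;

		if (list_empty(&area->free_list[migratetype]))
			continue;

		page = list_entry(area->free_list[migratetype].next,
					struct page, lru);
		list_del(&page->lru);
		area->nr_free--;
		/* split the surplus of the larger block back onto lower orders */
		expand(zone, page, order, current_order, area, migratetype);
		return page;
	}
	return NULL;
}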
2. Memory freeing functions
The relevant macro definitions are as follows:
#define __free_page(page) __free_pages((page), 0)
#define free_page(addr) free_pages((addr), 0)
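A brief, hedged pairing example (the orders and the helper name are illustrative): the order passed to the freeing functions must match the order used at allocation time.

#include <linux/gfp.h>

static void demo_alloc_free_pairing(void)	/* hypothetical helper for illustration */
{
	unsigned long addr = __get_free_page(GFP_KERNEL);	/* order 0: one page */
	struct page *p;

	if (addr)
		free_page(addr);

	p = alloc_pages(GFP_KERNEL, 3);	/* order 3: 2^3 = 8 pages */
	if (p)
		__free_pages(p, 3);
}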