===================================
本文系本站原创,欢迎转载!
转载请注明出处:http://blog.csdn.net/gdt_A20
===================================
一、摘要
最重要的还是分配与释放,下面看一下相关的几个函数;
分配函数:
1.alloc_pages(gfp_t gfp_mask, unsigned int order);
用于请求2^order次方个连续的页,返回起始页的描述符;
2. alloc_page(gfp_mask);
请求分配一个单独的页,返回描述符;
3.__get_free_page(gfp_mask) ;
申请单独的页,但是返回它的线性地址;
4.__get_free_pages(gfp_mask, order);
用于请求2^order次方个连续的页,但是返回起始页的线性地址;
5.get_zeroed_page(gfp_mask);
请求一页,并且将该页清0,返回其线性地址;
6.__get_dma_pages(gfp_mask, order);
用于请求2^order个连续的DMA页,返回起始页的线性地址(它最终展开为__get_free_pages);
释放函数:
1.free_pages(addr, order);
注意只有到count为0的时候才真正的释放掉;
2.free_page(addr);
3.__free_page(page);
二、下面具体的看一下这些函数.
include/linux/gfp.h
1.alloc_page(gfp_mask);
/* Allocate a single page (order 0); returns its struct page descriptor. */
#define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0)
2.__get_free_page(gfp_mask);
/* Allocate a single page (order 0); returns its kernel linear address. */
#define __get_free_page(gfp_mask) \
		__get_free_pages((gfp_mask), 0)
3.__get_free_pages(gfp_mask, order);
mm/page_alloc.c
/*
 * __get_free_pages - allocate 2^order contiguous pages and return the
 * kernel linear (virtual) address of the first page, or 0 on failure.
 */
unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order)
{
	struct page *page;

	/*
	 * __get_free_pages() returns a 32-bit address, which cannot represent
	 * a highmem page
	 */
	VM_BUG_ON((gfp_mask & __GFP_HIGHMEM) != 0);

	page = alloc_pages(gfp_mask, order);
	if (!page)
		return 0;
	return (unsigned long) page_address(page);
}
4.get_zeroed_page(gfp_mask);
/* Allocate one page pre-filled with zeroes; returns its linear address. */
unsigned long get_zeroed_page(gfp_t gfp_mask)
{
	/* __GFP_ZERO makes the allocator clear the page before returning it. */
	return __get_free_pages(gfp_mask | __GFP_ZERO, 0);
}
5.__get_dma_pages(gfp_mask, order);
/* Allocate 2^order contiguous pages from ZONE_DMA; returns a linear address. */
#define __get_dma_pages(gfp_mask, order) \
		__get_free_pages((gfp_mask) | GFP_DMA, (order))
看来都是一路货色,都最后调用了alloc_pages(gfp_t gfp_mask, unsigned int order);
详细的看一下这个函数吧:
include/linux/gfp.h
#define alloc_pages(gfp_mask, order) \ alloc_pages_node(numa_node_id(), gfp_mask, order)在当前节点中分配页,
/* Allocate 2^order pages from node @nid (negative nid means current node). */
static inline struct page *alloc_pages_node(int nid, gfp_t gfp_mask,
						unsigned int order)
{
	/* Unknown node is current node */
	if (nid < 0)
		nid = numa_node_id();

	/* Pick the node's zonelist matching gfp_mask and allocate from it. */
	return __alloc_pages(gfp_mask, order, node_zonelist(nid, gfp_mask));
}
/*
 * node_zonelist - select the zonelist to allocate from for node @nid.
 * gfp_zonelist() picks between the node's zonelists based on the gfp flags.
 */
static inline struct zonelist *node_zonelist(int nid, gfp_t flags)
{
	return NODE_DATA(nid)->node_zonelists + gfp_zonelist(flags);
}
/* Thin wrapper: allocate with no explicit nodemask restriction. */
static inline struct page *
__alloc_pages(gfp_t gfp_mask, unsigned int order, struct zonelist *zonelist)
{
	return __alloc_pages_nodemask(gfp_mask, order, zonelist, NULL);
}
/*
 * Core allocation entry point: choose a preferred zone from the zonelist,
 * try a fast-path allocation, and fall back to the slow path on failure.
 */
struct page *
__alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
			struct zonelist *zonelist, nodemask_t *nodemask)
{
	enum zone_type high_zoneidx = gfp_zone(gfp_mask);	/* highest zone usable for this mask */
	struct zone *preferred_zone;
	struct page *page;
	int migratetype = allocflags_to_migratetype(gfp_mask);	/* which migrate-type free list to use */

	gfp_mask &= gfp_allowed_mask;

	lockdep_trace_alloc(gfp_mask);

	might_sleep_if(gfp_mask & __GFP_WAIT);	/* may the caller sleep? */

	if (should_fail_alloc_page(gfp_mask, order))
		return NULL;

	/*
	 * Check the zones suitable for the gfp_mask contain at least one
	 * valid zone. It's possible to have an empty zonelist as a result
	 * of GFP_THISNODE and a memoryless node
	 */
	if (unlikely(!zonelist->_zonerefs->zone))	/* no zones at all: bail out */
		return NULL;

	get_mems_allowed();
	/* The preferred zone is used for statistics later */
	first_zones_zonelist(zonelist, high_zoneidx,	/* find the zone matching the request */
				nodemask ? : &cpuset_current_mems_allowed,
				&preferred_zone);
	if (!preferred_zone) {	/* nothing suitable: give up */
		put_mems_allowed();
		return NULL;
	}

	/* First allocation attempt */
	page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask, order,	/* fast path, cf. slowpath below */
			zonelist, high_zoneidx, ALLOC_WMARK_LOW|ALLOC_CPUSET,	/* fast path checks the low watermark */
			preferred_zone, migratetype);
	if (unlikely(!page))
		page = __alloc_pages_slowpath(gfp_mask, order,	/* fast path failed; the slow path may */
				zonelist, high_zoneidx, nodemask,	/* relax watermarks and start page reclaim */
				preferred_zone, migratetype);

	put_mems_allowed();
	trace_mm_page_alloc(page, order, gfp_mask, migratetype);
	return page;
}
/*
 * Depending on the gfp_mask flags the kernel takes different paths here:
 * whether sleeping is allowed, highmem vs. lowmem requests, and so on.
 */
分配部分就到这里了,就不往下再贴了,大致过程就是这样;
下面看一下释放函数;
1.free_page(addr);
/* Free a single page given its kernel linear address. */
#define free_page(addr) free_pages((addr), 0)
2.__free_page(page);
/* Free a single page given its struct page descriptor. */
#define __free_page(page) __free_pages((page), 0)
void __free_pages(struct page *page, unsigned int order)
{
	/* Only actually release the block once the refcount drops to zero. */
	if (put_page_testzero(page)) {
		if (order == 0)
			free_hot_cold_page(page, 0);	/* order-0: per-CPU hot/cold lists */
		else
			__free_pages_ok(page, order);	/* higher orders: straight to buddy */
	}
}
free_page 最终也会调用到这里;
/* Free 2^order pages given the linear address of the first page. */
void free_pages(unsigned long addr, unsigned int order)
{
	if (addr != 0) {
		VM_BUG_ON(!virt_addr_valid((void *)addr));
		/* Convert the linear address back to its struct page and free it. */
		__free_pages(virt_to_page((void *)addr), order);
	}
}
void __free_pages(struct page *page, unsigned int order)
{
	if (put_page_testzero(page)) {
		if (order == 0)				/* order 0: release to the per-CPU hot/cold cache */
			free_hot_cold_page(page, 0);
		else
			__free_pages_ok(page, order);	/* otherwise release to the buddy system */
	}
}
/* The hot/cold page path follows. */
/*
 * Free a 0-order page
 * cold == 1 ? free a cold page : free a hot page
 */
void free_hot_cold_page(struct page *page, int cold)
{
	struct zone *zone = page_zone(page);		/* zone this page belongs to */
	struct per_cpu_pages *pcp;
	unsigned long flags;
	int migratetype;
	int wasMlocked = __TestClearPageMlocked(page);

	if (!free_pages_prepare(page, 0))
		return;

	migratetype = get_pageblock_migratetype(page);	/* which free list the page belongs to */
	set_page_private(page, migratetype);
	local_irq_save(flags);
	if (unlikely(wasMlocked))
		free_page_mlock(page);
	__count_vm_event(PGFREE);

	/*
	 * We only track unmovable, reclaimable and movable on pcp lists.
	 * Free ISOLATE pages back to the allocator because they are being
	 * offlined but treat RESERVE as movable pages so we can get those
	 * areas back if necessary. Otherwise, we may have to free
	 * excessively into the page allocator
	 */
	if (migratetype >= MIGRATE_PCPTYPES) {		/* not eligible for the hot/cold pcp lists */
		if (unlikely(migratetype == MIGRATE_ISOLATE)) {
			free_one_page(zone, page, 0, migratetype);	/* release to the matching free list */
			goto out;
		}
		migratetype = MIGRATE_MOVABLE;
	}

	pcp = &this_cpu_ptr(zone->pageset)->pcp;	/* this CPU's hot/cold page structure */
	if (cold)
		list_add_tail(&page->lru, &pcp->lists[migratetype]);	/* cold page: tail of the list */
	else
		list_add(&page->lru, &pcp->lists[migratetype]);		/* hot page: head of the list */
	pcp->count++;
	if (pcp->count >= pcp->high) {			/* cache over its limit: return a batch to buddy */
		free_pcppages_bulk(zone, pcp->batch, pcp);
		pcp->count -= pcp->batch;
	}

out:
	local_irq_restore(flags);
}
/*
 * Free a block of 2^order pages directly back to the buddy allocator
 * (higher-order path; order-0 pages go through the per-CPU lists).
 *
 * Fix: the listing had "tatic" — the leading 's' of "static" was lost
 * when the code was pasted, which makes the snippet invalid C.
 */
static void __free_pages_ok(struct page *page, unsigned int order)
{
	unsigned long flags;
	int wasMlocked = __TestClearPageMlocked(page);

	if (!free_pages_prepare(page, order))
		return;

	local_irq_save(flags);
	if (unlikely(wasMlocked))
		free_page_mlock(page);
	__count_vm_events(PGFREE, 1 << order);
	free_one_page(page_zone(page), page, order,
					get_pageblock_migratetype(page));
	local_irq_restore(flags);
}
/*
 * Hand one 2^order block back to the buddy system under the zone lock,
 * then update the zone's free-page counter.
 */
static void free_one_page(struct zone *zone, struct page *page, int order,
				int migratetype)
{
	spin_lock(&zone->lock);
	zone->all_unreclaimable = 0;
	zone->pages_scanned = 0;

	__free_one_page(page, zone, order, migratetype);
	__mod_zone_page_state(zone, NR_FREE_PAGES, 1 << order);
	spin_unlock(&zone->lock);
}
参数依次为:页所在的管理区、要释放的页、页块的order、迁移类型(决定挂到哪个空闲链表);
/*
 * Core buddy-system free: repeatedly merge the freed block with its free
 * buddy of the same order, climbing one order per merge, then place the
 * final block on the appropriate free list.
 */
static inline void __free_one_page(struct page *page,
		struct zone *zone, unsigned int order,
		int migratetype)
{
	unsigned long page_idx;
	unsigned long combined_idx;
	unsigned long uninitialized_var(buddy_idx);
	struct page *buddy;

	if (unlikely(PageCompound(page)))
		if (unlikely(destroy_compound_page(page, order)))
			return;

	VM_BUG_ON(migratetype == -1);

	page_idx = page_to_pfn(page) & ((1 << MAX_ORDER) - 1);	/* index within the MAX_ORDER-aligned area */

	VM_BUG_ON(page_idx & ((1 << order) - 1));
	VM_BUG_ON(bad_range(zone, page));

	while (order < MAX_ORDER-1) {
		buddy_idx = __find_buddy_index(page_idx, order);	/* locate the buddy's index (before or after us) */
		buddy = page + (buddy_idx - page_idx);			/* turn the index into a struct page pointer */
		if (!page_is_buddy(page, buddy, order))			/* cannot merge: stop climbing */
			break;
		/* Our buddy is free, merge with it and move up one order. */
		list_del(&buddy->lru);					/* buddy is idle: unlink it from its free list */
		zone->free_area[order].nr_free--;			/* one fewer free block at this order */
		rmv_page_order(buddy);					/* clear the buddy's PageBuddy state */
		combined_idx = buddy_idx & page_idx;			/* index of the merged, larger block */
		page = page + (combined_idx - page_idx);		/* struct page of the merged block */
		page_idx = combined_idx;				/* continue from the merged block */
		order++;						/* try to merge at the next order up */
	}
	set_page_order(page, order);	/* loop exits when no buddy matches or MAX_ORDER-1 is reached */

	/*
	 * If this is not the largest possible page, check if the buddy
	 * of the next-highest order is free. If it is, it's possible
	 * that pages are being freed that will coalesce soon. In case,
	 * that is happening, add the free page to the tail of the list
	 * so it's less likely to be used soon and more likely to be merged
	 * as a higher order page
	 */
	if ((order < MAX_ORDER-2) && pfn_valid_within(page_to_pfn(buddy))) {	/* only the "buddy didn't match" exit */
		struct page *higher_page, *higher_buddy;
		combined_idx = buddy_idx & page_idx;				/* index the merged block would have */
		higher_page = page + (combined_idx - page_idx);			/* page for that hypothetical block */
		buddy_idx = __find_buddy_index(combined_idx, order + 1);	/* its buddy one order up */
		higher_buddy = page + (buddy_idx - combined_idx);		/* struct page of that higher buddy */
		if (page_is_buddy(higher_page, higher_buddy, order + 1)) {	/* higher-order merge likely soon? */
			list_add_tail(&page->lru,				/* queue at the tail so it merges rather */
				&zone->free_area[order].free_list[migratetype]);	/* than being reallocated */
			goto out;
		}
	}

	list_add(&page->lru, &zone->free_area[order].free_list[migratetype]);	/* no merge possible: normal insert */
out:
	zone->free_area[order].nr_free++;
}
/* Net effect: the freed block is merged into the largest possible buddy. */
/*
 * Locate the struct page for both the matching buddy in our
 * pair (buddy1) and the combined O(n+1) page they form (page).
 *
 * 1) Any buddy B1 will have an order O twin B2 which satisfies
 * the following equation:
 * B2 = B1 ^ (1 << O)
 * For example, if the starting buddy (buddy2) is #8 its order
 * 1 buddy is #10:
 * B2 = 8 ^ (1 << 1) = 8 ^ 2 = 10
 *
 * 2) Any buddy B will have an order O+1 parent P which
 * satisfies the following equation:
 * P = B & ~(1 << O)
 *
 * Assumption: *_mem_map is contiguous at least up to MAX_ORDER
 */
static inline unsigned long
__find_buddy_index(unsigned long page_idx, unsigned int order)
{
	return page_idx ^ (1 << order);
}
/* The buddy index is found by flipping bit 'order':
 * e.g. order == 0, page_idx == 0 -> buddy is 0^1 == 1. */
/* Having located the buddy, we still must check whether it can be merged. */
/*
 * This function checks whether a page is free && is the buddy
 * we can do coalesce a page and its buddy if
 * (a) the buddy is not in a hole &&
 * (b) the buddy is in the buddy system &&
 * (c) a page and its buddy have the same order &&
 * (d) a page and its buddy are in the same zone.
 *
 * For recording whether a page is in the buddy system, we set ->_mapcount -2.
 * Setting, clearing, and testing _mapcount -2 is serialized by zone->lock.
 *
 * For recording page's order, we use page_private(page).
 */
static inline int page_is_buddy(struct page *page, struct page *buddy,
								int order)
{
	if (!pfn_valid_within(page_to_pfn(buddy)))	/* buddy must be real memory, not a hole */
		return 0;

	if (page_zone_id(page) != page_zone_id(buddy))	/* page and buddy must be in the same zone */
		return 0;

	if (PageBuddy(buddy) && page_order(buddy) == order) {	/* buddy must be free with the same order */
		VM_BUG_ON(page_count(buddy) != 0);
		return 1;
	}
	return 0;
}
/* From this, the preconditions for merging with a buddy are: */
1.不是空洞
2.相同的zone
3.order一样
三、总结
buddy的分配释放函数就到这里了.