内核中所有涉及到页面释放的函数最终都会调用到__free_pages
/*
 * __free_pages - entry point of the page-freeing path quoted in this article.
 * @page:  page to release (presumably the first page of a block of
 *         (1 << order) pages - confirm against the callers)
 * @order: order of the block; 0 selects the single-page path
 *
 * Nothing is freed unless put_page_testzero() returns true.  Presumably that
 * helper drops one reference and reports whether the count reached zero; its
 * definition is not part of this excerpt, so verify before relying on it.
 *
 * The leading numbers are the line numbers of the quoted kernel listing and
 * are referenced by the surrounding text.
 */
1683 fastcall void __free_pages(struct page *page, unsigned int order)
1684 {
1685         if (put_page_testzero(page)) {
1686                 if (order == 0)
                             /* single page: handed to free_hot_page() */
1687                         free_hot_page(page);
1688                 else
                             /* larger block: handed to __free_pages_ok() with its order */
1689                         __free_pages_ok(page, order);
1690         }
1691 }
free_hot_page仅仅是free_hot_cold_page的一个封装
/*
 * free_hot_cold_page - put a single page on a per-cpu page list of its zone.
 * @page: the page being freed
 * @cold: index into the pcp[] array of the per-cpu structure; selects which
 *        of the per-cpu lists receives the page
 *
 * Returns nothing.  The function returns early, without queueing the page,
 * when free_pages_check() reports a non-zero result.
 *
 * The leading numbers are the line numbers of the quoted kernel listing and
 * are referenced by the surrounding text.
 */
985 /*
986  * Free a 0-order page
987  */
988 static void fastcall free_hot_cold_page(struct page *page, int cold)
989 {
990         struct zone *zone = page_zone(page);
991         struct per_cpu_pages *pcp;
992         unsigned long flags;
993
            /* An anonymous page gets its mapping pointer cleared first. */
994         if (PageAnon(page))
995                 page->mapping = NULL;
            /* Give up on this page if the sanity check flags a problem. */
996         if (free_pages_check(page))
997                 return;
998
            /* The lock-debugging check needs page_address(), so highmem pages skip it. */
999         if (!PageHighMem(page))
1000                 debug_check_no_locks_freed(page_address(page), PAGE_SIZE);
1001         arch_free_page(page, 0);
1002         kernel_map_pages(page, 1, 0);
1003
             /*
              * Pick the pcp[cold] list of this zone for the cpu returned by
              * get_cpu(); the matching put_cpu() is at line 1015.
              */
1004         pcp = &zone_pcp(zone, get_cpu())->pcp[cold];
             /* The list update runs between local_irq_save() and local_irq_restore(). */
1005         local_irq_save(flags);
1006         __count_vm_event(PGFREE);
1007         list_add(&page->lru, &pcp->list);
             /* Remember the migrate type of the pageblock in the private field. */
1008         set_page_private(page, get_pageblock_migratetype(page));
1009         pcp->count++;
             /*
              * Once the list holds at least pcp->high pages, pass pcp->batch
              * of them to free_pages_bulk() and lower the count to match.
              */
1010         if (pcp->count >= pcp->high) {
1011                 free_pages_bulk(zone, pcp->batch, &pcp->list, 0);
1012                 pcp->count -= pcp->batch;
1013         }
1014         local_irq_restore(flags);
1015         put_cpu();
1016 }
这个函数开头(994 ~ 1002)都是些释放前的检查和调试钩子,不深究了;真正的工作在1004 ~ 1015:把页面挂到当前CPU的per-CPU页面缓存(pcp)链表上。
1010 ~ 1013 如果缓存的页面数达到或超过pcp->high,那么就把数量为pcp->batch的页面一次性地还给buddy系统。该策略称为惰性合并,目的是防止页面在"释放 -> 分配 -> 释放"的反复操作中频繁地进出buddy系统。
__free_pages_ok经过一些奇怪的检查,调用到__free_one_page
/*
 * __free_one_page - put a block of (1 << order) pages on the free lists of
 * a zone, merging it with its buddy for as long as that buddy is mergeable.
 * @page:  first page of the block being freed
 * @zone:  zone whose free_area[] lists and NR_FREE_PAGES counter are updated
 * @order: order of the block on entry; it grows by one for every merge
 *
 * NOTE(review): the text numbered 392-411 below is the tail of the kernel
 * header comment for this function.  Its opening lies outside this excerpt,
 * so the listing starts in the middle of that comment.
 *
 * The leading numbers are the line numbers of the quoted kernel listing and
 * are referenced by the surrounding text.
 */
392  * The concept of a buddy system is to maintain direct-mapped table
393  * (containing bit values) for memory blocks of various "orders".
394  * The bottom level table contains the map for the smallest allocatable
395  * units of memory (here, pages), and each level above it describes
396  * pairs of units from the levels below, hence, "buddies".
397  * At a high level, all that happens here is marking the table entry
398  * at the bottom level available, and propagating the changes upward
399  * as necessary, plus some accounting needed to play nicely with other
400  * parts of the VM system.
401  * At each level, we keep a list of pages, which are heads of continuous
402  * free pages of length of (1 << order) and marked with PG_buddy. Page's
403  * order is recorded in page_private(page) field.
404  * So when we are allocating or freeing one, we can derive the state of the
405  * other. That is, if we allocate a small block, and both were
406  * free, the remainder of the region must be split into blocks.
407  * If a block is freed, and its buddy is also free, then this
408  * triggers coalescing into a block of larger size.
409  *
410  * -- wli
411  */
412
413 static inline void __free_one_page(struct page *page,
414                 struct zone *zone, unsigned int order)
415 {
416         unsigned long page_idx;
            /* Number of pages in the block being freed. */
417         int order_size = 1 << order;
            /* Free list to use at the end, taken from the pageblock of the page. */
418         int migratetype = get_pageblock_migratetype(page);
419
420         if (unlikely(PageCompound(page)))
421                 destroy_compound_page(page, order);
422
            /* Keep only the low MAX_ORDER bits of the page frame number. */
423         page_idx = page_to_pfn(page) & ((1 << MAX_ORDER) - 1);
424
            /* The block must start on a multiple of its own size. */
425         VM_BUG_ON(page_idx & (order_size - 1));
426         VM_BUG_ON(bad_range(zone, page));
427
            /* Account all pages of the block as free before any merging. */
428         __mod_zone_page_state(zone, NR_FREE_PAGES, order_size);
            /* Merge upward; the loop never goes past order MAX_ORDER-1. */
429         while (order < MAX_ORDER-1) {
430                 unsigned long combined_idx;
431                 struct page *buddy;
432
433                 buddy = __page_find_buddy(page, page_idx, order);
                    /* Stop as soon as the buddy cannot be merged. */
434                 if (!page_is_buddy(page, buddy, order))
435                         break;          /* Move the buddy up one level. */
436
                    /* Take the buddy off the free list of the current order. */
437                 list_del(&buddy->lru);
438                 zone->free_area[order].nr_free--;
439                 rmv_page_order(buddy);
                    /* Re-point page and page_idx at the start of the merged block. */
440                 combined_idx = __find_combined_index(page_idx, order);
441                 page = page + (combined_idx - page_idx);
442                 page_idx = combined_idx;
443                 order++;
444         }
            /* Record the final order on the head page and queue the block. */
445         set_page_order(page, order);
446         list_add(&page->lru,
447                 &zone->free_area[order].free_list[migratetype]);
448         zone->free_area[order].nr_free++;
449 }
该函数是__free_pages的核心
429 ~ 444 在把内存还给buddy系统前,首先检查这个内存区的伙伴是否是空闲的,如果是则进行合并,转移到更高阶的链表,直到无法合并为止。
433 __page_find_buddy是一个辅助函数,负责找到给定page和order对应的buddy
434 用page_is_buddy判断找到的buddy是否真的可以与page合并;如果不可以,则终止循环
440~442 page和它的buddy合并后,我们需要用合并后区域的首个页面page代表这个内存区
445 从这一行我们可以看出,只有合并后内存区的首个page需要记录order并设置PG_buddy标志(order保存在page_private(page)中),区内其余page不需要修改。
446 ~ 447 把这个内存区通过page->lru链接到zone->free_area[order]中相应迁移类型(migratetype)的空闲链表
448 增加free_area中的空闲计数