这里主要分析zone/zonelist的初始化,以及把系统空闲内存释放到伙伴系统
1.zone的初始化
调用关系:start_kernel->setup_arch->paging_init->bootmem_init->zone_sizes_init->free_area_init_node->free_area_init_core->init_currently_empty_zone/memmap_init
zone_sizes_init: 计算每个zone能够管理的页面数,以及起始pfn号,初始化zone的等待队列hash表, 以及非常重要的free list链表
/*
 * Per-node zone setup: walk every zone slot of @pgdat and, for each zone
 * that spans at least one page, initialise its wait table and free lists
 * (init_currently_empty_zone) and its memmap (memmap_init).
 *
 * zone_start_pfn begins at the node's first pfn and is advanced by the
 * spanned size of each zone that is set up.
 */
static void __paginginit free_area_init_core(struct pglist_data *pgdat)
{
enum zone_type j;
int nid = pgdat->node_id;
unsigned long zone_start_pfn = pgdat->node_start_pfn;
int ret;
pgdat_resize_init(pgdat);
/* Initialise the node's two wait queues. */
init_waitqueue_head(&pgdat->kswapd_wait);
init_waitqueue_head(&pgdat->pfmemalloc_wait);
pgdat_page_ext_init(pgdat);
for (j = 0; j < MAX_NR_ZONES; j++) {
struct zone *zone = pgdat->node_zones + j;
/* realsize, freesize and memmap_pages are not used further in this excerpt. */
unsigned long size, realsize, freesize, memmap_pages;
size = zone->spanned_pages;
realsize = freesize = zone->present_pages;
/* The lruvec is initialised even for zones that turn out to be empty. */
lruvec_init(&zone->lruvec);
/* Nothing more to do for a zone that spans no pages. */
if (!size)
continue;
set_pageblock_order();
setup_usemap(pgdat, zone, zone_start_pfn, size);
/* Initialise the zone's wait-queue table and free lists. */
ret = init_currently_empty_zone(zone, zone_start_pfn, size);
/* A non-zero return here is treated as fatal. */
BUG_ON(ret);
memmap_init(size, nid, j, zone_start_pfn);
/* The next zone starts where this one ends. */
zone_start_pfn += size;
}
}
/*
 * Set up the bookkeeping of a zone before any pages are handed to it.
 *
 * @zone:           zone to initialise
 * @zone_start_pfn: first pfn of the zone, stored in zone->zone_start_pfn
 * @size:           passed to zone_wait_table_init()
 *
 * Returns 0 on success, or the non-zero value reported by
 * zone_wait_table_init(); in that case nothing else is touched.
 */
int __meminit init_currently_empty_zone(struct zone *zone,
					unsigned long zone_start_pfn,
					unsigned long size)
{
	struct pglist_data *node = zone->zone_pgdat;
	/* Initialise the wait-queue hash table first; give up if that fails. */
	int err = zone_wait_table_init(zone, size);

	if (err)
		return err;

	/* Record on the owning node that zone indexes up to this one are in use. */
	node->nr_zones = zone_idx(zone) + 1;
	zone->zone_start_pfn = zone_start_pfn;

	/* Initialise the zone's free lists. */
	zone_init_free_lists(zone);

	return 0;
}
1.1.2 pageblock 迁移类型初始化
memmap_init:
1. 关联page和zone/node id
2. 设置每个pageblock的migrate type
/*
 * Initialise the page descriptors for the pfn range
 * [start_pfn, start_pfn + size) belonging to zone index @zone on node @nid.
 *
 * Every pfn is initialised with its zone and node id. In addition, the
 * first page of each pageblock marks the whole pageblock as
 * MIGRATE_MOVABLE.
 *
 * @context is not consulted in this trimmed excerpt; it is kept so the
 * signature stays unchanged for callers.
 */
void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
		unsigned long start_pfn, enum memmap_context context)
{
	unsigned long end_pfn = start_pfn + size;
	unsigned long pfn;

	/* Remember the highest pfn that has a memmap entry. */
	if (highest_memmap_pfn < end_pfn - 1)
		highest_memmap_pfn = end_pfn - 1;

	for (pfn = start_pfn; pfn < end_pfn; pfn++) {
		/*
		 * A pfn with all low bits clear is the first page of a
		 * pageblock (assumes pageblock_nr_pages is a power of two).
		 */
		if (!(pfn & (pageblock_nr_pages - 1))) {
			struct page *page = pfn_to_page(pfn);

			/* Tie the page to its zone/node and set up its refcount. */
			__init_single_page(page, pfn, zone, nid);
			set_pageblock_migratetype(page, MIGRATE_MOVABLE);
		} else {
			__init_single_pfn(pfn, zone, nid);
		}
	}
}
set_pageblock_migratetype:设置page所在pageblock的迁移类型
get_pageblock_migratetype:获取page所在pageblock的迁移类型
2. zonelist的初始化
start_kernel->build_all_zonelists->build_all_zonelists_init->__build_all_zonelists->build_zonelists
2.1关键数据结构
/*
 * Per-node memory descriptor, abridged to the two members discussed in
 * this section.
 */
struct pglist_data {
	/* The node's zones, indexed by zone type. */
	struct zone node_zones[MAX_NR_ZONES];
	/* The node's zonelists; build_zonelists() fills entry 0. */
	struct zonelist node_zonelists[MAX_ZONELISTS];
	/* Other members (node_id, node_start_pfn, nr_zones, ...) omitted. */
};
其中MAX_NR_ZONES=3,MAX_ZONELISTS=1,而MAX_ZONES_PER_ZONELIST=4
/*
 * An ordered array of zone references. build_zonelists_node() fills
 * _zonerefs[] from the index it is given (0 when called from
 * build_zonelists()).
 * NOTE(review): the extra "+ 1" slot presumably holds a terminating
 * entry - confirm against the kernel source.
 */
struct zonelist {
struct zoneref _zonerefs[MAX_ZONES_PER_ZONELIST + 1];
};
/* One zonelist entry: a zone pointer paired with that zone's index. */
struct zoneref {
struct zone *zone; /* Pointer to actual zone */
int zone_idx; /* zone_idx(zoneref->zone) */
};
2.2 zonelist函数
/*
 * Build the zonelist of node @pgdat.
 *
 * Only the first zonelist (index 0) is filled; per these notes it is the
 * only one in this configuration. The populated zones of the node are
 * appended by build_zonelists_node(), which returns how many entries it
 * wrote.
 *
 * The function returns void, so it must not return a value.
 */
static void build_zonelists(pg_data_t *pgdat)
{
	enum zone_type j;
	struct zonelist *zonelist;

	/* Pick the first zonelist. */
	zonelist = &pgdat->node_zonelists[0];
	j = build_zonelists_node(pgdat, zonelist, 0);

	/*
	 * Mark the slot after the last filled entry as empty, as the
	 * upstream non-NUMA build_zonelists() does.
	 */
	zonelist->_zonerefs[j].zone = NULL;
	zonelist->_zonerefs[j].zone_idx = 0;
}
/*
 * Append the populated zones of @pgdat to @zonelist, walking the zone
 * types from the highest index down to 0.
 *
 * @nr_zones is the index of the first free slot in zonelist->_zonerefs[].
 * Returns the index of the first free slot after the appended entries.
 *
 * With MAX_NR_ZONES == 3 as stated in these notes, the first index
 * examined is 2 (ZONE_MOVABLE). The notes observe that ZONE_MOVABLE is
 * not populated here, so ZONE_NORMAL is the first zone added.
 */
static int build_zonelists_node(pg_data_t *pgdat, struct zonelist *zonelist,
				int nr_zones)
{
	enum zone_type idx = MAX_NR_ZONES;

	do {
		struct zone *candidate;

		idx--;
		candidate = &pgdat->node_zones[idx];

		/* Zones without pages do not get a zonelist entry. */
		if (!populated_zone(candidate))
			continue;

		/* Link the next free zoneref to this zone. */
		zoneref_set_zone(candidate, &zonelist->_zonerefs[nr_zones]);
		nr_zones++;
		check_highest_zone(idx);
	} while (idx);

	return nr_zones;
}
最终的关系:
3.释放内存到伙伴系统
函数调用关系
start_kernel->mm_init->mem_init->free_all_bootmem->free_low_memory_core_early->__free_memory_core->__free_pages_memory
/*
 * Release the pfn range [start, end) by passing it to
 * __free_pages_bootmem() in power-of-two sized chunks.
 *
 * For each chunk the order starts at the position of the lowest set bit
 * of the current pfn, capped at MAX_ORDER - 1, and is then lowered until
 * the chunk no longer runs past @end. Example: for start == 8,
 * __ffs(8) == 3, so the 8 pages from pfn 8 up to (not including) pfn 16
 * are freed as one order-3 chunk, provided they fit below @end.
 */
static void __init __free_pages_memory(unsigned long start, unsigned long end)
{
	int order;

	for (; start < end; start += (1UL << order)) {
		/* Largest order the alignment of this pfn allows. */
		order = min(MAX_ORDER - 1UL, __ffs(start));

		/* Shrink the chunk until it fits inside the range. */
		while (start + (1UL << order) > end)
			order--;

		__free_pages_bootmem(pfn_to_page(start), start, order);
	}
}
根据公式:order = min(MAX_ORDER - 1UL, __ffs(start))有以下结论:
每个内存块(order)的起始物理地址都是自身内存块大小的整数倍(pfn << PAGE_SHIFT 能被 (1UL << order) << PAGE_SHIFT 整除,即 pfn & ((1UL << order) - 1) == 0)
伙伴系统内存的分配和释放,以及slab/vmalloc模块,将另行分析