In the Linux kernel, initialization of the whole memory system is handled by free_area_init_node(); on multi-node (NUMA) systems, free_area_init_nodes() is called instead, which invokes free_area_init_node() for every online node. These functions in turn call free_area_init_core() to complete the initialization of each node.
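For reference, on NUMA kernels of this era the multi-node entry point is little more than a loop over the online nodes. A condensed excerpt (zone-boundary recording and node-state bookkeeping trimmed):

void __init free_area_init_nodes(unsigned long *max_zone_pfn)
{
    int nid;

    /* ... record zone boundaries from max_zone_pfn ... */

    for (each_online_node via for_each_online_node(nid)) { /* condensed */
        pg_data_t *pgdat = NODE_DATA(nid);

        /* zones_size/zholes_size are NULL: sizes come from memblock */
        free_area_init_node(nid, NULL,
                            find_min_pfn_for_node(nid), NULL);

        /* mark nodes that actually have memory */
        if (pgdat->node_present_pages)
            node_set_state(nid, N_MEMORY);
    }
}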
free_area_init_node() initializes each zone of the given node according to the nid and the per-zone sizes passed in; the node's first page frame number is supplied via the node_start_pfn parameter.
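On a UMA build the usual caller is free_area_init(), which simply forwards the sizes for node 0; in this kernel era it looks like:

void __init free_area_init(unsigned long *zones_size)
{
    /* node 0 starts at the first PFN of the kernel's direct mapping */
    free_area_init_node(0, zones_size,
                        __pa(PAGE_OFFSET) >> PAGE_SHIFT, NULL);
}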
calculate_node_totalpages() computes how much physical memory the node contains, filling in node_spanned_pages and node_present_pages; where the information is available it also sets each zone's spanned_pages and present_pages.
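The relationship between the two counters is simple arithmetic: spanned covers the whole PFN range including holes, while present excludes the holes. A minimal userspace sketch with made-up PFN values (all numbers hypothetical):

#include <stdio.h>

int main(void)
{
    /* hypothetical node: PFNs [0x80000, 0xc0000) with a 0x8000-page hole */
    unsigned long start_pfn = 0x80000;
    unsigned long end_pfn   = 0xc0000;
    unsigned long holes     = 0x8000;

    unsigned long spanned = end_pfn - start_pfn;  /* holes included */
    unsigned long present = spanned - holes;      /* pages that really exist */

    printf("node_spanned_pages = %#lx\n", spanned);
    printf("node_present_pages = %#lx\n", present);
    return 0;
}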
Next, alloc_node_mem_map() allocates the struct page objects that describe all of the node's memory. Note that on a single-node (flat-memory) system, the global mem_map comes straight from the node's node_mem_map. The total size of this allocation is:
(end - start) * sizeof(struct page)
and the memory is normally obtained from memblock.
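To get a feel for the size of this allocation, here is a small userspace sketch; the 64-byte struct page is an assumption typical of 64-bit builds, not a fixed constant:

#include <stdio.h>

#define SIZEOF_STRUCT_PAGE 64UL   /* assumed; actual size depends on config/arch */

int main(void)
{
    /* hypothetical node covering 4 GiB with 4 KiB pages: PFNs [0, 0x100000) */
    unsigned long start = 0x0;
    unsigned long end   = 0x100000;

    unsigned long size = (end - start) * SIZEOF_STRUCT_PAGE;
    printf("node_mem_map: %lu MiB for 4 GiB of RAM\n", size >> 20);
    return 0;
}

At these numbers, roughly 1.6% of physical memory (64 MiB out of 4 GiB) goes to struct page bookkeeping.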
After that, free_area_init_core() carries out the remaining initialization.
mm/page_alloc.c
void __paginginit free_area_init_node(int nid, unsigned long *zones_size,
                                      unsigned long node_start_pfn,
                                      unsigned long *zholes_size)
{
    pg_data_t *pgdat = NODE_DATA(nid);
    unsigned long start_pfn = 0;
    unsigned long end_pfn = 0;

    /* pg_data_t should be reset to zero when it's allocated */
    WARN_ON(pgdat->nr_zones || pgdat->kswapd_classzone_idx);

    reset_deferred_meminit(pgdat);
    pgdat->node_id = nid;
    pgdat->node_start_pfn = node_start_pfn;
    pgdat->per_cpu_nodestats = NULL;
#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
    get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
    pr_info("Initmem setup node %d [mem %#018Lx-%#018Lx]\n", nid,
            (u64)start_pfn << PAGE_SHIFT,
            end_pfn ? ((u64)end_pfn << PAGE_SHIFT) - 1 : 0);
#else
    start_pfn = node_start_pfn;
#endif
    calculate_node_totalpages(pgdat, start_pfn, end_pfn,
                              zones_size, zholes_size);

    alloc_node_mem_map(pgdat);
#ifdef CONFIG_FLAT_NODE_MEM_MAP
    printk(KERN_DEBUG "free_area_init_node: node %d, pgdat %08lx, node_mem_map %08lx\n",
           nid, (unsigned long)pgdat,
           (unsigned long)pgdat->node_mem_map);
#endif

    free_area_init_core(pgdat);
}
free_area_init_core(pgdat) is the core of the whole initialization:
/*
 * Set up the zone data structures:
 * - mark all pages reserved
 * - mark all memory queues empty
 * - clear the memory bitmaps
 *
 * NOTE: pgdat should get zeroed by caller.
 */
static void __paginginit free_area_init_core(struct pglist_data *pgdat)
{
    enum zone_type j;
    int nid = pgdat->node_id;
    int ret;

    pgdat_resize_init(pgdat);
#ifdef CONFIG_NUMA_BALANCING
    spin_lock_init(&pgdat->numabalancing_migrate_lock);
    pgdat->numabalancing_migrate_nr_pages = 0;
    pgdat->numabalancing_migrate_next_window = jiffies;
#endif
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
    spin_lock_init(&pgdat->split_queue_lock);
    INIT_LIST_HEAD(&pgdat->split_queue);
    pgdat->split_queue_len = 0;
#endif
    init_waitqueue_head(&pgdat->kswapd_wait);
    init_waitqueue_head(&pgdat->pfmemalloc_wait);
#ifdef CONFIG_COMPACTION
    init_waitqueue_head(&pgdat->kcompactd_wait);
#endif
    pgdat_page_ext_init(pgdat);
    spin_lock_init(&pgdat->lru_lock);
    lruvec_init(node_lruvec(pgdat));

    for (j = 0; j < MAX_NR_ZONES; j++) {
        struct zone *zone = pgdat->node_zones + j;
        unsigned long size, realsize, freesize, memmap_pages;
        unsigned long zone_start_pfn = zone->zone_start_pfn;

        size = zone->spanned_pages;
        realsize = freesize = zone->present_pages;

        /*
         * Adjust freesize so that it accounts for how much memory
         * is used by this zone for memmap. This affects the watermark
         * and per-cpu initialisations
         */
        memmap_pages = calc_memmap_size(size, realsize);
        if (!is_highmem_idx(j)) {
            if (freesize >= memmap_pages) {
                freesize -= memmap_pages;
                if (memmap_pages)
                    printk(KERN_DEBUG "  %s zone: %lu pages used for memmap\n",
                           zone_names[j], memmap_pages);
            } else
                pr_warn("  %s zone: %lu pages exceeds freesize %lu\n",
                        zone_names[j], memmap_pages, freesize);
        }

        /* Account for reserved pages */
        if (j == 0 && freesize > dma_reserve) {
            freesize -= dma_reserve;
            printk(KERN_DEBUG "  %s zone: %lu pages reserved\n",
                   zone_names[0], dma_reserve);
        }

        if (!is_highmem_idx(j))
            nr_kernel_pages += freesize;
        /* Charge for highmem memmap if there are enough kernel pages */
        else if (nr_kernel_pages > memmap_pages * 2)
            nr_kernel_pages -= memmap_pages;
        nr_all_pages += freesize;

        /*
         * Set an approximate value for lowmem here, it will be adjusted
         * when the bootmem allocator frees pages into the buddy system.
         * And all highmem pages will be managed by the buddy system.
         */
        zone->managed_pages = is_highmem_idx(j) ? realsize : freesize;
#ifdef CONFIG_NUMA
        zone->node = nid;
#endif
        zone->name = zone_names[j];
        zone->zone_pgdat = pgdat;
        spin_lock_init(&zone->lock);
        zone_seqlock_init(zone);
        zone_pcp_init(zone);

        if (!size)
            continue;

        set_pageblock_order();
        setup_usemap(pgdat, zone, zone_start_pfn, size);
        ret = init_currently_empty_zone(zone, zone_start_pfn, size);
        BUG_ON(ret);
        memmap_init(size, nid, j, zone_start_pfn);
    }
}
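The freesize accounting in the loop above depends on calc_memmap_size(), which converts a zone's page count into the number of pages its memmap occupies. For reference, in kernels of this era it reads roughly as follows (original comment condensed):

static unsigned long __paginginit calc_memmap_size(unsigned long spanned_pages,
                                                   unsigned long present_pages)
{
    unsigned long pages = spanned_pages;

    /*
     * With SPARSEMEM and large holes in the zone, charging by
     * spanned_pages would grossly over-estimate; fall back to
     * present_pages instead.
     */
    if (spanned_pages > present_pages + (present_pages >> 4) &&
        IS_ENABLED(CONFIG_SPARSEMEM))
        pages = present_pages;

    return PAGE_ALIGN(pages * sizeof(struct page)) >> PAGE_SHIFT;
}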