Linux Memory Management: Node Initialization

 
  

In the Linux kernel, initialization of the whole memory system is handled by free_area_init_node(); on multi-node (NUMA) systems, free_area_init_nodes() can be called instead. These functions complete the initialization of an entire node by further calling free_area_init_core().

free_area_init_node() initializes each zone under a node according to the given nid and the per-zone sizes; the starting page frame of the whole node is given by the parameter node_start_pfn.
calculate_node_totalpages() computes how much physical memory the node covers, determining the node's node_spanned_pages and node_present_pages. Where the information is available, it also fills in the spanned_pages and present_pages values for each zone.
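The distinction matters: spanned_pages counts the node's entire pfn range including holes, while present_pages excludes the holes. A minimal userspace sketch of that accounting (the pfn range and hole size below are invented example values, not data from a real machine):

/* Sketch of the spanned vs. present accounting performed by
 * calculate_node_totalpages(). All values are hypothetical. */
#include <stdio.h>

int main(void)
{
	unsigned long node_start_pfn = 0x10000;	/* hypothetical start pfn */
	unsigned long node_end_pfn   = 0x50000;	/* hypothetical end pfn */
	unsigned long absent_pages   = 0x4000;	/* hypothetical hole size */

	unsigned long spanned = node_end_pfn - node_start_pfn;	/* holes included */
	unsigned long present = spanned - absent_pages;		/* holes excluded */

	printf("node_spanned_pages = %lu\n", spanned);	/* 262144 */
	printf("node_present_pages = %lu\n", present);	/* 245760 */
	return 0;
}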
alloc_node_mem_map() is then called to allocate the struct page objects that manage the node's memory. Note that on a single-node (flat memory) system, the global mem_map comes directly from the node's node_mem_map. The total size of this allocation is:
(end - start) * sizeof(struct page)
and the memory is normally obtained from memblock.
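To get a feel for how big this array is, here is a back-of-the-envelope computation, assuming 4 KiB pages and a 64-byte struct page (typical on x86-64, but both values are configuration dependent):

/* Size of node_mem_map for a hypothetical node with 4 GiB of RAM.
 * PAGE_SIZE = 4096 and sizeof(struct page) = 64 are assumptions. */
#include <stdio.h>

int main(void)
{
	unsigned long start_pfn = 0;
	unsigned long end_pfn   = (4UL << 30) / 4096;	/* 4 GiB -> 1048576 page frames */
	unsigned long page_desc = 64;			/* assumed sizeof(struct page) */

	unsigned long bytes = (end_pfn - start_pfn) * page_desc;
	printf("mem_map needs %lu MiB for %lu pages\n",
	       bytes >> 20, end_pfn - start_pfn);	/* 64 MiB for 1048576 pages */
	return 0;
}

Under these assumptions, about 1.6% of physical memory (64 / 4096) goes to page descriptors.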
 
Further initialization is then completed by free_area_init_core().
 
mm/page_alloc.c
 
  

void __paginginit free_area_init_node(int nid, unsigned long *zones_size,
				      unsigned long node_start_pfn,
				      unsigned long *zholes_size)
{
	pg_data_t *pgdat = NODE_DATA(nid);
	unsigned long start_pfn = 0;
	unsigned long end_pfn = 0;

	/* pg_data_t should be reset to zero when it's allocated */
	WARN_ON(pgdat->nr_zones || pgdat->kswapd_classzone_idx);

	reset_deferred_meminit(pgdat);
	pgdat->node_id = nid;
	pgdat->node_start_pfn = node_start_pfn;
	pgdat->per_cpu_nodestats = NULL;
#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
	get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
	pr_info("Initmem setup node %d [mem %#018Lx-%#018Lx]\n", nid,
		(u64)start_pfn << PAGE_SHIFT,
		end_pfn ? ((u64)end_pfn << PAGE_SHIFT) - 1 : 0);
#else
	start_pfn = node_start_pfn;
#endif
	calculate_node_totalpages(pgdat, start_pfn, end_pfn,
				  zones_size, zholes_size);

	alloc_node_mem_map(pgdat);
#ifdef CONFIG_FLAT_NODE_MEM_MAP
	printk(KERN_DEBUG "free_area_init_node: node %d, pgdat %08lx, node_mem_map %08lx\n",
		nid, (unsigned long)pgdat,
		(unsigned long)pgdat->node_mem_map);
#endif

	free_area_init_core(pgdat);
}

The function free_area_init_core(pgdat) is the heart of the whole initialization:

 

/*
 * Set up the zone data structures:
 *   - mark all pages reserved
 *   - mark all memory queues empty
 *   - clear the memory bitmaps
 *
 * NOTE: pgdat should get zeroed by caller.
 */
static void __paginginit free_area_init_core(struct pglist_data *pgdat)
{
	enum zone_type j;
	int nid = pgdat->node_id;
	int ret;

	pgdat_resize_init(pgdat);
#ifdef CONFIG_NUMA_BALANCING
	spin_lock_init(&pgdat->numabalancing_migrate_lock);
	pgdat->numabalancing_migrate_nr_pages = 0;
	pgdat->numabalancing_migrate_next_window = jiffies;
#endif
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	spin_lock_init(&pgdat->split_queue_lock);
	INIT_LIST_HEAD(&pgdat->split_queue);
	pgdat->split_queue_len = 0;
#endif
	init_waitqueue_head(&pgdat->kswapd_wait);
	init_waitqueue_head(&pgdat->pfmemalloc_wait);
#ifdef CONFIG_COMPACTION
	init_waitqueue_head(&pgdat->kcompactd_wait);
#endif
	pgdat_page_ext_init(pgdat);
	spin_lock_init(&pgdat->lru_lock);
	lruvec_init(node_lruvec(pgdat));

	for (j = 0; j < MAX_NR_ZONES; j++) {
		struct zone *zone = pgdat->node_zones + j;
		unsigned long size, realsize, freesize, memmap_pages;
		unsigned long zone_start_pfn = zone->zone_start_pfn;

		size = zone->spanned_pages;
		realsize = freesize = zone->present_pages;

		/*
		 * Adjust freesize so that it accounts for how much memory
		 * is used by this zone for memmap. This affects the watermark
		 * and per-cpu initialisations
		 */
		memmap_pages = calc_memmap_size(size, realsize);
		if (!is_highmem_idx(j)) {
			if (freesize >= memmap_pages) {
				freesize -= memmap_pages;
				if (memmap_pages)
					printk(KERN_DEBUG
					       "  %s zone: %lu pages used for memmap\n",
					       zone_names[j], memmap_pages);
			} else
				pr_warn("  %s zone: %lu pages exceeds freesize %lu\n",
					zone_names[j], memmap_pages, freesize);
		}

		/* Account for reserved pages */
		if (j == 0 && freesize > dma_reserve) {
			freesize -= dma_reserve;
			printk(KERN_DEBUG "  %s zone: %lu pages reserved\n",
			       zone_names[0], dma_reserve);
		}

		if (!is_highmem_idx(j))
			nr_kernel_pages += freesize;
		/* Charge for highmem memmap if there are enough kernel pages */
		else if (nr_kernel_pages > memmap_pages * 2)
			nr_kernel_pages -= memmap_pages;
		nr_all_pages += freesize;

		/*
		 * Set an approximate value for lowmem here, it will be adjusted
		 * when the bootmem allocator frees pages into the buddy system.
		 * And all highmem pages will be managed by the buddy system.
		 */
		zone->managed_pages = is_highmem_idx(j) ? realsize : freesize;
#ifdef CONFIG_NUMA
		zone->node = nid;
#endif
		zone->name = zone_names[j];
		zone->zone_pgdat = pgdat;
		spin_lock_init(&zone->lock);
		zone_seqlock_init(zone);
		zone_pcp_init(zone);

		if (!size)
			continue;

		set_pageblock_order();
		setup_usemap(pgdat, zone, zone_start_pfn, size);
		ret = init_currently_empty_zone(zone, zone_start_pfn, size);
		BUG_ON(ret);
		memmap_init(size, nid, j, zone_start_pfn);
	}
}
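To make the freesize accounting in the zone loop concrete, here is a hedged userspace sketch for a single non-highmem zone; all numbers are invented for illustration, and sizeof(struct page) is again assumed to be 64 bytes:

/* Userspace sketch of the freesize accounting free_area_init_core()
 * performs for one non-highmem zone (j == 0, so dma_reserve applies).
 * All values are invented; sizeof(struct page) = 64 is an assumption. */
#include <stdio.h>

#define PAGE_SIZE        4096UL
#define STRUCT_PAGE_SIZE 64UL		/* assumption */

int main(void)
{
	unsigned long spanned     = 262144;	/* 1 GiB zone with 4 KiB pages */
	unsigned long present     = 262144;	/* no holes in this example */
	unsigned long dma_reserve = 1024;	/* hypothetical reservation */

	/* pages consumed by the struct page array covering the zone,
	 * mirroring what calc_memmap_size() estimates */
	unsigned long memmap_pages =
		(spanned * STRUCT_PAGE_SIZE + PAGE_SIZE - 1) / PAGE_SIZE;

	unsigned long freesize = present;
	freesize -= memmap_pages;	/* charged for the memmap */
	freesize -= dma_reserve;	/* charged only for zone 0 (DMA) */

	printf("memmap_pages = %lu\n", memmap_pages);	/* 4096 */
	printf("freesize     = %lu\n", freesize);	/* 257024 */
	return 0;
}

The resulting freesize feeds nr_kernel_pages and zone->managed_pages, which is why the watermark calculations later on see somewhat less memory than the zone physically contains.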
