内存_内存管理的不同阶段

linux内核的内存管理分三个阶段。
1. 启动---->bootmem初始化完成为第一阶段。此阶段只能使用memblock_reserve函数分配内存。
   此阶段结束标志为:init_bootmem_done = 1.
2. bootmem初始化完--->buddy完成前。结束标志为mem_init_done = 1.
3. 全部内存初始化完毕,可以用cache和buddy分配内存。

本文先分析第1阶段。
1. 初始化:head_fsl_booke.S->machine_init->early_init_devtree:

  1. mm/memblock.c
  2. 108 void __init memblock_init(void)
  3. 109 {
  4. 110 /* Create a dummy zero size MEMBLOCK which will get coalesced away later.
  5. 111 * This simplifies the memblock_add() code below...
  6. 112 */
  7. 113 memblock.memory.region[0].base= 0;
  8. 114 memblock.memory.region[0].size= 0;
  9. 115 memblock.memory.cnt= 1;
  10. 116
  11. 117 /* Ditto. */
  12. 118 memblock.reserved.region[0].base= 0;
  13. 119 memblock.reserved.region[0].size= 0;
  14. 120 memblock.reserved.cnt= 1;
  15. 121 }

2. 内存原始数据由u-boot传入,在初始化完memblock_init后,用memblock_add_region加入原始内存数据,我的板子上配了2G内存,即:0x0000 0000->0x80000000,加完后的配置如下:
MEMBLOCK configuration:
 rmo_size    = 0x80000000
 memory.size = 0x0
 memory.cnt  = 0x1
 memory[0x0]    0x0000000000000000 - 0x000000007fffffff, 0x80000000 bytes
 reserved.cnt  = 0x1
 reserved[0x0]  0x0000000000000000 - 0xffffffffffffffff, 0x0 bytes

3. memblock_reserve用来分配内存页。
以分配内核本身占用的内存为例:
/* Reserve MEMBLOCK regions used by kernel, initrd, dt, etc... */
memblock_reserve(PHYSICAL_START, __pa(klimit) - PHYSICAL_START);
分配完之后的内存配置为:
MEMBLOCK configuration:
 rmo_size    = 0x80000000
 memory.size = 0x0
 memory.cnt  = 0x1
 memory[0x0]    0x0000000000000000 - 0x000000007fffffff, 0x80000000 bytes
 reserved.cnt  = 0x1
 reserved[0x0]  0x0000000000000000 - 0x00000000006b0fff, 0x6b1000 bytes
如果新分配的区域与已有的保留区域相邻(连续),则会合并为一个区域。
几次分配后的配置如下:
MEMBLOCK configuration:
 rmo_size    = 0x80000000
 memory.size = 0x80000000
 memory.cnt  = 0x1
 memory[0x0]    0x0000000000000000 - 0x000000007fffffff, 0x80000000 bytes
 reserved.cnt  = 0x6
 reserved[0x0]  0x0000000000000000 - 0x00000000006b0fff, 0x6b1000 bytes
 reserved[0x1]  0x0000000000ffa000 - 0x0000000000ffcfff, 0x3000 bytes
 reserved[0x2]  0x000000002fbc4000 - 0x000000002fbdefff, 0x1b000 bytes
 reserved[0x3]  0x000000002fbdfa88 - 0x000000002ffff4cc, 0x41fa45 bytes
 reserved[0x4]  0x000000002fbe4000 - 0x000000002ffff4cd, 0x41b4ce bytes
 reserved[0x5]  0x000000007ffff000 - 0x000000007fffffff, 0x1000 bytes
2. bootmem初始化完--->buddy完成前。结束标志为mem_init_done = 1
start_kernel->setup_arch->do_init_bootmem:

179 #ifndef CONFIG_NEED_MULTIPLE_NODES
180 void __init do_init_bootmem(void)
181 {
182     unsigned long i;
183     unsigned long start, bootmap_pages;
184     unsigned long total_pages;
185     int boot_mapsize;
186
187     max_low_pfn = max_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT;
188     total_pages = (memblock_end_of_DRAM() - memstart_addr) >> PAGE_SHIFT;
189 #ifdef CONFIG_HIGHMEM
190     total_pages = total_lowmem >> PAGE_SHIFT;
191     max_low_pfn = lowmem_end_addr >> PAGE_SHIFT;
192 #endif

此处的计算都是从第一阶段的memblock而来,到192行之后各变量值(均为十六进制的页帧号/页数)为:
max_low_pfn: 30000
max_pfn: 80000
total_pages: 30000
因为板子上含有highmem,所以total_pages和max_low_pfn都进行了调整,这些值只反映低端内存的情况。

194     /*
195      * Find an area to use for the bootmem bitmap.  Calculate the size of
196      * bitmap required as (Total Memory) / PAGE_SIZE / BITS_PER_BYTE.
197      * Add 1 additional page in case the address isn't page-aligned.
198      */
199     bootmap_pages = bootmem_bootmap_pages(total_pages);
200
201     start = memblock_alloc(bootmap_pages << PAGE_SHIFT, PAGE_SIZE);
202
203     min_low_pfn = MEMORY_START >> PAGE_SHIFT;
204     boot_mapsize = init_bootmem_node(NODE_DATA(0), start >> PAGE_SHIFT, min_low_pfn, max_low_pfn);

199:计算管理内存页所需的位图(bitmap)要占用多少页,其中每页用1bit表示,所以当有total_pages页时,位图大小为:
(Total Memory) / PAGE_SIZE / BITS_PER_BYTE。其中BITS_PER_BYTE=8
201:分配该BITMAP区域所占用的空间,此处仍然使用第一阶段的内存分配方法(memblock),返回的是内存物理地址。
204:初始化node节点,初始化bitmap区为0xff,即占用状态,返回bitmap区大小。
通过计算可知道,bitmap空间占6页,即:0x30000页 / 8bit = 0x6000字节,0x6000 / 4K = 6页。

206     /* Add active regions with valid PFNs */
207     for (i = 0; i < memblock.memory.cnt; i++) {
208         unsigned long start_pfn, end_pfn;
209         start_pfn = memblock.memory.region[i].base >> PAGE_SHIFT;
210         end_pfn = start_pfn + memblock_size_pages(&memblock.memory, i);
211         add_active_range(0, start_pfn, end_pfn);
212     }

此循环只会执行一次,因为由第1阶段可知memblock.memory.cnt为1。
其中:start_pfn=0, end_pfn=0x80000,把这段内存pfn加入early_node_map[MAX_ACTIVE_REGIONS=32]中,如果有多个不连续的内存段,会分别加入到early_node_map的不同项中,最多32个段。early_node_map为临时使用,最终所占空间会被释放。

217 #ifdef CONFIG_HIGHMEM
218     free_bootmem_with_active_regions(0, lowmem_end_addr >> PAGE_SHIFT);
219
220     /* reserve the sections we're already using */
221     for (i = 0; i < memblock.reserved.cnt; i++) {
222         unsigned long addr = memblock.reserved.region[i].base +
223                      memblock_size_bytes(&memblock.reserved, i) - 1;
224         if (addr < lowmem_end_addr)
225             reserve_bootmem(memblock.reserved.region[i].base,
226                     memblock_size_bytes(&memblock.reserved, i),
227                     BOOTMEM_DEFAULT);
228         else if (memblock.reserved.region[i].base < lowmem_end_addr) {
229             unsigned long adjusted_size = lowmem_end_addr -
230                       memblock.reserved.region[i].base;
231             reserve_bootmem(memblock.reserved.region[i].base,
232                     adjusted_size, BOOTMEM_DEFAULT);
233         }
234     }
235 #else
236     free_bootmem_with_active_regions(0, max_pfn);
237
238     /* reserve the sections we're already using */
239     for (i = 0; i < memblock.reserved.cnt; i++)
240         reserve_bootmem(memblock.reserved.region[i].base,
241                 memblock_size_bytes(&memblock.reserved, i),
242                 BOOTMEM_DEFAULT);
243
244 #endif

由于存在highmem,所以执行上半段。
218:释放early_node_map数组标识的lowmem的内存段,即把0~0x30000的PFN在bitmap区中对应的位清0。其中每bit标识一页,LOG信息为:
bootmem::mark_bootmem_node nid=0 start=0 end=30000 reserve=0 flags=0
bootmem::__free nid=0 start=0 end=30000
221:开始的for循环是把已经分配(reserved)的内存页标记为1,即已在使用。LOG信息如下:
bootmem::mark_bootmem_node nid=0 start=0 end=6b1 reserve=1 flags=0
bootmem::__reserve nid=0 start=0 end=6b1 flags=0
bootmem::mark_bootmem_node nid=0 start=ffa end=ffd reserve=1 flags=0
bootmem::__reserve nid=0 start=ffa end=ffd flags=0
bootmem::mark_bootmem_node nid=0 start=2fbc4 end=2fbdf reserve=1 flags=0
bootmem::__reserve nid=0 start=2fbc4 end=2fbdf flags=0
bootmem::mark_bootmem_node nid=0 start=2fbdf end=30000 reserve=1 flags=0
bootmem::__reserve nid=0 start=2fbdf end=30000 flags=0
bootmem::mark_bootmem_node nid=0 start=2fbe4 end=30000 reserve=1 flags=0
bootmem::__reserve nid=0 start=2fbe4 end=30000 flags=0
只标记了低端内存,高端内存未做标记,即:
reserved[0x5]  0x000000007ffff000 - 0x000000007fffffff, 0x1000 bytes

246     sparse_memory_present_with_active_regions(0);
247
248     init_bootmem_done = 1;
249 }

246:空函数
248:标识bootmem初始化完成,此后可以使用bootmem来分配内存页了。bootmem使用位图来分配页,每次分配都从头开始扫描位,找到满足条件的连续空页。
3. 全部内存初始化完毕,可以用cache和buddy分配内存。

相关常量定义:
 26 #define NODE_DATA(nid)      (node_data[nid])

 782 extern struct pglist_data contig_page_data;
 783 #define NODE_DATA(nid)      (&contig_page_data)
 784 #define NODE_MEM_MAP(nid)   mem_map

4714 struct pglist_data __refdata contig_page_data = {
4716     .bdata = &bootmem_node_data[0]
4718 };

38 bootmem_data_t bootmem_node_data[MAX_NUMNODES] __initdata;//MAX_NUMNODES=1(单节点时NODES_SHIFT=0,MAX_NUMNODES=1<<NODES_SHIFT)

 31 typedef struct bootmem_data {
 32     unsigned long node_min_pfn;
 33     unsigned long node_low_pfn;
 34     void *node_bootmem_map;
 35     unsigned long last_end_off;
 36     unsigned long hint_idx;
 37     struct list_head list;
 38 } bootmem_data_t;

 609 typedef struct pglist_data {
 610     struct zone node_zones[MAX_NR_ZONES];
 611     struct zonelist node_zonelists[MAX_ZONELISTS];
 612     int nr_zones;
 613 #ifdef CONFIG_FLAT_NODE_MEM_MAP /* means !SPARSEMEM */
 614     struct page *node_mem_map;
 615 #ifdef CONFIG_CGROUP_MEM_RES_CTLR
 616     struct page_cgroup *node_page_cgroup;
 617 #endif
 618 #endif
 619 #ifndef CONFIG_NO_BOOTMEM
 620     struct bootmem_data *bdata;
 621 #endif
 622 #ifdef CONFIG_MEMORY_HOTPLUG
 623     /*
 624      * Must be held any time you expect node_start_pfn, node_present_pages
 625      * or node_spanned_pages stay constant.  Holding this will also
 626      * guarantee that any pfn_valid() stays that way.
 627      *
 628      * Nests above zone->lock and zone->size_seqlock.
 629      */
 630     spinlock_t node_size_lock;
 631 #endif
 632     unsigned long node_start_pfn;
 633     unsigned long node_present_pages; /* total number of physical pages */
 634     unsigned long node_spanned_pages; /* total size of physical page
 635                          range, including holes */
 636     int node_id;
 637     wait_queue_head_t kswapd_wait;
 638     struct task_struct *kswapd;
 639     int kswapd_max_order;
 640 } pg_data_t;

 576 struct zonelist {
 577     struct zonelist_cache *zlcache_ptr;          // NULL or &zlcache = NULL
 578     struct zoneref _zonerefs[MAX_ZONES_PER_ZONELIST + 1];//MAX_ZONES_PER_ZONELIST=4
 579 #ifdef CONFIG_NUMA
 580     struct zonelist_cache zlcache;               // optional ...
 581 #endif
 582 };

 554 struct zoneref {
 555     struct zone *zone;  /* Pointer to actual zone */
 556     int zone_idx;       /* zone_idx(zoneref->zone) */
 557 };

内存初始化步骤:
1. start_kernel---->setup_arch->paging_init
                |-->setup_per_cpu_areas
                |-->build_all_zonelists
                |-->mem_init
                 -->setup_per_cpu_pageset


start_kernel---->setup_arch->paging_init代码如下:

  1. 273 /*
  2. 274 * paging_init() sets up the page tables - in fact we've already done this.
  3. 275 */
  4. 276 void __init paging_init(void)
  5. 277 {
  6. 278     unsigned long total_ram = memblock_phys_mem_size();
  7. 279     phys_addr_t top_of_ram = memblock_end_of_DRAM();
  8. 280     unsigned long max_zone_pfns[MAX_NR_ZONES];
  9. 281
  10. 282 #ifdef CONFIG_PPC32
  11. 283     unsigned long v = __fix_to_virt(__end_of_fixed_addresses- 1);
  12. 284     unsigned long end = __fix_to_virt(FIX_HOLE);
  13. 285
  14. 286     for (; v< end; v+= PAGE_SIZE)
  15. 287     map_page(v, 0, 0);/* XXX gross */
  16. 288 #endif
  17. 289
  18. 290 #ifdef CONFIG_HIGHMEM
  19. 291     map_page(PKMAP_BASE, 0, 0);/* XXX gross */
  20. 292     pkmap_page_table = virt_to_kpte(PKMAP_BASE);
  21. 293
  22. 294     kmap_pte = virt_to_kpte(__fix_to_virt(FIX_KMAP_BEGIN));
  23. 295     kmap_prot = PAGE_KERNEL;
  24. 296 #endif/* CONFIG_HIGHMEM */
  25. 297
  26. 298     printk(KERN_DEBUG "Top of RAM: 0x%llx, Total RAM: 0x%lx\n",
  27. 299            (unsigned long long)top_of_ram, total_ram);
  28. 300     printk(KERN_DEBUG "Memory hole size: %ldMB\n",
  29. 301            (long int)((top_of_ram - total_ram) >> 20));
  30. 302     memset(max_zone_pfns, 0,sizeof(max_zone_pfns));
  31. 303 #ifdef CONFIG_HIGHMEM
  32. 304     max_zone_pfns[ZONE_DMA]= lowmem_end_addr>> PAGE_SHIFT;
  33. 305     max_zone_pfns[ZONE_HIGHMEM]= top_of_ram>> PAGE_SHIFT;
  34. 306 #else
  35. 307     max_zone_pfns[ZONE_DMA]= top_of_ram>> PAGE_SHIFT;
  36. 308 #endif
  37. 309     free_area_init_nodes(max_zone_pfns);
  38. 310
  39. 311     mark_nonram_nosave();
  40. 312 }
  41. 313 #endif/* ! CONFIG_NEED_MULTIPLE_NODES */

278-280:计算整个内存的大小,此处为:0x8000 0000
282-288: 映射临时映射空间,也称固定映射
290-296:映射高端内存PTE
309:最为重要,free_area_init_nodes函数清空bitmap区,把所有页设为保留。

                |-->setup_per_cpu_areas
该函数只在SMP情况下才有实际内容;非SMP情况下为空函数,此处即为空。
                |-->build_all_zonelists
函数代码为:


  1. 3031 void build_all_zonelists(void*data)
  2. 3032 {
  3. 3033     set_zonelist_order();
  4. 3034
  5. 3035     if (system_state== SYSTEM_BOOTING){
  6. 3036         __build_all_zonelists(NULL);
  7. 3037         mminit_verify_zonelist();
  8. 3038         cpuset_init_current_mems_allowed();
  9. 3039     } else{
  10. 3040     /* we have to stop all cpus to guarantee there is no user
  11. 3041     of zonelist */
  12. 3042     stop_machine(__build_all_zonelists, data,NULL);
  13. 3043     /* cpuset refresh routine should be here */
  14. 3044     }
  15. 3045     vm_total_pages = nr_free_pagecache_pages();
  16. 3046 /*
  17. 3047 * Disable grouping by mobility if the number of pages in the
  18. 3048 * system is too low to allow the mechanism to work. It would be
  19. 3049 * more accurate, but expensive to check per-zone. This check is
  20. 3050 * made on memory-hotadd so a system can start with mobility
  21. 3051 * disabled and enable it later
  22. 3052 */
  23. 3053      if (vm_total_pages<(pageblock_nr_pages* MIGRATE_TYPES))
  24. 3054          page_group_by_mobility_disabled = 1;
  25. 3055      else
  26. 3056          page_group_by_mobility_disabled = 0;
  27. 3057
  28. 3058      printk("Built %i zonelists in %s order, mobility grouping %s. "
  29. 3059               "Total pages: %ld\n",
  30. 3060      nr_online_nodes,
  31. 3061      zonelist_order_name[current_zonelist_order],
  32. 3062      page_group_by_mobility_disabled ?"off": "on",
  33. 3063      vm_total_pages);
  34. 3064 #ifdef CONFIG_NUMA
  35. 3065      printk("Policy zone: %s\n", zone_names[policy_zone]);
  36. 3066 #endif
  37. 3067 }

函数中:__build_all_zonelists函数建立了zone链表,当一个zone不能满足当前分配时会根据此链表来找到下一个可以分配的zone。当有多个节点时,建立后的结果为:
b2->b1->b0->a2->a1->a0 (b=node1,a=node0,2=highmem, 1=normal, 0=dma)

|-->mem_init由mm_init调用,mm_init函数代码为:

  1. 526 static void __init mm_init(void)
  2. 527 {
  3. 532     page_cgroup_init_flatmem();
  4. 533     mem_init();
  5. 534     kmem_cache_init();
  6. 535     pgtable_cache_init();
  7. 536     vmalloc_init();
  8. 537 }

532:为空函数
533:函数执行完毕后,mem_init_done = 1;即可以使用buddy系统分配内存了。
534:用来初始化slab和cache,函数执行完后slab可以使用,可以分配少于1页的内存了。
535:空函数
536:vmalloc空间可以正常使用了。
整个内存的布局为:
Kernel virtual memory layout:
  * 0xfffcf000..0xfffff000  : fixmap
  * 0xff800000..0xffc00000  : highmem PTEs
  * 0xff7d8000..0xff800000  : early ioremap
  * 0xf1000000..0xff7d8000  : vmalloc & ioremap

你可能感兴趣的:(内存_内存管理的不同阶段)