3.4.2 特定于体系结构的设置 (二):内存初始化步骤

初始化步骤

在内核已经载入内存、而初始化的汇编程序部分已经执行完毕后,内核必须执行哪些特定于系统的步骤呢?图3-12给出了各个操作的代码流程图。

 

start_kernel

->setup_arch

  ->machine_specific_memory_setup

  ->setup_memory

  ->paging_init

    -> free_area_init

      ->free_area_init_node


(1)首先调用machine_specific_memory_setup(3.18.3版本直接在setup_arch中完成,而取消了machine_specific_memory_setup,该函数实现可以参考2.6.24)创建链表,包括系统占据的内存区和空闲内存区。由于IA-32家族的各个子体系结构获得该信息的方式稍有不同,内核提供了一个特定于机器的函数,定义在include/asm-x86/mach-type/setup.c中,type可以是default、voyager或visws。这里只讨论default的情况。

BIOS提供的映射给出了在这种情况下使用的各个内存区,在系统启动时,找到的内存区由内核函数print_memory_map显示。

/*
 * Log every entry of boot_mem_map: the entry's size and start address as
 * zero-padded hexadecimal, followed by a label describing the entry type.
 * Entries with a type not listed below are reported by numeric value.
 */
static void __init print_memory_map(void)
{
        /* Two hex digits are printed per byte of an unsigned long. */
        const int hex_width = 2 * sizeof(unsigned long);
        int idx = 0;

        while (idx < boot_mem_map.nr_map) {
                /* First half of the line; the type label continues it. */
                printk(KERN_INFO " memory: %0*Lx @ %0*Lx ",
                       hex_width,
                       (unsigned long long) boot_mem_map.map[idx].size,
                       hex_width,
                       (unsigned long long) boot_mem_map.map[idx].addr);

                switch (boot_mem_map.map[idx].type) {
                case BOOT_MEM_RAM:
                        printk(KERN_CONT "(usable)\n");
                        break;
                case BOOT_MEM_INIT_RAM:
                        printk(KERN_CONT "(usable after init)\n");
                        break;
                case BOOT_MEM_ROM_DATA:
                        printk(KERN_CONT "(ROM data)\n");
                        break;
                case BOOT_MEM_RESERVED:
                        printk(KERN_CONT "(reserved)\n");
                        break;
                default:
                        printk(KERN_CONT "type %lu\n",
                               boot_mem_map.map[idx].type);
                        break;
                }

                idx++;
        }
}


打印出来的信息类似于(取自x86 baytrail系统):

[    0.000000] e820: BIOS-provided physical RAM map:

[    0.000000] BIOS-e820: [mem 0x0000000000000000-0x000000000009c7ff] usable

[    0.000000] BIOS-e820: [mem 0x000000000009c800-0x000000000009ffff] reserved

[    0.000000] BIOS-e820: [mem 0x00000000000e0000-0x00000000000fffff] reserved

[    0.000000] BIOS-e820: [mem 0x0000000000100000-0x000000001fffffff] usable

[    0.000000] BIOS-e820: [mem 0x0000000020000000-0x00000000200fffff] reserved

[    0.000000] BIOS-e820: [mem 0x0000000020100000-0x000000006d590fff] usable

[    0.000000] BIOS-e820: [mem 0x000000006d591000-0x000000006d5c0fff] reserved

[    0.000000] BIOS-e820: [mem 0x000000006d5c1000-0x000000006d5d0fff] ACPI data

[    0.000000] BIOS-e820: [mem 0x000000006d5d1000-0x000000006d715fff] ACPI NVS

[    0.000000] BIOS-e820: [mem 0x000000006d716000-0x000000006da79fff] reserved

[    0.000000] BIOS-e820: [mem 0x000000006da7a000-0x000000006da7afff] usable

[    0.000000] BIOS-e820: [mem 0x000000006da7b000-0x000000006dabcfff] reserved

[    0.000000] BIOS-e820: [mem 0x000000006dabd000-0x000000006dc2afff] usable

[    0.000000] BIOS-e820: [mem 0x000000006dc2b000-0x000000006dff9fff] reserved

[    0.000000] BIOS-e820: [mem 0x000000006dffa000-0x000000006dffffff] usable

[    0.000000] BIOS-e820: [mem 0x00000000e00f8000-0x00000000e00f8fff] reserved

[    0.000000] BIOS-e820: [mem 0x00000000fed01000-0x00000000fed01fff] reserved

[    0.000000] BIOS-e820: [mem 0x00000000ffb00000-0x00000000ffffffff] reserved

如果BIOS没有提供信息,内核自身生成一个表,将0~640KB以及1MB以上的内存标记为可用。


(2)内核接下来调用parse_cmdline_early(该函数在3.18.3中同样被取消,相关工作直接在setup_arch中完成,可参考2.6.24)分析命令行,主要关注类似mem=xxx、highmem=xxx、memmap=xxx之类的参数。如果内核计算的值或BIOS提供的值不正确,管理员可以通过mem=和memmap=修改可用内存的数量或手动划定内存区,这些选项只适用于比较古老的计算机。highmem=允许修改检测到的高端内存域的长度,它可用于内存配置非常大的计算机,以限定可用内存的数量,因为超大内存有时会导致性能下降。

(3)下一个主要步骤在setup_memory中执行,其主要功能如下:

A. 确定可用的物理内存的数目

B. 初始化bootmem分配器

C. 接下来分配各种内存区,如运行第一个用户空间进程所需的初始RAM磁盘(initrd)。

setup_memory(void *kernel_end)
{
        struct memclust_struct * cluster;
        struct memdesc_struct * memdesc;
        unsigned long start_kernel_pfn, end_kernel_pfn;
        unsigned long bootmap_size, bootmap_pages, bootmap_start;
        unsigned long start, end;
        unsigned long i;
 
        /* Find free clusters, and init and free the bootmem accordingly.  */
        memdesc = (struct memdesc_struct *)
          (hwrpb->mddt_offset + (unsigned long) hwrpb);
 
        for_each_mem_cluster(memdesc, cluster, i) {
                printk("memcluster %lu, usage %01lx, start %8lu, end %8lu\n",
                       i, cluster->usage, cluster->start_pfn,
                       cluster->start_pfn + cluster->numpages);
 
                /* Bit 0 is console/PALcode reserved.  Bit 1 is
                   non-volatile memory -- we might want to mark
                   this for later.  */
                if (cluster->usage & 3)
                        continue;
 
                end = cluster->start_pfn + cluster->numpages;
                if (end > max_low_pfn)
                        max_low_pfn = end;
        }
 
        /*
         * Except for the NUMA systems (wildfire, marvel) all of the 
         * Alpha systems we run on support 32GB of memory or less.
         * Since the NUMA systems introduce large holes in memory addressing,
         * we can get into a situation where there is not enough contiguous
         * memory for the memory map. 
         *
         * Limit memory to the first 32GB to limit the NUMA systems to 
         * memory on their first node (wildfire) or 2 (marvel) to avoid 
         * not being able to produce the memory map. In order to access 
         * all of the memory on the NUMA systems, build with discontiguous
         * memory support.
         *
         * If the user specified a memory limit, let that memory limit stand.
         */
        if (!mem_size_limit)
                mem_size_limit = (32ul * 1024 * 1024 * 1024) >> PAGE_SHIFT;
        if (mem_size_limit && max_low_pfn >= mem_size_limit)
        {
                printk("setup: forcing memory size to %ldK (from %ldK).\n",
                       mem_size_limit << (PAGE_SHIFT - 10),
                       max_low_pfn    << (PAGE_SHIFT - 10));
                max_low_pfn = mem_size_limit;
        }
 
        /* Find the bounds of kernel memory.  */
        start_kernel_pfn = PFN_DOWN(KERNEL_START_PHYS);
        end_kernel_pfn = PFN_UP(virt_to_phys(kernel_end));
        bootmap_start = -1;
 
 try_again:
        if (max_low_pfn <= end_kernel_pfn)
                panic("not enough memory to boot");
 
        /* We need to know how many physically contiguous pages
           we'll need for the bootmap.  */
        bootmap_pages = bootmem_bootmap_pages(max_low_pfn);
 
        /* Now find a good region where to allocate the bootmap.  */
        for_each_mem_cluster(memdesc, cluster, i) {
                if (cluster->usage & 3)
                        continue;
 
                start = cluster->start_pfn;
                end = start + cluster->numpages;
                if (start >= max_low_pfn)
                        continue;
                if (end > max_low_pfn)
                        end = max_low_pfn;
                if (start < start_kernel_pfn) {
                        if (end > end_kernel_pfn
                            && end - end_kernel_pfn >= bootmap_pages) {
                                bootmap_start = end_kernel_pfn;
                                break;
                        } else if (end > start_kernel_pfn)
                                end = start_kernel_pfn;
                } else if (start < end_kernel_pfn)
                        start = end_kernel_pfn;
                if (end - start >= bootmap_pages) {
                        bootmap_start = start;
                                                                                   
                        break;
                }
        }
 
        if (bootmap_start == ~0UL) {
                max_low_pfn >>= 1;
                goto try_again;
        }
 
        /* Allocate the bootmap and mark the whole MM as reserved.  */
        bootmap_size = init_bootmem(bootmap_start, max_low_pfn);
 
        /* Mark the free regions.  */
        for_each_mem_cluster(memdesc, cluster, i) {
                if (cluster->usage & 3)
                        continue;
 
                start = cluster->start_pfn;
                end = cluster->start_pfn + cluster->numpages;
                if (start >= max_low_pfn)
                        continue;
                if (end > max_low_pfn)
                        end = max_low_pfn;
                if (start < start_kernel_pfn) {
                        if (end > end_kernel_pfn) {
                                free_bootmem(PFN_PHYS(start),
                                             (PFN_PHYS(start_kernel_pfn)
                                              - PFN_PHYS(start)));
                                printk("freeing pages %ld:%ld\n",
                                       start, start_kernel_pfn);
                                start = end_kernel_pfn;
                        } else if (end > start_kernel_pfn)
                                end = start_kernel_pfn;
                } else if (start < end_kernel_pfn)
                        start = end_kernel_pfn;
                if (start >= end)
                        continue;
 
                free_bootmem(PFN_PHYS(start), PFN_PHYS(end) - PFN_PHYS(start));
                printk("freeing pages %ld:%ld\n", start, end);
        }
 
        /* Reserve the bootmap memory.  */
        reserve_bootmem(PFN_PHYS(bootmap_start), bootmap_size,
                        BOOTMEM_DEFAULT);
        printk("reserving pages %ld:%ld\n", bootmap_start, bootmap_start+PFN_UP(bootmap_size));
 
#ifdef CONFIG_BLK_DEV_INITRD
        initrd_start = INITRD_START;
        if (initrd_start) {
                initrd_end = initrd_start+INITRD_SIZE;
                printk("Initial ramdisk at: 0x%p (%lu bytes)\n",
                       (void *) initrd_start, INITRD_SIZE);
 
                if ((void *)initrd_end > phys_to_virt(PFN_PHYS(max_low_pfn))) {
                        if (!move_initrd(PFN_PHYS(max_low_pfn)))
                                printk("initrd extends beyond end of memory "
                                       "(0x%08lx > 0x%p)\ndisabling initrd\n",
                                       initrd_end,
                                       phys_to_virt(PFN_PHYS(max_low_pfn)));
                } else {
                        reserve_bootmem(virt_to_phys((void *)initrd_start),
                                        INITRD_SIZE, BOOTMEM_DEFAULT);
                }
        }
#endif /* CONFIG_BLK_DEV_INITRD */
}

                                                                                                                     

(4)paging_init初始化内核页表并启用内存分页,因为IA-32计算机上默认情况下分页是禁用的。如果内核编译了PAE支持,而且处理器也支持Execute Disable Protection,则启用该特性。通过调用pagetable_init,该函数确保了直接映射到内核地址空间的物理内存被初始化。低端内存中的所有页帧都直接映射到PAGE_OFFSET之上的虚拟内存区。这使得内核无需处理页表,即可寻址相当一部分可用内存。

/*
 * paging_init - split low memory into zones and build the memory map.
 *
 * max_pfn is set to max_low_pfn.  Page frames below the frame of
 * MAX_DMA_ADDRESS are counted in ZONE_DMA and the remainder, if any, in
 * ZONE_NORMAL.  The resulting sizes are passed to free_area_init(), after
 * which the page at ZERO_PGE is cleared.
 */
void __init paging_init(void)
{
        unsigned long zones_size[MAX_NR_ZONES] = {0, };
        unsigned long dma_limit_pfn;
        unsigned long top_pfn;

        dma_limit_pfn = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;

        max_pfn = max_low_pfn;
        top_pfn = max_pfn;

        if (dma_limit_pfn < top_pfn) {
                /* Memory reaches past the DMA limit: two zones. */
                zones_size[ZONE_DMA] = dma_limit_pfn;
                zones_size[ZONE_NORMAL] = top_pfn - dma_limit_pfn;
        } else {
                /* Everything fits below the DMA limit. */
                zones_size[ZONE_DMA] = top_pfn;
        }

        /* Initialize mem_map[].  */
        free_area_init(zones_size);

        /* Initialize the kernel's ZERO_PGE. */
        memset((void *)ZERO_PGE, 0, PAGE_SIZE);
}


(5)zone_sizes_init(3.18.3中不存在,已合并到paging_init中)初始化系统中所有结点的pg_data_t实例。首先使用add_active_range,对可用的物理内存建立一个相对简单的列表。体系结构无关的函数free_area_init_nodes接下来使用该信息建立完备的内核数据结构。这是一个非常重要的步骤,对内核在运行时管理页帧的方式有决定性的影响。

/*
 * free_area_init - initialise node 0 from a table of per-zone sizes.
 * @zones_size: per-zone size table, handed unchanged to
 *              free_area_init_node().
 *
 * The node is taken to start at the page frame of PAGE_OFFSET's physical
 * address, and no hole table is supplied.
 */
void __init free_area_init(unsigned long *zones_size)
{
        unsigned long first_pfn = __pa(PAGE_OFFSET) >> PAGE_SHIFT;

        free_area_init_node(0, zones_size, first_pfn, NULL);
}
/*
 * free_area_init_node - initialise the pg_data_t of one memory node.
 * @nid:            node id; selects the descriptor through NODE_DATA().
 * @zones_size:     per-zone size table, passed through to
 *                  calculate_node_totalpages() and free_area_init_core().
 * @node_start_pfn: first page frame number of the node, stored in the
 *                  descriptor.
 * @zholes_size:    per-zone hole table, passed through to the same helpers;
 *                  free_area_init() passes NULL here.
 *
 * Records the node id and start PFN, computes the node's page totals,
 * allocates its memory map and finally runs free_area_init_core().
 * With CONFIG_HAVE_MEMBLOCK_NODE_MAP the node's PFN range is looked up and
 * logged; otherwise start_pfn and end_pfn stay 0.
 */
void __paginginit free_area_init_node(int nid, unsigned long *zones_size,
                unsigned long node_start_pfn, unsigned long *zholes_size)
{
        pg_data_t *pgdat = NODE_DATA(nid);
        unsigned long start_pfn = 0;
        unsigned long end_pfn = 0;

        /* pg_data_t should be reset to zero when it's allocated */
        WARN_ON(pgdat->nr_zones || pgdat->classzone_idx);

        pgdat->node_id = nid;
        pgdat->node_start_pfn = node_start_pfn;
#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
        get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
        /*
         * Widen to u64 *before* shifting: shifting the unsigned long first
         * drops the high bits when unsigned long is 32 bits wide and the
         * node ends at or above 4GB.  An empty node (end_pfn == 0) is
         * printed with end address 0 instead of wrapping to all-ones.
         */
        printk(KERN_INFO "Initmem setup node %d [mem %#010Lx-%#010Lx]\n", nid,
                        (u64) start_pfn << PAGE_SHIFT,
                        end_pfn ? ((u64) end_pfn << PAGE_SHIFT) - 1 : 0);
#endif
        calculate_node_totalpages(pgdat, start_pfn, end_pfn,
                                  zones_size, zholes_size);

        alloc_node_mem_map(pgdat);
#ifdef CONFIG_FLAT_NODE_MEM_MAP
        printk(KERN_DEBUG "free_area_init_node: node %d, pgdat %08lx, node_mem_map %08lx\n",
                nid, (unsigned long)pgdat,
                (unsigned long)pgdat->node_mem_map);
#endif

        free_area_init_core(pgdat, start_pfn, end_pfn,
                            zones_size, zholes_size);
}


你可能感兴趣的:(3.4.2 特定于体系结构的设置 (二):内存初始化步骤)