Linux内存模型之bootmem分配器

 

简介:bootmem分配器是系统启动初期使用的内存分配器。在耳熟能详的伙伴系统、slab系统建立之前,内存都是通过bootmem分配器来分配的;伙伴系统框架建立起来之后,bootmem会过渡到伙伴系统。bootmem的大致思想是:收集系统中的可用内存并建立bit位图,需要内存时就从这些空闲内存中分配,分配出去的页在位图中标记为已占用。这种分配方式很低效,但由于只用于启动阶段的很短一段时间,所以完全可以接受。好了,废话不多说,一起看代码吧。
 

相关阅读:http://www.linuxidc.com/Linux/2012-02/53138.htm

一、初始化部分
init/main.c中
start_kernel函数
{
  ...
  setup_arch(&command_line);
  ...
}
代码位于arch/arm/kernel/setup.c中,
void __init setup_arch(char **cmdline_p)
{
 ...
 paging_init(mdesc);
 ...
}

 


*******************************************************
*  继续跟进paging_init函数在arch/arm/mm/mmu.c中

*******************************************************


/*
 * paging_init() sets up the page tables, initialises the zone memory
 * maps, and sets up the zero page, bad page and bad page tables.
 *
 * @mdesc: machine descriptor; within this function it is only forwarded
 *         to devicemaps_init().
 *
 * The calls below run in a fixed order; the zero page is allocated with
 * early_alloc() before bootmem_init() is invoked.
 */
void __init paging_init(struct machine_desc *mdesc)
{
    void *zero_page;

    /* Limit memblock allocations to lowmem_limit. */
    memblock_set_current_limit(lowmem_limit);

    build_mem_type_table();
    prepare_page_table();
    map_lowmem();
    devicemaps_init(mdesc);
    kmap_init();

    top_pmd = pmd_off_k(0xffff0000);

    /* allocate the zero page. */
    zero_page = early_alloc(PAGE_SIZE);

    /* Bring up the bootmem allocator (bootmem_init() is the entry point). */
    bootmem_init();

    /* Record the zero page's struct page and hand it to __flush_dcache_page(). */
    empty_zero_page = virt_to_page(zero_page);
    __flush_dcache_page(NULL, empty_zero_page);

}

 

***************************************************************
*  其中的bootmem_init()就是我们要找的,对应arm的定义在
*  arch/arm/mm/init.c
***************************************************************
/*
 * bootmem_init - set up the boot-time allocator and the global memory limits.
 *
 * Finds the PFN limits of the memory banks, initialises bootmem over the
 * low-memory range [min, max_low), runs the sparsemem setup calls, calls
 * arm_bootmem_free(), and finally sets high_memory, max_low_pfn and max_pfn.
 */
void __init bootmem_init(void)
{
    unsigned long min, max_low, max_high;

    max_low = max_high = 0;

    /*
     * Determine the extent of memory: min is the lowest PFN, max_low the
     * upper bound of low memory, max_high the upper bound of all memory.
     */
    find_limits(&min, &max_low, &max_high);


    /* ARM bootmem initialisation; only low memory is handed over. */
    arm_bootmem_init(min, max_low);

    /*
     * Sparsemem tries to allocate bootmem in memory_present(),
     * so must be done after the fixed reservations
     */
    arm_memory_present();

    /*
     * sparse_init() needs the bootmem allocator up and running.
     */
    sparse_init();

    /*
     * Now free the memory - free_area_init_node needs
     * the sparse mem_map arrays initialized by sparse_init()
     * for memmap_init_zone(), otherwise all PFNs are invalid.
     */
    arm_bootmem_free(min, max_low, max_high);

    /*
     * Virtual address just past the end of low memory. The "- 1 ... + 1"
     * form converts the last byte's address rather than the end address
     * itself.
     */
    high_memory = __va(((phys_addr_t)max_low << PAGE_SHIFT) - 1) + 1;

    /*
     * This doesn't seem to be used by the Linux memory manager any
     * more, but is used by ll_rw_block.  If we can get rid of it, we
     * also get rid of some of the stuff above as well.
     *
     * Note: max_low_pfn and max_pfn reflect the number of _pages_ in
     * the system, not the maximum PFN.
     */
    max_low_pfn = max_low - PHYS_PFN_OFFSET;
    max_pfn = max_high - PHYS_PFN_OFFSET;
}

*******************************************************
*  这里的函数需要一个一个的过一下,先看这个
*  find_limits(&min, &max_low, &max_high)函数
*  同文件夹下
*******************************************************
/*
 * find_limits - scan the memory banks for the PFN bounds of the system.
 * @min:      out: smallest bank_pfn_start() value over all banks
 * @max_low:  out: largest bank_pfn_end() value over banks not flagged highmem
 * @max_high: out: largest bank_pfn_end() value over all banks
 *
 * If there are no banks, *min is left at -1UL and both maxima at 0.
 */
static void __init find_limits(unsigned long *min, unsigned long *max_low,
    unsigned long *max_high)
{
    struct meminfo *info = &meminfo;  /* table describing the memory banks */
    int idx;

    *min = -1UL;
    *max_high = 0;
    *max_low = 0;

    for_each_bank (idx, info) {
        struct membank *cur = &info->bank[idx];
        unsigned long first_pfn = bank_pfn_start(cur);
        unsigned long last_pfn = bank_pfn_end(cur);

        /* Track the overall minimum and maximum. */
        if (first_pfn < *min)
            *min = first_pfn;
        if (last_pfn > *max_high)
            *max_high = last_pfn;

        /* Highmem banks never raise the low-memory bound. */
        if (!cur->highmem && last_pfn > *max_low)
            *max_low = last_pfn;
    }
}

这样会遍历所有的bank,start和end分别是每个bank的起始和结束页帧号(pfn),min最终是所有bank中最小的起始pfn,max_high是最大的结束pfn,max_low则是低端内存的上界。以平时耳熟能详的x86为例,低端内存上界常见的是896M,但它并不是一个固定值,而是根据总内存大小来确定的。

**************************************************************
*  回过头来继续看arm_bootmem_init(min, max_low)

*  将刚才得到的值传入,可见bootmem是操作的低端内存
**************************************************************
/*
 * arm_bootmem_init - create the bootmem bitmap and seed it from memblock.
 * @start_pfn: first PFN managed by bootmem
 * @end_pfn:   PFN bound of the managed range; regions are clamped to it
 *
 * Sizes and allocates the bitmap, initialises node 0's bootmem state, then
 * frees every memblock "memory" region into bootmem and re-reserves every
 * memblock "reserved" region.
 */
static void __init arm_bootmem_init(unsigned long start_pfn,
    unsigned long end_pfn)
{
    struct memblock_region *reg;
    unsigned int boot_pages;
    phys_addr_t bitmap;
    pg_data_t *pgdat;

    /*
     * Allocate the bootmem bitmap page.  This must be in a region
     * of memory which has already been mapped.
     *
     * First work out how many pages the bitmap needs.  The bitmap holds
     * one bit per page recording whether that page is allocated, and it
     * occupies memory itself.  The argument is a page count
     * (end_pfn - start_pfn): taking the 896M low-memory figure familiar
     * from x86 as an example, with 4K pages the range 0..896M corresponds
     * to 896M/4K pages.
     */

    boot_pages = bootmem_bootmap_pages(end_pfn - start_pfn);

    /* Allocate the bitmap from memblock, bounded by the end of low memory. */
    bitmap = memblock_alloc_base(boot_pages << PAGE_SHIFT, L1_CACHE_BYTES,
                __pfn_to_phys(end_pfn));

    /*
     * Initialise the bootmem allocator, handing the
     * memory banks over to bootmem.
     * This initialises the allocator core, including the initial fill of
     * the bitmap.
     */
    node_set_online(0);
    pgdat = NODE_DATA(0);
    init_bootmem_node(pgdat, __phys_to_pfn(bitmap), start_pfn, end_pfn);

    /*
     * Free the lowmem regions from memblock into bootmem.
     * That is, mark the memory memblock has collected as available in the
     * bootmem bitmap.
     */
    for_each_memblock(memory, reg) {
        unsigned long start = memblock_region_memory_base_pfn(reg);
        unsigned long end = memblock_region_memory_end_pfn(reg);

        /* Clamp the region to the managed range. */
        if (end >= end_pfn)
            end = end_pfn;
        /*
         * Stop at the first region that is empty after clamping.
         * NOTE(review): presumably regions are sorted by address, so no
         * later region can be in range - confirm.
         */
        if (start >= end)
            break;

        free_bootmem(__pfn_to_phys(start), (end - start) << PAGE_SHIFT);
    }

    /*
     * Reserve the lowmem memblock reserved regions in bootmem.
     * Second pass: mark the memory memblock already has in use as
     * reserved in the bitmap.
     */
    for_each_memblock(reserved, reg) {
        unsigned long start = memblock_region_reserved_base_pfn(reg);
        unsigned long end = memblock_region_reserved_end_pfn(reg);

        /* Same clamping and early exit as in the loop above. */
        if (end >= end_pfn)
            end = end_pfn;
        if (start >= end)
            break;

        reserve_bootmem(__pfn_to_phys(start),
                    (end - start) << PAGE_SHIFT, BOOTMEM_DEFAULT);
    }
}

*******************************************************
*  bootmem_bootmap_pages是个体系结构无关函数在
*  mm/bootmem.c中定义
*******************************************************
/**
 * bootmem_bootmap_pages - calculate bitmap size in pages
 * @pages: number of pages the bitmap has to represent
 *
 * Takes the byte size from bootmap_bytes(), rounds it up with PAGE_ALIGN()
 * and converts the result to a page count.
 */
unsigned long __init bootmem_bootmap_pages(unsigned long pages)
{
    return PAGE_ALIGN(bootmap_bytes(pages)) >> PAGE_SHIFT;
}


/*
 * bootmap_bytes - bitmap size in bytes for @pages pages.
 *
 * One bit per page, rounded up to whole bytes and then aligned to
 * sizeof(long) with ALIGN().
 */
static unsigned long __init bootmap_bytes(unsigned long pages)
{
    /* Eight pages per byte, rounding any remainder up. */
    unsigned long nbytes = (pages + 7) >> 3;

    return ALIGN(nbytes, sizeof(long));
}

 

返回的即是bitmap的size,下面函数将分配bitmap,
*******************************************************
*  memblock_alloc_base这个东东以前没见过,顺着看看,它定义在
*  mm/memblock.c中,下面贴出的是它内部调用的__memblock_alloc_base,
*  这里的参数max_addr为896M
*******************************************************

/*
 * __memblock_alloc_base - find and reserve @size bytes, searching up to @max_addr.
 * @size:     number of bytes wanted (rounded up to @align first)
 * @align:    alignment passed on to the search
 * @max_addr: upper bound passed to memblock_find_base()
 *
 * Returns the address found, or 0 if the search fails or the range cannot
 * be added to memblock.reserved.
 */
phys_addr_t __init __memblock_alloc_base(phys_addr_t size, phys_addr_t align, phys_addr_t max_addr)
{
    phys_addr_t base;

    /* We align the size to limit fragmentation. Without this, a lot of
     * small allocs quickly eat up the whole reserve array on sparc
     */
    size = memblock_align_up(size, align);

    base = memblock_find_base(size, align, 0, max_addr);
    if (base == MEMBLOCK_ERROR)
        return 0;

    /* A non-zero result from memblock_add_region() is treated as failure. */
    if (memblock_add_region(&memblock.reserved, base, size))
        return 0;

    return base;
}

*******************************************************
*  memblock_find_base在可操作内存范围内,找到第一个bitmap
*  size大小的空闲内存
*******************************************************
/*
 * memblock_find_base - search the memory regions for a range of @size bytes.
 * @size:  bytes wanted; must be non-zero (BUG_ON otherwise)
 * @align: alignment forwarded to memblock_find_region()
 * @start: lower bound of the search window
 * @end:   upper bound of the search window; MEMBLOCK_ALLOC_ACCESSIBLE is
 *         replaced by memblock.current_limit
 *
 * Returns the first hit reported by memblock_find_region(), or
 * MEMBLOCK_ERROR if no region yields one.
 */
static phys_addr_t __init_memblock memblock_find_base(phys_addr_t size,
            phys_addr_t align, phys_addr_t start, phys_addr_t end)
{
    long idx;

    BUG_ON(0 == size);

    /* Pump up max_addr */
    if (end == MEMBLOCK_ALLOC_ACCESSIBLE)
        end = memblock.current_limit;

    /* We do a top-down search, this tends to limit memory
     * fragmentation by keeping early boot allocs near the
     * top of memory
     */
    for (idx = memblock.memory.cnt - 1; idx >= 0; idx--) {
        phys_addr_t rgn_base = memblock.memory.regions[idx].base;
        phys_addr_t rgn_size = memblock.memory.regions[idx].size;
        phys_addr_t rgn_end = rgn_base + rgn_size;
        phys_addr_t lo, hi, hit;

        /* Region too small to ever hold the request. */
        if (rgn_size < size)
            continue;
        /* Region lies entirely below the window: stop searching. */
        if (rgn_end <= start)
            break;

        /* Intersect the region with the search window. */
        lo = max(rgn_base, start);
        hi = min(rgn_end, end);
        if (lo < hi) {
            hit = memblock_find_region(lo, hi, size, align);
            if (hit != MEMBLOCK_ERROR)
                return hit;
        }
    }
    return MEMBLOCK_ERROR;
}

memblock应当是对收集到的内存信息的一个描述,回头再看一下。这里会找到这么一个基地址然后将其返回(以前这个地址应当是紧挨着内核镜像bss后面)。继续回到arm_bootmem_init函数,下一个是init_bootmem_node:

 


/**
 * init_bootmem_node - register a node as boot memory
 * @pgdat: node to register
 * @freepfn: pfn where the bitmap for this node is to be placed
 * @startpfn: first pfn on the node
 * @endpfn: first pfn after the node
 *
 * Thin wrapper that hands the node's bdata to init_bootmem_core().
 *
 * Returns the number of bytes needed to hold the bitmap for this node.
 */
unsigned long __init init_bootmem_node(pg_data_t *pgdat, unsigned long freepfn,
                unsigned long startpfn, unsigned long endpfn)
{
    bootmem_data_t *node_bdata = pgdat->bdata;

    return init_bootmem_core(node_bdata, freepfn, startpfn, endpfn);
}

/*
 * Called once to set up the allocator itself (bootmem core initialisation).
 *
 * @bdata:    bootmem descriptor to fill in and link
 * @mapstart: PFN at which the bitmap lives
 * @start:    stored as the node's node_min_pfn
 * @end:      stored as the node's node_low_pfn
 *
 * Returns the bitmap size in bytes, as computed by bootmap_bytes().
 */
static unsigned long __init init_bootmem_core(bootmem_data_t *bdata,
    unsigned long mapstart, unsigned long start, unsigned long end)
{
    unsigned long bitmap_bytes;

    /* Receives pointers to start/end, so it must run before they are used. */
    mminit_validate_memmodel_limits(&start, &end);

    /* Same size computation that was used to allocate the bitmap. */
    bitmap_bytes = bootmap_bytes(end - start);

    bdata->node_low_pfn = end;       /* end of the node */
    bdata->node_min_pfn = start;     /* start of the node */
    bdata->node_bootmem_map = phys_to_virt(PFN_PHYS(mapstart));  /* bitmap base */
    link_bootmem(bdata);

    /*
     * Initially all pages are reserved - setup_arch() has to
     * register free RAM areas explicitly.
     * Hence the bitmap is filled with 0xff (every bit set), not zeroed.
     */
    memset(bdata->node_bootmem_map, 0xff, bitmap_bytes);

    bdebug("nid=%td start=%lx map=%lx end=%lx mapsize=%lx\n",
        bdata - bootmem_node_data, start, mapstart, end, bitmap_bytes);

    return bitmap_bytes;
}

靠,现在的分配器真成分配器了,不带收集的??

总结:总括了bootmem分配器,介绍了其原理,后文准备跟一下memblock这个东东,以及那些分配函数具体实现^.^!.

相关阅读:http://www.linuxidc.com/Linux/2012-02/53140.htm

简介:介绍一下上文遗留的两个结构memblock以及meminfo.

相关阅读:http://www.linuxidc.com/Linux/2012-02/53139.htm

**********************************

*  在arch/arm/kernel/setup.c中,
 

*  sanity_check_meminfo();

**********************************
arm_memblock_init(&meminfo, mdesc);

arch/arm/mm/mmu.c中,
/*
 * sanity_check_meminfo - validate the meminfo banks against the vmalloc area.
 *
 * Walks meminfo.bank[], tagging each bank as highmem or not, and splitting
 * (CONFIG_HIGHMEM) or dropping/truncating (!CONFIG_HIGHMEM) banks that
 * overlap the region at or above vmalloc_min.  Tracks the highest lowmem
 * end address in lowmem_limit, then stores the resulting bank count and
 * passes lowmem_limit to memblock_set_current_limit().
 */
void __init sanity_check_meminfo(void)
{
 int i, j, highmem = 0;

 /* Walk every bank: i is the read index, j the write index. */
 for (i = 0, j = 0; i < meminfo.nr_banks; i++) {
  struct membank *bank = &meminfo.bank[j];
  *bank = meminfo.bank[i];

#ifdef CONFIG_HIGHMEM
  /*
   * With highmem configured: a bank whose start maps at or above
   * vmalloc_min, or below PAGE_OFFSET, is highmem.  The flag is
   * never cleared, so every later bank is marked highmem as well.
   */
  if (__va(bank->start) >= vmalloc_min ||
      __va(bank->start) < (void *)PAGE_OFFSET)
   highmem = 1;

  bank->highmem = highmem;       /* tag the bank */

  /*
   * Split those memory banks which are partially overlapping
   * the vmalloc area greatly simplifying things later.
   */
  /*
   * The bank starts in lowmem but its size reaches past the
   * lowmem range, i.e. it starts below vmalloc_min and ends
   * above it.
   */
  if (__va(bank->start) < vmalloc_min &&
      bank->size > vmalloc_min - __va(bank->start)) {
   /*
    * If the bank table is already full the bank cannot be
    * split: log it and fall through to the truncation below.
    * Otherwise split it in two and mark the upper part highmem.
    */
   if (meminfo.nr_banks >= NR_BANKS) {
    printk(KERN_CRIT "NR_BANKS too low, "
       "ignoring high memory\n");
   } else {
    /* Shift the remaining banks up by one to make room. */
    memmove(bank + 1, bank,
     (meminfo.nr_banks - i) * sizeof(*bank));
    meminfo.nr_banks++;
    i++;
    bank[1].size -= vmalloc_min - __va(bank->start);
    bank[1].start = __pa(vmalloc_min - 1) + 1;
    bank[1].highmem = highmem = 1;
    j++;
   }
   /* The lower part now ends exactly at vmalloc_min. */
   bank->size = vmalloc_min - __va(bank->start);
  }
#else
  bank->highmem = highmem;   /* always 0 without CONFIG_HIGHMEM */

  /*
   * Check whether this memory bank would entirely overlap
   * the vmalloc area.
   */
  /* Without highmem support such a bank is dropped entirely. */
  if (__va(bank->start) >= vmalloc_min ||
      __va(bank->start) < (void *)PAGE_OFFSET) {
   printk(KERN_NOTICE "Ignoring RAM at %.8llx-%.8llx "
          "(vmalloc region overlap).\n",
          (unsigned long long)bank->start,
          (unsigned long long)bank->start + bank->size - 1);
   /* j is not advanced, so the next bank overwrites this slot. */
   continue;
  }

  /*
   * Check whether this memory bank would partially overlap
   * the vmalloc area.
   */
  if (__va(bank->start + bank->size) > vmalloc_min ||
      __va(bank->start + bank->size) < __va(bank->start)) {
   unsigned long newsize = vmalloc_min - __va(bank->start);
   printk(KERN_NOTICE "Truncating RAM at %.8llx-%.8llx "
          "to -%.8llx (vmalloc region overlap).\n",
          (unsigned long long)bank->start,
          (unsigned long long)bank->start + bank->size - 1,
          (unsigned long long)bank->start + newsize - 1);
   bank->size = newsize;
  }
#endif
  /* Track the highest end address of any non-highmem bank. */
  if (!bank->highmem && bank->start + bank->size > lowmem_limit)
   lowmem_limit = bank->start + bank->size;

  j++;
 }
#ifdef CONFIG_HIGHMEM
 /*
  * If any highmem was found, check that it can actually be used:
  * aliasing VIPT caches are not supported.
  */
 if (highmem) {
  const char *reason = NULL;

  if (cache_is_vipt_aliasing()) {
   /*
    * Interactions between kmap and other mappings
    * make highmem support with aliasing VIPT caches
    * rather difficult.
    */
   reason = "with VIPT aliasing cache";
  }
  if (reason) {
   printk(KERN_CRIT "HIGHMEM is not supported %s, ignoring high memory\n",
    reason);
   /* Drop the trailing highmem banks from the count. */
   while (j > 0 && meminfo.bank[j - 1].highmem)
    j--;
  }
 }
#endif
 meminfo.nr_banks = j;
 memblock_set_current_limit(lowmem_limit);
}


这里其中vmalloc_min 定义为static void * __initdata vmalloc_min = (void *)(VMALLOC_END - SZ_128M);
而VMALLOC_END在2440上定义于arch/arm/mach-s3c2410/include/mach/vmalloc.h:
#define VMALLOC_END 0xF6000000UL

 

上面又提到了meminfo看来不解决他也不行了,
先看定义,这个东西大概是这个位置,
arch/arm/kernel/setup.c-->setup_arch-->setup_machine_tags
这个函数中
...
 

if (mdesc->fixup)            //一种方法,可惜2440没有
  mdesc->fixup(tags, &from, &meminfo);

 if (tags->hdr.tag == ATAG_CORE) {
  if (meminfo.nr_banks != 0)
   squash_mem_tags(tags);
  save_atags(tags);
  parse_tags(tags);        //另一个地方
 }
...

跟进去最终会到这里
/*
 * Scan the tag table for this tag, and call its parse function.
 * The tag table is built by the linker from all the __tagtable
 * declarations.
 *
 * Only the first matching entry is used.  Returns 1 if an entry matched
 * (its parse function was called), 0 if the table has no entry for the tag.
 */
static int __init parse_tag(const struct tag *tag)
{
 extern struct tagtable __tagtable_begin, __tagtable_end;
 struct tagtable *entry = &__tagtable_begin;

 while (entry < &__tagtable_end) {
  if (tag->hdr.tag == entry->tag) {
   entry->parse(tag);
   return 1;
  }
  entry++;
 }

 return 0;
}

__tagtable_begin同样单独的一个段,定义在arch/arm/vmlinux.lds,
__arch_info_begin = .;
            *(.arch.info)
        __arch_info_end = .;
        __tagtable_begin = .;
            *(.taglist)
        __tagtable_end = .;
            *(.data.init)
        . = ALIGN(16);
其中同文件下
/*
 * parse_tag_mem32 - tag handler for a memory region description.
 * @tag: tag whose u.mem.start and u.mem.size describe the region
 *
 * Forwards the region to arm_add_memory() and returns its result.
 */
static int __init parse_tag_mem32(const struct tag *tag)
{
 return arm_add_memory(tag->u.mem.start, tag->u.mem.size);
}

__tagtable(ATAG_MEM, parse_tag_mem32);

又有
#define __tag __used __attribute__((__section__(".taglist.init")))
#define __tagtable(tag, fn) \
static const struct tagtable __tagtable_##fn __tag = { tag, fn }

可见parse_tag_mem32就被定义于这个段,那么就会调用它,跟进去又会调用arm_add_memory(tag->u.mem.start, tag->u.mem.size);这个是我们的重点函数,会把参数列表传递过来的内存信息对齐后打包到meminfo,找到了meminfo怎么来的,回来再继续看刚才剩下的一个函数,arm_memblock_init(&meminfo, mdesc);这个函数比较简单,这里就不贴出来了,主要是把刚才得到的内存布局中已经使用的部分标记出来比如kernel占用部分,异常表占用部分等等...

总结:分析了上文遗留的两个结构memblock以及meminfo,bootmem结构到现在应当很清晰了,下文会简单分析下具体的bootmem的分配内存
函数 ^.^!


本篇文章来源于 Linux公社网站(www.linuxidc.com)  原文链接:http://www.linuxidc.com/Linux/2012-02/53140p2.htm

 

你可能感兴趣的:(linux,struct,table,Arrays,tags,linker)