简介:bootmem分配器是系统启动初期使用的内存分配方式。在耳熟能详的伙伴系统、slab系统建立之前,内存都是利用bootmem分配器来分配的;伙伴系统框架建立起来后,bootmem会过渡到伙伴系统。bootmem的大致思想是:收集内存中的可用内存并建立bit位图,需要的内存从这些空闲内存中分配,分配后就在位图中标记为占用。这种分配方式很低效,但由于只用于启动阶段很短的一段时间,所以也完全可以接受。好了,废话不多说,一起看代码吧。
相关阅读:http://www.linuxidc.com/Linux/2012-02/53138.htm
一、初始化部分
init/main.c中
start_kernel函数
{
...
setup_arch(&command_line);
...
}
代码位于arch/arm/kernel/setup.c中,
void __init setup_arch(char **cmdline_p)
{
...
paging_init(mdesc);
...
}
*******************************************************
* 继续跟进paging_init函数在arch/arm/mm/mmu.c中
*
*******************************************************
/*
 * paging_init() sets up the page tables, initialises the zone memory
 * maps, and sets up the zero page, bad page and bad page tables.
 *
 * @mdesc: machine descriptor; handed on to devicemaps_init().
 */
void __init paging_init(struct machine_desc *mdesc)
{
void *zero_page;
/* Set memblock's current allocation limit to lowmem_limit. */
memblock_set_current_limit(lowmem_limit);
build_mem_type_table();
prepare_page_table();
map_lowmem();
devicemaps_init(mdesc);
kmap_init();
top_pmd = pmd_off_k(0xffff0000);
/* allocate the zero page. */
zero_page = early_alloc(PAGE_SIZE);
/* Bring up the bootmem allocator; the zero page was allocated before this call. */
bootmem_init();
/* Record the zero page as a struct page, then flush it via __flush_dcache_page(). */
empty_zero_page = virt_to_page(zero_page);
__flush_dcache_page(NULL, empty_zero_page);
}
***************************************************************
* 其中的bootmem_init()就是我们要找的,对应arm的定义在
* arch/arm/mm/init.c
***************************************************************
/*
 * bootmem_init() - bring up the boot-time memory allocator on ARM.
 *
 * Determines the pfn limits of memory, initialises bootmem over the
 * low-memory range, runs the sparsemem setup steps, calls
 * arm_bootmem_free() and finally sets high_memory, max_low_pfn and
 * max_pfn.
 */
void __init bootmem_init(void)
{
unsigned long min, max_low, max_high;
max_low = max_high = 0;
/*
 * Find the extent of memory: max_low is the upper bound of low
 * memory, max_high the upper bound of all memory.
 */
find_limits(&min, &max_low, &max_high);
/* Initialise the ARM bootmem allocator for the range min..max_low. */
arm_bootmem_init(min, max_low);
/*
* Sparsemem tries to allocate bootmem in memory_present(),
* so must be done after the fixed reservations
*/
arm_memory_present();
/*
* sparse_init() needs the bootmem allocator up and running.
*/
sparse_init();
/*
* Now free the memory - free_area_init_node needs
* the sparse mem_map arrays initialized by sparse_init()
* for memmap_init_zone(), otherwise all PFNs are invalid.
*/
arm_bootmem_free(min, max_low, max_high);
/*
 * Virtual address just past the end of low memory; the "- 1 ... + 1"
 * form converts the last byte of low memory and then steps one past it.
 */
high_memory = __va(((phys_addr_t)max_low << PAGE_SHIFT) - 1) + 1;
/*
* This doesn't seem to be used by the Linux memory manager any
* more, but is used by ll_rw_block. If we can get rid of it, we
* also get rid of some of the stuff above as well.
*
* Note: max_low_pfn and max_pfn reflect the number of _pages_ in
* the system, not the maximum PFN.
*/
max_low_pfn = max_low - PHYS_PFN_OFFSET;
max_pfn = max_high - PHYS_PFN_OFFSET;
}
*******************************************************
* 这里的函数需要一个一个的过一下,先看这个
* find_limits(&min, &max_low, &max_high)函数
* 同文件下
*******************************************************
/*
 * find_limits() - scan every memory bank and report the pfn bounds.
 * @min:      out; smallest start pfn over all banks (left at -1UL when
 *            there are no banks)
 * @max_low:  out; largest end pfn over the banks not flagged highmem
 * @max_high: out; largest end pfn over all banks
 */
static void __init find_limits(unsigned long *min, unsigned long *max_low,
			       unsigned long *max_high)
{
	struct meminfo *info = &meminfo;	/* descriptor of the memory banks */
	int idx;

	/* Start from the extreme values so the first bank always wins. */
	*min = -1UL;
	*max_high = 0;
	*max_low = 0;

	for_each_bank (idx, info) {
		struct membank *cur = &info->bank[idx];
		unsigned long first_pfn = bank_pfn_start(cur);
		unsigned long limit_pfn = bank_pfn_end(cur);

		/* Track the overall minimum and maximum. */
		if (first_pfn < *min)
			*min = first_pfn;
		if (limit_pfn > *max_high)
			*max_high = limit_pfn;

		/* Only non-highmem banks may raise the low-memory bound. */
		if (!cur->highmem && limit_pfn > *max_low)
			*max_low = limit_pfn;
	}
}
这样会遍历所有的bank,start和end分别表示每个bank的起始和结束页帧号(pfn),min最终是所有bank中最小的起始页帧号,max_high是最大的结束页帧号,max_low则是低端内存的上界。以平时耳熟能详的x86为例,低端内存上界常见的是896M,但这个值并不是固定的,而是根据总内存大小来决定的。
**************************************************************
* 回过头来继续看arm_bootmem_init(min, max_low)
* 将刚才得到的值传入,可见bootmem是操作的低端内存
**************************************************************
/*
 * arm_bootmem_init() - set up the bootmem allocator for node 0.
 * @start_pfn: first pfn of the range bootmem manages
 * @end_pfn:   pfn bound of the range; regions are clamped to it
 *
 * Allocates the bootmem bitmap from memblock, initialises bootmem,
 * then mirrors memblock's "memory" regions as free and its "reserved"
 * regions as reserved in the bitmap.
 */
static void __init arm_bootmem_init(unsigned long start_pfn,
unsigned long end_pfn)
{
struct memblock_region *reg;
unsigned int boot_pages;
phys_addr_t bitmap;
pg_data_t *pgdat;
/*
* Allocate the bootmem bitmap page. This must be in a region
* of memory which has already been mapped.
*
* The bitmap records whether each page is allocated, one bit per
* page, and it occupies some memory itself. Its size is computed
* from the number of page frames between start_pfn and end_pfn,
* i.e. the range is counted in pages, not bytes.
*/
boot_pages = bootmem_bootmap_pages(end_pfn - start_pfn);
/* Allocate the bitmap from memblock, below the physical address of end_pfn. */
bitmap = memblock_alloc_base(boot_pages << PAGE_SHIFT, L1_CACHE_BYTES,
__pfn_to_phys(end_pfn));
/*
* Initialise the bootmem allocator, handing the
* memory banks over to bootmem.
* This initialises the allocator core; the bitmap is filled with
* 0xff there, so every page starts out reserved.
*/
node_set_online(0);
pgdat = NODE_DATA(0);
init_bootmem_node(pgdat, __phys_to_pfn(bitmap), start_pfn, end_pfn);
/* Free the lowmem regions from memblock into bootmem.
* The memory regions collected by memblock are marked as
* available in the bootmem bitmap.
*/
for_each_memblock(memory, reg) {
unsigned long start = memblock_region_memory_base_pfn(reg);
unsigned long end = memblock_region_memory_end_pfn(reg);
/* Clamp to end_pfn; stop at the first region that starts at or beyond it. */
if (end >= end_pfn)
end = end_pfn;
if (start >= end)
break;
free_bootmem(__pfn_to_phys(start), (end - start) << PAGE_SHIFT);
}
/* Reserve the lowmem memblock reserved regions in bootmem.
* Second pass: regions memblock already has in use are marked
* as used in the bitmap.
*/
for_each_memblock(reserved, reg) {
unsigned long start = memblock_region_reserved_base_pfn(reg);
unsigned long end = memblock_region_reserved_end_pfn(reg);
/* Same clamping as in the loop above. */
if (end >= end_pfn)
end = end_pfn;
if (start >= end)
break;
reserve_bootmem(__pfn_to_phys(start),
(end - start) << PAGE_SHIFT, BOOTMEM_DEFAULT);
}
}
*******************************************************
* bootmem_bootmap_pages是个体系结构无关函数在
* mm/bootmem.c中定义
*******************************************************
/**
 * bootmem_bootmap_pages - calculate bitmap size in pages
 * @pages: number of pages the bitmap has to represent
 *
 * The byte size reported by bootmap_bytes() is rounded up to a page
 * boundary and converted to a page count.
 */
unsigned long __init bootmem_bootmap_pages(unsigned long pages)
{
	unsigned long map_bytes = bootmap_bytes(pages);
	unsigned long padded = PAGE_ALIGN(map_bytes);

	return padded >> PAGE_SHIFT;
}
/*
 * bootmap_bytes - size in bytes of a bitmap holding one bit per page
 * @pages: number of pages to represent
 *
 * The result is padded to a multiple of sizeof(long).
 */
static unsigned long __init bootmap_bytes(unsigned long pages)
{
	/* Eight pages per byte, rounding a partial byte up. */
	unsigned long nbytes = (pages + 7) / 8;

	return ALIGN(nbytes, sizeof(long));
}
返回的即是bitmap的size,下面函数将分配bitmap,
*******************************************************
* memblock_alloc_base这个函数以前没见过,顺着看看,它定义
* 在mm/memblock.c中,实际的分配工作由下面的
* __memblock_alloc_base完成,这里的参数max_addr为低端内存
* 上界(例如896M)
*******************************************************
/*
 * __memblock_alloc_base - find and reserve a block of memory
 * @size:     bytes wanted (rounded up to @align before searching)
 * @align:    required alignment
 * @max_addr: upper bound passed to memblock_find_base()
 *
 * Returns the base address found, or 0 when no block was found or it
 * could not be added to memblock.reserved.
 */
phys_addr_t __init __memblock_alloc_base(phys_addr_t size, phys_addr_t align, phys_addr_t max_addr)
{
	phys_addr_t base;

	/*
	 * Round the size up to the alignment to limit fragmentation;
	 * otherwise many small allocations quickly eat up the whole
	 * reserve array on sparc.
	 */
	size = memblock_align_up(size, align);

	base = memblock_find_base(size, align, 0, max_addr);
	if (base == MEMBLOCK_ERROR)
		return 0;

	/* A non-zero result from memblock_add_region() is treated as failure. */
	if (memblock_add_region(&memblock.reserved, base, size))
		return 0;

	return base;
}
*******************************************************
* memblock_find_base在可操作内存范围内,找到第一个bitmap
* size大小的空闲内存
*******************************************************
/*
 * memblock_find_base - search the memory regions for a free block
 * @size:  bytes wanted; must be non-zero
 * @align: required alignment
 * @start: lowest address the block may occupy
 * @end:   upper bound; MEMBLOCK_ALLOC_ACCESSIBLE selects
 *         memblock.current_limit
 *
 * Returns the address reported by memblock_find_region() for the first
 * suitable region, or MEMBLOCK_ERROR if none qualifies.
 */
static phys_addr_t __init_memblock memblock_find_base(phys_addr_t size,
			phys_addr_t align, phys_addr_t start, phys_addr_t end)
{
	long idx;

	BUG_ON(0 == size);

	/* Pump up max_addr */
	if (end == MEMBLOCK_ALLOC_ACCESSIBLE)
		end = memblock.current_limit;

	/*
	 * Walk the regions from the last to the first: a top-down search
	 * tends to limit fragmentation by keeping early boot allocations
	 * near the top of memory.
	 */
	for (idx = memblock.memory.cnt - 1; idx >= 0; idx--) {
		phys_addr_t reg_base = memblock.memory.regions[idx].base;
		phys_addr_t reg_size = memblock.memory.regions[idx].size;
		phys_addr_t lo, hi, candidate;

		/* Region too small to ever hold the request. */
		if (reg_size < size)
			continue;

		/* Region ends at or below the search window: stop searching. */
		if ((reg_base + reg_size) <= start)
			break;

		/* Intersect the region with [start, end). */
		lo = max(reg_base, start);
		hi = min(reg_base + reg_size, end);
		if (lo >= hi)
			continue;

		candidate = memblock_find_region(lo, hi, size, align);
		if (candidate != MEMBLOCK_ERROR)
			return candidate;
	}

	return MEMBLOCK_ERROR;
}
memblock应当是对收集到的内存信息的一个描述,回头再看一下。这里会找到这么一个基地址,然后将其
返回(以前这个地址应当是紧挨着内核镜像bss后面)。继续回到arm_bootmem_init
函数,下一个是
/**
 * init_bootmem_node - register a node as boot memory
 * @pgdat: node to register
 * @freepfn: pfn where the bitmap for this node is to be placed
 * @startpfn: first pfn on the node
 * @endpfn: first pfn after the node
 *
 * Forwards to init_bootmem_core() using the node's bdata.
 *
 * Returns the number of bytes needed to hold the bitmap for this node.
 */
unsigned long __init init_bootmem_node(pg_data_t *pgdat, unsigned long freepfn,
				unsigned long startpfn, unsigned long endpfn)
{
	unsigned long map_bytes;

	map_bytes = init_bootmem_core(pgdat->bdata, freepfn, startpfn, endpfn);
	return map_bytes;
}
/*
 * Called once to set up the allocator itself (bootmem core initialisation).
 *
 * @bdata:    bootmem descriptor to fill in
 * @mapstart: pfn at which the bitmap is placed
 * @start:    first pfn covered
 * @end:      pfn bound of the covered range
 *
 * Returns the bitmap size in bytes.
 */
static unsigned long __init init_bootmem_core(bootmem_data_t *bdata,
	unsigned long mapstart, unsigned long start, unsigned long end)
{
	unsigned long map_bytes;

	mminit_validate_memmodel_limits(&start, &end);

	/* Virtual address of the bitmap and the pfn range it describes. */
	bdata->node_bootmem_map = phys_to_virt(PFN_PHYS(mapstart));
	bdata->node_min_pfn = start;
	bdata->node_low_pfn = end;
	link_bootmem(bdata);

	/*
	 * Initially all pages are reserved - setup_arch() has to
	 * register free RAM areas explicitly. Hence the bitmap is
	 * filled with 0xff rather than zeroed.
	 */
	map_bytes = bootmap_bytes(end - start);
	memset(bdata->node_bootmem_map, 0xff, map_bytes);

	bdebug("nid=%td start=%lx map=%lx end=%lx mapsize=%lx\n",
		bdata - bootmem_node_data, start, mapstart, end, map_bytes);

	return map_bytes;
}
可以看到,现在的bootmem分配器真的只负责"分配"了:内存信息的收集不再由它自己完成,而是由memblock提供。
总结:总括了bootmem分配器,介绍了其原理,后文准备跟一下memblock这个东东,以及那些分配函数具体实现^.^!.
相关阅读:http://www.linuxidc.com/Linux/2012-02/53140.htm
简介:介绍一下上文遗留的两个结构memblock以及meminfo.
相关阅读:http://www.linuxidc.com/Linux/2012-02/53139.htm
**********************************
* 在arch/arm/kernel/setup.c中,
* sanity_check_meminfo();
**********************************
arm_memblock_init(&meminfo, mdesc);
sanity_check_meminfo函数定义在arch/arm/mm/mmu.c中,
/*
 * sanity_check_meminfo() - validate and normalise the meminfo bank table.
 *
 * Walks every bank, flags, splits, truncates or drops banks that reach
 * the vmalloc area (depending on CONFIG_HIGHMEM), compacts the table in
 * place, raises lowmem_limit to the end of the highest non-highmem bank
 * and finally passes lowmem_limit to memblock_set_current_limit().
 */
void __init sanity_check_meminfo(void)
{
int i, j, highmem = 0;
/* Walk every bank: i is the read index, j the write index, so skipped banks are compacted away. */
for (i = 0, j = 0; i < meminfo.nr_banks; i++) {
struct membank *bank = &meminfo.bank[j];
*bank = meminfo.bank[i];
#ifdef CONFIG_HIGHMEM
/*
 * With highmem configured: a bank whose start maps at or above
 * vmalloc_min (or below PAGE_OFFSET) is flagged as highmem. Once
 * set, the flag stays set for all following banks.
 */
if (__va(bank->start) >= vmalloc_min ||
__va(bank->start) < (void *)PAGE_OFFSET)
highmem = 1;
bank->highmem = highmem; /* record the flag on this bank */
/*
* Split those memory banks which are partially overlapping
* the vmalloc area greatly simplifying things later.
*/
/*
 * The bank starts in low memory but its size reaches past
 * vmalloc_min, i.e. it starts low and ends beyond the low range.
 */
if (__va(bank->start) < vmalloc_min &&
bank->size > vmalloc_min - __va(bank->start)) {
/*
 * If the bank table is already full, the upper part is ignored;
 * otherwise the bank is split in two and the upper part is
 * flagged as highmem. Either way the lower part is truncated below.
 */
if (meminfo.nr_banks >= NR_BANKS) {
printk(KERN_CRIT "NR_BANKS too low, "
"ignoring high memory\n");
} else {
/* Shift the remaining banks up by one slot to make room for the upper half. */
memmove(bank + 1, bank,
(meminfo.nr_banks - i) * sizeof(*bank));
meminfo.nr_banks++;
i++;
bank[1].size -= vmalloc_min - __va(bank->start);
bank[1].start = __pa(vmalloc_min - 1) + 1;
bank[1].highmem = highmem = 1;
j++;
}
bank->size = vmalloc_min - __va(bank->start);
}
#else
bank->highmem = highmem; /* highmem is never set in this configuration, so this stores 0 */
/*
* Check whether this memory bank would entirely overlap
* the vmalloc area.
*/
/* Without highmem support such a bank is ignored entirely. */
if (__va(bank->start) >= vmalloc_min ||
__va(bank->start) < (void *)PAGE_OFFSET) {
printk(KERN_NOTICE "Ignoring RAM at %.8llx-%.8llx "
"(vmalloc region overlap).\n",
(unsigned long long)bank->start,
(unsigned long long)bank->start + bank->size - 1);
continue;
}
/*
* Check whether this memory bank would partially overlap
* the vmalloc area.
*/
if (__va(bank->start + bank->size) > vmalloc_min ||
__va(bank->start + bank->size) < __va(bank->start)) {
unsigned long newsize = vmalloc_min - __va(bank->start);
printk(KERN_NOTICE "Truncating RAM at %.8llx-%.8llx "
"to -%.8llx (vmalloc region overlap).\n",
(unsigned long long)bank->start,
(unsigned long long)bank->start + bank->size - 1,
(unsigned long long)bank->start + newsize - 1);
bank->size = newsize;
}
#endif
/* Raise lowmem_limit to the end of the highest non-highmem bank seen so far. */
if (!bank->highmem && bank->start + bank->size > lowmem_limit)
lowmem_limit = bank->start + bank->size;
j++;
}
#ifdef CONFIG_HIGHMEM
/* If any highmem was found, check that it can be supported; aliasing VIPT caches cannot. */
if (highmem) {
const char *reason = NULL;
if (cache_is_vipt_aliasing()) {
/*
* Interactions between kmap and other mappings
* make highmem support with aliasing VIPT caches
* rather difficult.
*/
reason = "with VIPT aliasing cache";
}
if (reason) {
printk(KERN_CRIT "HIGHMEM is not supported %s, ignoring high memory\n",
reason);
/* Drop the trailing highmem banks from the table. */
while (j > 0 && meminfo.bank[j - 1].highmem)
j--;
}
}
#endif
/* j is the number of banks kept after compaction. */
meminfo.nr_banks = j;
memblock_set_current_limit(lowmem_limit);
}
其中vmalloc_min定义为static void * __initdata vmalloc_min = (void *)(VMALLOC_END - SZ_128M);
而VMALLOC_END在2440上定义于arch/arm/mach-s3c2410/include/mach/vmalloc.h:
#define VMALLOC_END 0xF6000000UL
上面又提到了meminfo看来不解决他也不行了,
先看定义,这个东西大概是这个位置,
arch/arm/kernel/setup.c-->setup_arch-->setup_machine_tags
这个函数中
...
if (mdesc->fixup) //一种方法,可惜2440没有
mdesc->fixup(tags, &from, &meminfo);
if (tags->hdr.tag == ATAG_CORE) {
if (meminfo.nr_banks != 0)
squash_mem_tags(tags);
save_atags(tags);
parse_tags(tags); //另一个地方
}
...
跟进去最终会到这里
/*
 * Look the given tag up in the tag table and invoke the parse function
 * of the first matching entry. The table lies between the symbols
 * __tagtable_begin and __tagtable_end, which the linker builds from all
 * the __tagtable declarations.
 *
 * Returns non-zero when a matching entry was found, 0 otherwise.
 */
static int __init parse_tag(const struct tag *tag)
{
	extern struct tagtable __tagtable_begin, __tagtable_end;
	struct tagtable *entry = &__tagtable_begin;

	while (entry < &__tagtable_end) {
		if (tag->hdr.tag == entry->tag) {
			entry->parse(tag);
			break;
		}
		entry++;
	}

	/* entry stops short of the end marker only if a match broke the loop. */
	return entry < &__tagtable_end;
}
__tagtable_begin和__tagtable_end标记了一个单独的段的起止位置,定义在链接脚本arch/arm/vmlinux.lds中(注意:下面这段链接脚本收集的是.taglist段,而后文__tag宏使用的段名是.taglist.init,两者应来自不同的内核版本),
__arch_info_begin = .;
*(.arch.info)
__arch_info_end = .;
__tagtable_begin = .;
*(.taglist)
__tagtable_end = .;
*(.data.init)
. = ALIGN(16);
其中同文件下
/*
 * Parser for ATAG_MEM tags: passes the start address and size carried
 * in the tag to arm_add_memory().
 *
 * Returns the result of arm_add_memory().
 */
static int __init parse_tag_mem32(const struct tag *tag)
{
	return arm_add_memory(tag->u.mem.start, tag->u.mem.size);
}

/* Register parse_tag_mem32 as the tag-table entry for ATAG_MEM. */
__tagtable(ATAG_MEM, parse_tag_mem32);
又有
/*
 * __tag: attribute set applied to tag-table entries; places the object
 * in the ".taglist.init" section. (__used presumably keeps the object
 * from being discarded as unreferenced - confirm against its definition.)
 */
#define __tag __used __attribute__((__section__(".taglist.init")))
/*
 * __tagtable(tag, fn): defines a static const struct tagtable named
 * __tagtable_<fn>, initialised with { tag, fn } and carrying __tag.
 */
#define __tagtable(tag, fn) \
static const struct tagtable __tagtable_##fn __tag = { tag, fn }
可见parse_tag_mem32就被定义于这个段,那么就会调用它,跟进去又会调用arm_add_memory(tag->u.mem.start, tag->u.mem.size);这个是我们的重点函数,会把参数列表传递过来的内存信息对齐后打包到meminfo,找到了meminfo怎么来的,回来再继续看刚才剩下的一个函数,arm_memblock_init(&meminfo, mdesc);这个函数比较简单,这里就不贴出来了,主要是把刚才得到的内存布局中已经使用的部分标记出来比如kernel占用部分,异常表占用部分等等...
总结:分析了上文遗留的两个结构memblock以及meminfo,bootmem结构到现在应当很清晰了,下文会简单分析下具体的bootmem的分配内存
函数 ^.^!
本篇文章来源于 Linux公社网站(www.linuxidc.com) 原文链接:http://www.linuxidc.com/Linux/2012-02/53140p2.htm