linux版本:2.6.36
相关数据结构
arch/arm/include/asm/setup.h
/*
 * NR_BANKS is the number of slots in meminfo.bank[]:
 * 16 when CONFIG_ARCH_LH7A40X is defined, 8 otherwise.
 */
#ifdef CONFIG_ARCH_LH7A40X
# define NR_BANKS 16
#else
# define NR_BANKS 8
#endif
/*
 * One bank (contiguous range) of RAM.
 * arm_add_memory() fills in start (rounded up to a page boundary) and
 * size (rounded down to a whole number of pages).
 */
struct membank {
/* start address of the bank */
unsigned long start;
/* length of the bank in bytes */
unsigned long size;
/* NOTE(review): never written by the code shown in these notes; presumably flags a highmem bank -- confirm */
unsigned int highmem;
};
/*
 * Table of RAM banks.
 * arm_add_memory() appends one entry per accepted bank and
 * arm_memblock_init() walks bank[0 .. nr_banks-1].
 */
struct meminfo {
/* number of valid entries in bank[]; never exceeds NR_BANKS */
int nr_banks;
/* the banks themselves, in the order they were added */
struct membank bank[NR_BANKS];
};
linux内核的内存管理分三个阶段。
1. 启动---->bootmem初始化完成为第一阶段。此阶段只能使用memblock_reserve函数分配内存。
此阶段结束标志为:init_bootmem_done = 1.
2. bootmem初始化完--->buddy完成前。结束标志为mem_init_done = 1.
3. 全部内存初始化完毕,可以用cache和buddy分配内存。
内存初始化步骤:
1. start_kernel---->setup_arch->paging_init
|-->setup_per_cpu_areas
|-->build_all_zonelists
|-->mem_init
-->setup_per_cpu_pageset
setup_arch-->arm_memblock_init-->memblock_init
内存原始数据由u-boot传入,对照本开发板uboot部分内存初始化函数,我们知道uboot传递过来的tag->u.mem.start, tag->u.mem.size分别为0x30000000,0x4000000,即内存起始地址是0x30000000,大小为64M。setup_arch获取u-boot传递的参数地址后,调用了parse_tag_mem32函数对传递的内存参数进行处理:
/*
 * Handler for an ATAG_MEM tag: passes the tag's memory start and size
 * (tag->u.mem.start, tag->u.mem.size) to arm_add_memory() and returns
 * that function's result.
 * The __tagtable() line below associates this handler with ATAG_MEM.
 * (The leading numbers are listing line numbers from the kernel source.)
 */
556 static int __init parse_tag_mem32(const struct tag *tag)
557 {
558 return arm_add_memory(tag->u.mem.start, tag->u.mem.size);
559 }
560
561 __tagtable(ATAG_MEM, parse_tag_mem32);
如上可见,parse_tag_mem32函数调用arm_add_memory函数把RAM的start和size等参数保存到了全局变量meminfo(类型为struct meminfo)中。现在再来分析arm_add_memory
arm_add_memory定义如下(arch/arm/kernel/setup.c)
/*
 * Append one RAM bank to the global meminfo table.
 *
 * start: start address of the bank as supplied by the caller
 * size:  length of the bank in bytes
 *
 * Returns 0 when the bank was recorded, -EINVAL when the table is already
 * full (NR_BANKS entries) or when the bank is empty after page alignment.
 */
static int __init arm_add_memory(unsigned long start, unsigned long size)
{
/* Slot the new bank would occupy; it is only written after the bounds check below. */
struct membank *bank = &meminfo.bank[meminfo.nr_banks];
if (meminfo.nr_banks >= NR_BANKS) {
printk(KERN_CRIT "NR_BANKS too low, "
"ignoring memory at %#lx\n", start);
return -EINVAL;
}
/*
 * Ensure that start/size are aligned to a page boundary.
 * Size is appropriately rounded down, start is rounded up.
 *
 * NOTE(review): the statement below subtracts the offset of start within
 * its page, while PAGE_ALIGN() moves start forward by the remainder of
 * that page. For an unaligned start the two differ, so the recorded size
 * may not match the memory really left after the aligned start, and the
 * subtraction wraps if size is smaller than the offset. Confirm against
 * later kernels, which subtract (aligned_start - start) instead.
 */
size -= start & ~PAGE_MASK;
bank->start = PAGE_ALIGN(start);
bank->size = size & PAGE_MASK;
/*
 * Reject a bank that has no whole page left after alignment.
 * (The slot has been written but nr_banks is not advanced, so the
 * entry is not counted as valid.)
 */
if (bank->size == 0)
return -EINVAL;
meminfo.nr_banks++;
return 0;
}
经过这样的处理,setup.c文件中的meminfo可就不再是
struct meminfo meminfo = { 0, };
而是
struct meminfo meminfo = { 1, { {0x30000000,0x4000000,0}, }, };
表示当前有一个内存区域(bank),物理地址从0x30000000开始,大小是64M,第三个成员highmem为0(arm_add_memory没有修改它,保持初始值)
处理完这些数据后,setup_arch会在第822行调用arm_memblock_init(&meminfo, mdesc);
先分析第1阶段。
1. 初始化:setup_arch-->arm_memblock_init-->memblock_init
arch/arm/mm/init.c
270 void __init arm_memblock_init(struct meminfo *mi, struct machine_desc *mdesc)
271 {
272 int i;
273
274 memblock_init();
275 for (i = 0; i < mi->nr_banks; i++)
276 memblock_add(mi->bank[i].start, mi->bank[i].size);
277
278 /* Register the kernel text, kernel data and initrd with memblock. */
279 #ifdef CONFIG_XIP_KERNEL //配置内核XIP方式运行,避免把内核从Nor Flash 拷贝到主存SDRAM 空间,让内核运行在低功耗的NOR flash 上,节省系统启动时间,降低系统对SDRAM 的需要,减少电能消耗,使产品能够持续使用更长时间
280 memblock_reserve(__pa(_data), _end - _data);
281 #else
282 memblock_reserve(__pa(_stext), _end - _stext);
/*第282行,在0节点中保留内核镜像部分的内存,大概是0x30108000到以后的5M左右(解压后的内核)。
参看arch/arm/kernel/vmlinux.lds文件,
375 SECTIONS
376 {
377 . = 0xC0000000 + 0x00108000;
378 .init : { /* Init code and data */
379 _stext = .;
所以stext等于c0108000,对应物理地址30108000,
对_end再参看arch/arm/kernel/vmlinux.lds.S文件,它也在SECTIONS区,在233行定义
232 BSS_SECTION(0, 0, 0)
233 _end = .;
对于我移植的2.6.36内核,_end等于c0555b60,对应物理地址30555b60,在此语句后添加打印信息得到
_end - _stext=44db60,即4512608字节(约4.3M)大小,所以这一句功能就是把0x30108000 ~ 0x30555b60这段(约4.3M)空间保留下来。
*/
283 #endif
284 #ifdef CONFIG_BLK_DEV_INITRD //本开发板没配置
285 if (phys_initrd_size) {
286 memblock_reserve(phys_initrd_start, phys_initrd_size);
287
288 /* Now convert initrd to virtual addresses */
289 initrd_start = __phys_to_virt(phys_initrd_start);
290 initrd_end = initrd_start + phys_initrd_size;
291 }
292 #endif
293
294 arm_mm_memblock_reserve();//就是把0x30104000 ~ 0x30108000这段(16K)用于页目录的内容保留下来。
295
296 /* reserve any platform specific memblock areas */
297 if (mdesc->reserve)//对st2410中没有定义
298 mdesc->reserve();
299
300 memblock_analyze();//计算内存块大小
301 memblock_dump_all();//显示
302 }
内存原始数据由u-boot传入,在memblock_init初始化完成后,arm_memblock_init函数通过memblock_add调用memblock_add_region加入原始内存数据。我的板子上配了64M内存,即:0x30000000->0x33ffffff,加完(并完成各项保留)后的配置如下:
MEMBLOCK configuration:
rmo_size = 0x0
memory.size = 0x4000000
memory.cnt = 0x1
memory[0x0] 0x0000000030000000 - 0x0000000033ffffff, 0x4000000 bytes
reserved.cnt = 0x1
reserved[0x0] 0x0000000030104000 - 0x0000000030555b5f, 0x451b60 bytes
memblock_init()在mm/memblock.c里面被定义。
/*
 * Reset the global memblock tables.
 * Both memblock.memory and memblock.reserved are left holding exactly one
 * placeholder entry of base 0 and size 0 (cnt == 1), which
 * memblock_add_region() overwrites on the first real insertion.
 */
void __init memblock_init(void)
{
/* Create a dummy zero size MEMBLOCK which will get coalesced away later.
 * This simplifies the memblock_add() code below...
 */
memblock.memory.region[0].base = 0;
memblock.memory.region[0].size = 0;
memblock.memory.cnt = 1;
/* Ditto. */
memblock.reserved.region[0].base = 0;
memblock.reserved.region[0].size = 0;
memblock.reserved.cnt = 1;
}
其作用就是初始化memblock这个结构:把memory和reserved两张表各置为只含一个大小为0的占位区域(cnt = 1),这个占位区域会在第一次真正添加区域时被覆盖。memblock包含两个重要的成员,分别是memblock.memory和memblock.reserved,它们分别代表系统中可用的内存和已经被保留的内存。
memblock.memory和memblock.reserved被定义为以下结构:include/linux/memblock.h
/* Maximum number of entries memblock_add_region() allows in one table. */
#define MAX_MEMBLOCK_REGIONS 128
/* A single address range: [base, base + size). */
struct memblock_property {
u64 base;
u64 size;
};
/*
 * A table of address ranges (despite the name, this is the whole table;
 * the individual ranges are the memblock_property entries).
 * memblock_add_region() keeps region[] ordered by ascending base.
 */
struct memblock_region {
/* number of valid entries in region[] */
unsigned long cnt;
/* total size; memblock_analyze() computes it for memblock.memory */
u64 size;
/* the ranges; one slot more than MAX_MEMBLOCK_REGIONS is allocated */
struct memblock_property region[MAX_MEMBLOCK_REGIONS+1];
};
/* Global early-boot memory map: all known memory plus the reserved parts of it. */
struct memblock {
/* NOTE(review): not referenced by the code shown in these notes */
unsigned long debug;
/* set by memblock_add() to the size of the range added at base 0 */
u64 rmo_size;
/* ranges registered with memblock_add() */
struct memblock_region memory;
/* ranges registered with memblock_reserve() */
struct memblock_region reserved;
};
mm/memblock.c
/*
 * Register the range [base, base + size) as available memory by adding it
 * to memblock.memory. Returns the result of memblock_add_region().
 */
long memblock_add(u64 base, u64 size)
{
struct memblock_region *_rgn = &memblock.memory;
/* On pSeries LPAR systems, the first MEMBLOCK is our RMO region. */
/* A range starting at address 0 also records its size in rmo_size. */
if (base == 0)
memblock.rmo_size = size;
return memblock_add_region(_rgn, base, size);
}
/*
 * Add the range [base, base + size) to the table rgn.
 *
 * Returns 0 when the range was stored as a new entry or an identical entry
 * already existed, the number of merges performed (> 0) when the range was
 * coalesced into an existing entry, and -1 when the table already holds
 * MAX_MEMBLOCK_REGIONS entries.
 */
static long memblock_add_region(struct memblock_region *rgn, u64 base, u64 size)
{
unsigned long coalesced = 0;
long adjacent, i;
/* Table holds only the zero-size placeholder: overwrite it in place (cnt stays 1). */
if ((rgn->cnt == 1) && (rgn->region[0].size == 0)) {
rgn->region[0].base = base;
rgn->region[0].size = size;
return 0;
}
/* First try and coalesce this MEMBLOCK with another. */
for (i = 0; i < rgn->cnt; i++) {
u64 rgnbase = rgn->region[i].base;
u64 rgnsize = rgn->region[i].size;
if ((rgnbase == base) && (rgnsize == size))
/* Already have this region, so we're done */
return 0;
/*
 * memblock_addrs_adjacent() is not shown here; judging from the
 * adjustments below, a positive result means the new range ends where
 * entry i begins, a negative result means it begins where entry i ends.
 */
adjacent = memblock_addrs_adjacent(base, size, rgnbase, rgnsize);
if (adjacent > 0) {
/* Grow entry i downwards to cover the new range. */
rgn->region[i].base -= size;
rgn->region[i].size += size;
coalesced++;
break;
} else if (adjacent < 0) {
/* Grow entry i upwards to cover the new range. */
rgn->region[i].size += size;
coalesced++;
break;
}
}
/*
 * If entry i was grown and now touches entry i+1, merge the two.
 * When the loop ran to completion i equals rgn->cnt, so this test fails.
 */
if ((i < rgn->cnt - 1) && memblock_regions_adjacent(rgn, i, i+1)) {
memblock_coalesce_regions(rgn, i, i+1);
coalesced++;
}
if (coalesced)
return coalesced;
/* No merge was possible; a new entry is needed, so the table must have room. */
if (rgn->cnt >= MAX_MEMBLOCK_REGIONS)
return -1;
/* Couldn't coalesce the MEMBLOCK, so add it to the sorted table. */
/*
 * Walk from the last entry downwards, moving every entry with a larger
 * base up one slot, and drop the new range into the gap.
 */
for (i = rgn->cnt - 1; i >= 0; i--) {
if (base < rgn->region[i].base) {
rgn->region[i+1].base = rgn->region[i].base;
rgn->region[i+1].size = rgn->region[i].size;
} else {
rgn->region[i+1].base = base;
rgn->region[i+1].size = size;
break;
}
}
/* Every entry was moved up: the new range belongs in slot 0. */
if (base < rgn->region[0].base) {
rgn->region[0].base = base;
rgn->region[0].size = size;
}
rgn->cnt++;
return 0;
}
memblock_add_region函数作用是将给定的物理地址范围加入到指定的区域表(memblock.reserved或者是memblock.memory)中。新加入的范围需要经过检查:如果与已有的某个region完全相同,则直接返回;如果与已有的region首尾相邻,则合并到该region中;否则按基地址从小到大的顺序插入一个新的region。从上面的代码看,这里只检查"相同"和"相邻"两种情况,并没有处理部分重叠的范围。
3. memblock_reserve用来把一段物理内存区域登记为"已保留"(在这一阶段相当于分配内存)。
以分配内核本身占用的内存为例:
/* Register the kernel text, kernel data and initrd with memblock. */
memblock_reserve(__pa(_stext), _end - _stext);
_stext,_end参见arch/arm/kernel/vmlinux.lds.S 链接脚本
/*
 * Mark the range [base, base + size) as reserved by adding it to
 * memblock.reserved. A zero size triggers BUG_ON().
 * Returns the result of memblock_add_region().
 */
long __init memblock_reserve(u64 base, u64 size)
{
struct memblock_region *_rgn = &memblock.reserved;
BUG_ON(0 == size);
return memblock_add_region(_rgn, base, size);
}
分配完之后的内存配置为(注意:下面的示例输出中内存从0地址开始、大小为0x40000000,与上文本开发板"0x30000000起64M"的数据不一致,应是取自其他平台,仅用于说明输出格式):
MEMBLOCK configuration:
rmo_size = 0x40000000
memory.size = 0x0
memory.cnt = 0x1
memory[0x0] 0x0000000000000000 - 0x000000003fffffff, 0x40000000 bytes
reserved.cnt = 0x1
reserved[0x0] 0x0000000000000000 - 0x0000000000xxxxx, _end - _stext bytes
如果新保留的区域与已有的保留区域相邻(首尾相接),则进行合并。
几次分配后的配置如下(同样是其他平台上的示例输出,内存范围为0 ~ 0x7fffffff):
MEMBLOCK configuration:
rmo_size = 0x80000000
memory.size = 0x80000000
memory.cnt = 0x1
memory[0x0] 0x0000000000000000 - 0x000000007fffffff, 0x80000000 bytes
reserved.cnt = 0x6
reserved[0x0] 0x0000000000000000 - 0x00000000006b0fff, 0x6b1000 bytes
reserved[0x1] 0x0000000000ffa000 - 0x0000000000ffcfff, 0x3000 bytes
reserved[0x2] 0x000000002fbc4000 - 0x000000002fbdefff, 0x1b000 bytes
reserved[0x3] 0x000000002fbdfa88 - 0x000000002ffff4cc, 0x41fa45 bytes
reserved[0x4] 0x000000002fbe4000 - 0x000000002ffff4cd, 0x41b4ce bytes
reserved[0x5] 0x000000007ffff000 - 0x000000007fffffff, 0x1000 bytes
init
arch/arm/mm/mmu.c
/*
* Reserve the special regions of memory
*/
/*
 * Reserve the ARM-specific special regions in memblock: always the initial
 * page tables, and with CONFIG_SA1111 also the memory between PHYS_OFFSET
 * and the page tables.
 */
void __init arm_mm_memblock_reserve(void)
{
/*
 * Reserve the page tables. These are already in use,
 * and can only be in node 0.
 */
memblock_reserve(__pa(swapper_pg_dir), PTRS_PER_PGD * sizeof(pgd_t));
/*
 * Background (values for the author's board):
 * swapper_pg_dir is the virtual address of the initial page table. It is
 * defined in ./arch/arm/kernel/head.S:
 *   44: .globl swapper_pg_dir
 *   45: .equ swapper_pg_dir, KERNEL_RAM_VADDR - 0x4000
 *   ./arch/arm/kernel/head.S:55:#define KERNEL_START KERNEL_RAM_VADDR
 * so swapper_pg_dir is 0xc0104000, i.e. physical address 0x30104000.
 * PTRS_PER_PGD is defined at line 103 of arch/arm/include/asm/pgtable.h:
 *   #define PTRS_PER_PGD 2048
 * and pgd_t is defined as
 *   typedef unsigned long pgd_t[2];
 * PTRS_PER_PGD * sizeof(pgd_t) = 2048 * 8 = 16384 = 0x00004000 (16K),
 * so this call reserves the 16K at 0x30104000 ~ 0x30108000 that holds the
 * page directory.
 */
#ifdef CONFIG_SA1111 // not defined for the 2410
/*
 * Because of the SA1111 DMA bug, we want to preserve our
 * precious DMA-able memory...
 */
memblock_reserve(PHYS_OFFSET, __pa(swapper_pg_dir) - PHYS_OFFSET);
#endif
}
arch/arm/mm/init.c
/*
 * Helpers for the memory-layout printk: each one expands to THREE
 * comma-separated arguments -- start, end, and the size of [b, t).
 */
#define MLK(b, t) b, t, ((t) - (b)) >> 10 // shift right by 10: size in KB
#define MLM(b, t) b, t, ((t) - (b)) >> 20 // shift right by 20: size in MB
/* Same as MLK, but the size is divided by SZ_1K rounding up instead of truncating. */
#define MLK_ROUNDUP(b, t) b, t, DIV_ROUND_UP(((t) - (b)), SZ_1K)
/*
 * Print the kernel's virtual memory layout, one line per area.
 * Every MLK()/MLM()/MLK_ROUNDUP() argument below supplies the three values
 * (start, end, size) consumed by the matching line of the format string.
 * The DMA line is only emitted with CONFIG_MMU and the pkmap line only with
 * CONFIG_HIGHMEM; the same #ifdefs guard both the format line and its arguments.
 */
printk(KERN_NOTICE "Virtual kernel memory layout:\n"
" vector : 0x%08lx - 0x%08lx (%4ld kB)\n"
" fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n"
#ifdef CONFIG_MMU
" DMA : 0x%08lx - 0x%08lx (%4ld MB)\n"
#endif
" vmalloc : 0x%08lx - 0x%08lx (%4ld MB)\n"
" lowmem : 0x%08lx - 0x%08lx (%4ld MB)\n"
#ifdef CONFIG_HIGHMEM
" pkmap : 0x%08lx - 0x%08lx (%4ld MB)\n"
#endif
" modules : 0x%08lx - 0x%08lx (%4ld MB)\n"
" .init : 0x%p" " - 0x%p" " (%4d kB)\n"
" .text : 0x%p" " - 0x%p" " (%4d kB)\n"
" .data : 0x%p" " - 0x%p" " (%4d kB)\n",
MLK(UL(CONFIG_VECTORS_BASE), UL(CONFIG_VECTORS_BASE) +
(PAGE_SIZE)),
MLK(FIXADDR_START, FIXADDR_TOP),
#ifdef CONFIG_MMU
MLM(CONSISTENT_BASE, CONSISTENT_END),
#endif
MLM(VMALLOC_START, VMALLOC_END),
MLM(PAGE_OFFSET, (unsigned long)high_memory),
#ifdef CONFIG_HIGHMEM
MLM(PKMAP_BASE, (PKMAP_BASE) + (LAST_PKMAP) *
(PAGE_SIZE)),
#endif
MLM(MODULES_VADDR, MODULES_END),
MLK_ROUNDUP(__init_begin, __init_end),
MLK_ROUNDUP(_text, _etext),
MLK_ROUNDUP(_data, _edata));
系统启动的时候打印出:
stext:c0108000,__pa_stext:30108000, _end - _stext=44db60
_end:c0555b60,__pa(_end)=30555b60
Memory: 64MB = 64MB total
Memory: 60536k/60536k available, 5000k reserved, 0K highmem
Virtual kernel memory layout:
vector : 0xffff0000 - 0xffff1000 ( 4 kB)
fixmap : 0xfff00000 - 0xfffe0000 ( 896 kB)
DMA : 0xffc00000 - 0xffe00000 ( 2 MB)
vmalloc : 0xc4800000 - 0xe0000000 ( 440 MB)
lowmem : 0xc0000000 - 0xc4000000 ( 64 MB)
modules : 0xbf000000 - 0xc0000000 ( 16 MB)
.init : 0xc0108000 - 0xc012e000 ( 152 kB)---->(系统启动过程中 以__init宏标识的函数占用的空间 被vmlinux.lds标识为 __init_begin __init_end 启动init进程前被释放掉 :Freeing init memory: 152K)
.text : 0xc012e000 - 0xc04dd000 (3772 kB)
.data : 0xc04fe000 - 0xc0527500 ( 166 kB)
Hierarchical RCU implementation.
启动后查看内存命令显示如下:
[root@localhost /]# free
total used free shared buffers
Mem: 60688 6644 54044 0 0
Swap: 0 0 0
Total: 60688 6644 54044
[root@localhost /]#
保留内存5000K中 linux内核占用了152+3772+166=4090K
加上boot的时候参数等平台空间占用32K = 4122K
其它还差5000-4122=878K 没有着落
其它还包括映射的向量表4K
还有可能是系统管理内存的页表占用空间等其它的占用
系统启动成功后 Freeing init memory: 152K
所以用free命令看会多了152K(60688K - 60536K = 152K)
而free命令中看到的used基本是缓存 buffer占用的,为了提高i/o速度的缓存,很多都并非真正应用
在我应用中去试图malloc更多的内存的时候 used中很多都能被malloc出来的.
mm/memblock.c
/*
 * Recompute memblock.memory.size as the sum of the sizes of all entries
 * currently in memblock.memory. The reserved table is not touched.
 */
void __init memblock_analyze(void)
{
int i;
memblock.memory.size = 0;
for (i = 0; i < memblock.memory.cnt; i++)
memblock.memory.size += memblock.memory.region[i].size;
}
mm/memblock.c
/*
 * Log one table: first its entry count, then one line per entry showing
 * the index, the first and last address (base and base + size - 1) and
 * the size in bytes.
 *
 * region: table to print
 * name:   label used as the prefix of every line (e.g. "memory")
 */
static void memblock_dump(struct memblock_region *region, char *name)
{
unsigned long long base, size;
int i;
pr_info(" %s.cnt = 0x%lx\n", name, region->cnt);
for (i = 0; i < region->cnt; i++) {
base = region->region[i].base;
size = region->region[i].size;
/* The end address printed is inclusive, hence the "- 1". */
pr_info(" %s[0x%x]\t0x%016llx - 0x%016llx, 0x%llx bytes\n",
name, i, base, base + size - 1, size);
}
}
mm/memblock.c
/*
 * Log the complete memblock state (rmo_size, memory.size, then the memory
 * and reserved tables). Does nothing unless memblock_debug is non-zero.
 */
void memblock_dump_all(void)
{
if (!memblock_debug)
return;
pr_info("MEMBLOCK configuration:\n");
pr_info(" rmo_size = 0x%llx\n", (unsigned long long)memblock.rmo_size);
pr_info(" memory.size = 0x%llx\n", (unsigned long long)memblock.memory.size);
memblock_dump(&memblock.memory, "memory");
memblock_dump(&memblock.reserved, "reserved");
}
/*
 * memblock_debug gates memblock_dump_all(): the dump is printed only when
 * it is non-zero.
 * early_memblock() sets the flag when its argument contains the substring
 * "debug", and always returns 0. The early_param() line below ties the
 * handler to the "memblock" parameter name.
 * (The leading numbers are listing line numbers from mm/memblock.c.)
 */
22 static int memblock_debug;
23
24 static int __init early_memblock(char *p)
25 {
26 if (p && strstr(p, "debug"))
27 memblock_debug = 1;
28 return 0;
29 }
30 early_param("memblock", early_memblock);