Linux 进程内存模型

Linux 进程内存模型 (1)

下图是一个简易的内存模型示意图。其中某些段 (Segment) 是从可执行文件加载的,有关 ELF Section 和 Segment 的映射关系,我们可以从 ELF Program Headers 中获取相关信息。

$ readelf -l hello

Elf file type is EXEC (Executable file)
Entry point 0x8048410
There are 8 program headers, starting at offset 52

Program Headers:
    Type           Offset   VirtAddr   PhysAddr   FileSiz MemSiz  Flg Align
    PHDR           0x000034 0x08048034 0x08048034 0x00100 0x00100 R E 0x4
    INTERP         0x000134 0x08048134 0x08048134 0x00013 0x00013 R   0x1
    LOAD           0x000000 0x08048000 0x08048000 0x0064c 0x0064c R E 0x1000
    LOAD           0x000f0c 0x08049f0c 0x08049f0c 0x0011c 0x00128 RW  0x1000
    DYNAMIC        0x000f20 0x08049f20 0x08049f20 0x000d0 0x000d0 RW  0x4
    NOTE           0x000148 0x08048148 0x08048148 0x00044 0x00044 R   0x4
    GNU_STACK      0x000000 0x00000000 0x00000000 0x00000 0x00000 RW  0x4
    GNU_RELRO      0x000f0c 0x08049f0c 0x08049f0c 0x000f4 0x000f4 R   0x1

 Section to Segment mapping:
    Segment Sections...
    01     .interp 
    02     ... .init .plt .text .fini .rodata
    03     ... .data .bss 
    04     .dynamic 
    05     .note.ABI-tag 
    07     .ctors .dtors .jcr .dynamic .got 

对照示意图,我们可以看到 .text, .rodata, .data, .bss 被加载到 0x08048000 之后,也就是序号 02, 03 两个 LOAD Segment 中。ELF Section 信息中的 Virtual Address 也是一个参考。
$ readelf -S hello

There are 38 section headers, starting at offset 0x1a10:

Section Headers:
    [Nr] Name              Type            Addr     Off    Size   ES Flg Lk Inf Al
    ... ...
    [14] .text             PROGBITS        08048410 000410 0001ec 00  AX  0   0 16
    [16] .rodata           PROGBITS        08048618 000618 000030 00   A  0   0  4
    [24] .data             PROGBITS        0804a018 001018 000010 00  WA  0   0  4
    [25] .bss              NOBITS          0804a028 001028 00000c 00  WA  0   0  4
    [35] .shstrtab         STRTAB          00000000 0018b8 000156 00      0   0  1
    [36] .symtab           SYMTAB          00000000 002000 000540 10     37  56  4
    [37] .strtab           STRTAB          00000000 002540 000263 00      0   0  1
Key to Flags:
    W (write), A (alloc), X (execute), M (merge), S (strings)
    I (info), L (link order), G (group), x (unknown)
    O (extra OS processing required) o (OS specific), p (processor specific)

注意不是所有的 Section 都会被加载到进程内存空间。


(1) pmap
$ ps aux | grep hello | grep -v grep

yuhen     6649  0.0  1.6  39692  8404 pts/0    Sl+  Dec10   0:13 vim hello.c
yuhen    12787  0.0  0.0   1664   396 pts/1    S+   08:24   0:00 ./hello
$ pmap -x 12787

12787:   ./hello
Address   Kbytes     RSS    Anon  Locked Mode   Mapping
00110000    1272       -       -       - r-x--
0024e000       8       -       -       - r----
00250000       4       -       -       - rw---
00251000      12       -       -       - rw---    [ anon ]
002b2000     108       -       -       - r-x--
002cd000       4       -       -       - r----
002ce000       4       -       -       - rw---
00c4d000       4       -       -       - r-x--    [ anon ]
08048000       4       -       -       - r-x--  hello
08049000       4       -       -       - r----  hello
0804a000       4       -       -       - rw---  hello
09f89000     132       -       -       - rw---    [ anon ]
b7848000       4       -       -       - rw---    [ anon ]
b7855000      16       -       -       - rw---    [ anon ]
bfc40000      84       -       -       - rw---    [ stack ]
-------- ------- ------- ------- -------
total kB    1664       -       -       -

(2) maps
$ cat /proc/12787/maps

00110000-0024e000 r-xp 00000000 08:01 5231       /lib/tls/i686/cmov/
0024e000-00250000 r--p 0013e000 08:01 5231       /lib/tls/i686/cmov/
00250000-00251000 rw-p 00140000 08:01 5231       /lib/tls/i686/cmov/
00251000-00254000 rw-p 00000000 00:00 0 
002b2000-002cd000 r-xp 00000000 08:01 1809       /lib/
002cd000-002ce000 r--p 0001a000 08:01 1809       /lib/
002ce000-002cf000 rw-p 0001b000 08:01 1809       /lib/
00c4d000-00c4e000 r-xp 00000000 00:00 0          [vdso]
08048000-08049000 r-xp 00000000 08:01 135411     /home/yuhen/Projects/Learn.C/hello
08049000-0804a000 r--p 00000000 08:01 135411     /home/yuhen/Projects/Learn.C/hello
0804a000-0804b000 rw-p 00001000 08:01 135411     /home/yuhen/Projects/Learn.C/hello
09f89000-09faa000 rw-p 00000000 00:00 0          [heap]
b7848000-b7849000 rw-p 00000000 00:00 0 
b7855000-b7859000 rw-p 00000000 00:00 0 
bfc40000-bfc55000 rw-p 00000000 00:00 0          [stack]

(3) gdb
$ gdb --pid=12787

(gdb) info proc mappings

process 12787
cmdline = '/home/yuhen/Projects/Learn.C/hello'
cwd = '/home/yuhen/Projects/Learn.C'
exe = '/home/yuhen/Projects/Learn.C/hello'
Mapped address spaces:

    Start Addr   End Addr       Size     Offset objfile
    ... ...
    0x8048000  0x8049000     0x1000          0      /home/yuhen/Projects/Learn.C/hello
    0x8049000  0x804a000     0x1000          0      /home/yuhen/Projects/Learn.C/hello
    0x804a000  0x804b000     0x1000     0x1000      /home/yuhen/Projects/Learn.C/hello
    0x9f89000  0x9faa000    0x21000          0           [heap]
    0xb7848000 0xb7849000     0x1000          0        
    0xb7855000 0xb7859000     0x4000          0        
    0xbfc40000 0xbfc55000    0x15000          0           [stack]
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>

int x = 0x1234; /* initialized global: the symbol table below lists it in section 24 (.data) */
char *s;        /* uninitialized global: the symbol table below lists it in section 25 (.bss) */

/*
 * Increment a function-local static counter and return its new value.
 *
 * `a` is an initialized static and `b` an uninitialized one. Both have
 * static storage duration, so `a` keeps its value between calls and every
 * call returns one more than the previous call did (0x4568 on the first).
 * `b` is never read or written here; it appears to exist only so that its
 * placement can be compared with `a` in the symbol table.
 */
int test()
{
    static int a = 0x4567;
    static int b;

    return ++a;
}

int main(int argc, char* argv[])
    int i = test() + x;
    s = "Hello, World!";

    char* p = malloc(10);

    return EXIT_SUCCESS;

在分析 ELF 文件结构时我们就已经知道全局变量和静态局部变量在编译期就决定了其内存地址。
$ readelf -s hello

Symbol table '.symtab' contains 79 entries:
   Num:    Value  Size Type    Bind   Vis      Ndx Name
    ... ...

    50: 0804a018     4 OBJECT  LOCAL  DEFAULT   24 a.2344
    51: 0804a024     4 OBJECT  LOCAL  DEFAULT   25 b.2345
    57: 0804a028     4 OBJECT  GLOBAL DEFAULT   25 s
    65: 0804a014     4 OBJECT  GLOBAL DEFAULT   24 x

    ... ...
$ readelf -S hello

There are 38 section headers, starting at offset 0x1a10:

Section Headers:
    [Nr] Name              Type            Addr     Off    Size   ES Flg Lk Inf Al
    ... ...
    [16] .rodata           PROGBITS        080484f8 0004f8 000016 00   A  0   0  4
    [24] .data             PROGBITS        0804a00c 00100c 000010 00  WA  0   0  4
    [25] .bss              NOBITS          0804a01c 00101c 000010 00  WA  0   0  4

Key to Flags:
    W (write), A (alloc), X (execute), M (merge), S (strings)
    I (info), L (link order), G (group), x (unknown)
    O (extra OS processing required) o (OS specific), p (processor specific)

通过对比相关段,我们可确定已初始化的全局和静态变量被分配在 .data 中,而未初始化全局和静态变量则分配在 .bss。
.data 0804a00c ~ 0804a01b : x(0804a014), a(0804a018)
.bss  0804a01c ~ 0804a02b : b(0804a024), s(0804a028)

而代码中的字符串 "Hello, World!" 被分配在 .rodata 中。
$ readelf -p .rodata hello

String dump of section '.rodata':
  [     8]  Hello, World!
$ readelf -x .rodata hello

Hex dump of section '.rodata':
  0x080484f8 03000000 01000200 48656c6c 6f2c2057 ........Hello, W
  0x08048508 6f726c64 2100                       orld!.

$ objdump -dS -M intel hello | less

int x = 0x1234;
char *s;

int test()
    80483e4:       push   ebp
    80483e5:       mov    ebp,esp
        static int a = 0x4567;
        static int b;

        return ++a;
    80483e7:       mov    eax,ds:0x804a018 ; 静态变量 a
    80483ec:       add    eax,0x1 ; 计算 (eax) = (eax) + 1
    80483ef:       mov    ds:0x804a018,eax ; 将结果存回 a
    80483f4:       mov    eax,ds:0x804a018

    ... ...

int main(int argc, char* argv[])
    int i = test() + x;
    8048404:       call   80483e4 <test> ; test() 返回值被存入 eax
    8048409:       mov    edx,DWORD PTR ds:0x804a014 ; 将全局变量 x 值放入 edx
    804840f:       add    eax,edx ; 计算 (eax) = test() + x
    8048411:       mov    DWORD PTR [esp+0x1c],eax ; 局部变量 i = (eax),  显然 i 在栈分配

    s = "Hello, World!";
    8048415:       mov    DWORD PTR ds:0x804a028,0x8048500 ; 将 .rodata 中 "Hello, World!" 的地址赋值给 s
    ... ...

    char* p = malloc(10);
    804841f:       mov    DWORD PTR [esp],0xa
    8048426:       call   804831c <malloc@plt>
    804842b:       mov    DWORD PTR [esp+0x18],eax

    return EXIT_SUCCESS;
    804842f:       mov    eax,0x0

也可以用 gdb 查看运行期分配状态。
(gdb) p &i ; main() 局部变量 i 地址
$1 = (int *) 0xbffff74c

(gdb) p p ; malloc 返回空间指针 p 
$2 = 0x804b008 ""

(gdb) info proc mappings

Mapped address spaces:
    Start Addr   End Addr       Size     Offset objfile
    0x804b000  0x806c000     0x21000          0           [heap]
    0xbffeb000 0xc0000000    0x15000          0           [stack]

很显然,局部变量 i 分配在 Stack,而 malloc 返回给 p 的内存则是在 Heap 上分配(指针变量 p 本身仍是栈上的局部变量)。
