内核中dump_stack的实现原理(1) —— 栈回溯

环境

Aarch64
Qemu
aarch64-linux-gnu-gcc
linux-4.14
 

概述

     栈回溯的目的是将函数的调用栈打印出来,对于分析函数调用和debug系统异常会很有帮助。对于 Aarch64,x29用来当做帧指针,x30用来存放函数返回地址。
 

正文

原理

首先通过一个简单的程序分析一下栈回溯的原理,下面是测试程序:
 1 #include <stdio.h>
 2 
 3 int func3(int b)
 4 {
 5     int a = 10;
 6     printf("a = %d\n", a + b);
 7     return a;
 8 }
 9 
10 int func2(int d)
11 {
12     int b;
13     
14     b = func3(d);
15     printf("b = %d\n", b + d);
16     return b;
17 }
18 
19 int func1(int a)
20 {
21     int d;
22 
23     d = func2(a);
24     printf("d = %d\n", d);
25     return d;
26 }
27 
28 int main(int argc, const char *argv[])
29 {
30     int a = 10;
31 
32     func1(a);
33     return 0;
34 }

 然后我们对其进行编译和反汇编:

aarch64-linux-gnu-gcc a.c -o main
aarch64-linux-gnu-objdump -D main > main.S

 

下面是main.S文件:
 1 000000000040055c <func3>:
 2   40055c:    a9bd7bfd     stp    x29, x30, [sp, #-48]!
 3   400560:    910003fd     mov    x29, sp
 4   400564:    b9001fa0     str    w0, [x29, #28]
 5   400568:    52800140     mov    w0, #0xa                       // #10
 6   40056c:    b9002fa0     str    w0, [x29, #44]
 7   400570:    b9402fa1     ldr    w1, [x29, #44]
 8   400574:    b9401fa0     ldr    w0, [x29, #28]
 9   400578:    0b000021     add    w1, w1, w0
10   40057c:    90000000     adrp    x0, 400000 <_init-0x3e8>
11   400580:    911b8000     add    x0, x0, #0x6e0
12   400584:    97ffffb3     bl    400450 
13   400588:    b9402fa0     ldr    w0, [x29, #44]
14   40058c:    a8c37bfd     ldp    x29, x30, [sp], #48
15   400590:    d65f03c0     ret
16 
17 0000000000400594 <func2>:
18   400594:    a9bd7bfd     stp    x29, x30, [sp, #-48]!
19   400598:    910003fd     mov    x29, sp
20   40059c:    b9001fa0     str    w0, [x29, #28]
21   4005a0:    b9401fa0     ldr    w0, [x29, #28]
22   4005a4:    97ffffee     bl    40055c <func3>
23   4005a8:    b9002fa0     str    w0, [x29, #44]
24   4005ac:    b9402fa1     ldr    w1, [x29, #44]
25   4005b0:    b9401fa0     ldr    w0, [x29, #28]
26   4005b4:    0b000021     add    w1, w1, w0
27   4005b8:    90000000     adrp    x0, 400000 <_init-0x3e8>
28   4005bc:    911ba000     add    x0, x0, #0x6e8
29   4005c0:    97ffffa4     bl    400450 
30   4005c4:    b9402fa0     ldr    w0, [x29, #44]
31   4005c8:    a8c37bfd     ldp    x29, x30, [sp], #48
32   4005cc:    d65f03c0     ret
33 
34 00000000004005d0 <func1>:
35   4005d0:    a9bd7bfd     stp    x29, x30, [sp, #-48]!
36   4005d4:    910003fd     mov    x29, sp
37   4005d8:    b9001fa0     str    w0, [x29, #28]
38   4005dc:    b9401fa0     ldr    w0, [x29, #28]
39   4005e0:    97ffffed     bl    400594 <func2>
40   4005e4:    b9002fa0     str    w0, [x29, #44]
41   4005e8:    90000000     adrp    x0, 400000 <_init-0x3e8>
42   4005ec:    911bc000     add    x0, x0, #0x6f0
43   4005f0:    b9402fa1     ldr    w1, [x29, #44]
44   4005f4:    97ffff97     bl    400450 
45   4005f8:    b9402fa0     ldr    w0, [x29, #44]
46   4005fc:    a8c37bfd     ldp    x29, x30, [sp], #48
47   400600:    d65f03c0     ret
48 
49 0000000000400604 <main>:
50   400604:    a9bd7bfd     stp    x29, x30, [sp, #-48]!
51   400608:    910003fd     mov    x29, sp
52   40060c:    b9001fa0     str    w0, [x29, #28]
53   400610:    f9000ba1     str    x1, [x29, #16]
54   400614:    52800140     mov    w0, #0xa                       // #10
55   400618:    b9002fa0     str    w0, [x29, #44]
56   40061c:    b9402fa0     ldr    w0, [x29, #44]
57   400620:    97ffffec     bl    4005d0 <func1>
58   400624:    52800000     mov    w0, #0x0                       // #0
59   400628:    a8c37bfd     ldp    x29, x30, [sp], #48
60   40062c:    d65f03c0     ret
 
main:
第50行,将main函数的返回地址和上级函数的栈底存入main函数的栈底,剩余的main栈用于存放main的局部变量
第59行,执行完毕后,x30中存放的是main的返回地址,x29指向的是上一级函数的栈底
func1:
第35行, 将func1函数的返回地址和main函数的栈底存入func1函数的栈底,剩余的func1栈用于存放func1的局部变量
第46行, 执行完毕后,x30中存放的是func1的返回地址,即第58行,x29指向的是main函数的栈底
func2:
第18行,将func2函数的返回地址和func1函数的栈底存入func2函数的栈底,剩余的func2栈用于存放func2的局部变量
第31行, 执行完毕后,x30中存放的是func2的返回地址,即第40行,x29指向的是func1函数的栈底
func3:
第2行,将func3函数的返回地址和func2函数的栈底存入func3函数的栈底,剩余的func3栈用于存放func3的局部变量
第14行,执行完毕后,x30中存放的是func3的返回地址,即第23行,x29指向的是func2函数的栈底
 
最终可以得到下面的示意图:
内核中dump_stack的实现原理(1) —— 栈回溯_第1张图片
 
所以,在函数func3中,通过上面的结构就可以从func3回溯到main函数。
gcc提供了编译选项-fomit-frame-pointer和-fno-omit-frame-pointer,如果在编译时指定了 -fomit-frame-pointer,那么就没有帧指针了,所以也就无法通过帧指针进行栈回溯了;指定 -fno-omit-frame-pointer 则会保留帧指针。这里默认有帧指针。

使用API进行栈回溯

在用户空间提供了回溯用的API:
#include <execinfo.h>
int backtrace(void **buffer, int size);
char **backtrace_symbols(void *const *buffer, int size);

 在func3使用上面的两个函数回溯一下:

 1 #include 
 2 
 3 int func3(int b)
 4 {
 5     int a = 10, n, i;
 6     void *buffer[10];
 7     char **strings;
 8 
 9     n = backtrace(buffer, 10);
10     strings = backtrace_symbols(buffer, n);
11 
12     for (i = 0; i < n; i++)
13         printf("%s\n", strings[i]);
14 
15     printf("a = %d\n", a + b);
16 
17     return a;
18 }
 
编译:
aarch64-linux-gnu-gcc -funwind-tables -rdynamic -O0 -g a.c -o main
 
运行:
[root@aarch64 ~]# ./main 
./main(func3+0x20) [0x400a1c]
./main(func2+0x14) [0x400aa4]
./main(func1+0x14) [0x400ae0]
./main(main+0x20) [0x400b20]
/lib/libc.so.6(__libc_start_main+0xec) [0xffffb8d732c8]
a = 20
b = 20
d = 10
 

使用内联汇编进行栈回溯

如果不使用上面的API的话,也可以用访问寄存器的方式来完成:
 1 #include 
 2 
 3 typedef struct {
 4     unsigned long x29;
 5     unsigned long x30;
 6 } node_t;
 7 
 8 void back(void)
 9 {
10     node_t *addr;
11 
12     printf("\nBackstrace not use api:\n");
13 
14     asm volatile("mov %0, x29\n\t":"=r"(addr)::);
15     while(addr && addr->x30 && addr->x29) {
16         printf("\t%p\n", addr->x30);
17         addr = (node_t *)addr->x29;
18     }
19 }
20 
21 int func3(int b)
22 {
23     int a = 10, n, i;
24     void *buffer[10];
25     char **strings;
26 
27     n = backtrace(buffer, 10);
28     strings = backtrace_symbols(buffer, n);
29 
30     printf("\nBackstrace use api:\n");
31     for (i = 0; i < n; i++)
32         printf("\t%s\n", strings[i]);
33 
34     back();
35 
36     printf("a = %d\n", a + b);
37     return a;
38 }
 
下面是运行结果:
[root@aarch64 ~]# ./main

Backstrace use api:
        ./main(func3+0x20) [0x400ab4]
        ./main(func2+0x14) [0x400b54]
        ./main(func1+0x14) [0x400b90]
        ./main(main+0x20) [0x400bd0]
        /lib/libc.so.6(__libc_start_main+0xec) [0xffff993d12c8]

Backstrace not use api:
        0x400b1c
        0x400b54
        0x400b90
        0x400bd0
        0xffff993d12c8
a = 20
b = 20
d = 10

 

 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 

 

 

使用GCC的内部函数进行栈回溯

gcc提供了两个内置的函数用来获取函数的返回地址和帧指针的值:
内核中dump_stack的实现原理(1) —— 栈回溯_第2张图片
内核中dump_stack的实现原理(1) —— 栈回溯_第3张图片
内核中dump_stack的实现原理(1) —— 栈回溯_第4张图片
 
下面用这两个内置函数实现一下回溯:
 1 #include 
 2 
 3 typedef struct {
 4     unsigned long x29;
 5     unsigned long x30;
 6 } node_t;
 7 
 8 void back(void)
 9 {
10     node_t *addr;
11 
12     printf("\nBackstrace not use api:\n");
13 
14     asm volatile("mov %0, x29\n\t":"=r"(addr)::);
15     while(addr && addr->x30 && addr->x29) {
16         printf("\t%p\n", addr->x30);
17         addr = (node_t *)addr->x29;
18     }
19 }
20 
21 void back_builtin(void)
22 {
23     node_t *addr;
24 
25     printf("\nBackstrace using builtin func:\n");
26 
27     printf("the return address of back_builtin: %p\n",
28         __builtin_return_address(0));
29     addr = __builtin_frame_address(0);
30     while(addr && addr->x30 && addr->x29) {
31         printf("\t%p\n", addr->x30);
32         addr = (node_t *)addr->x29;
33     }
34 }
35 
36 int func3(int b)
37 {
38     int a = 10, n, i;
39     void *buffer[10];
40     char **strings;
41 
42     n = backtrace(buffer, 10);
43     strings = backtrace_symbols(buffer, n);
44 
45     printf("\nBackstrace use api:\n");
46     for (i = 0; i < n; i++)
47         printf("\t%s\n", strings[i]);
48 
49     back();
50 
51     back_builtin();
52 
53     printf("a = %d\n", a + b);
54     return a;
55 } 
 
 
运行结果:
[root@aarch64 ~]# ./main 

Backstrace use api:
        ./main(func3+0x20) [0x400b74]
        ./main(func2+0x14) [0x400c18]
        ./main(func1+0x14) [0x400c54]
        ./main(main+0x20) [0x400c94]
        /lib/libc.so.6(__libc_start_main+0xec) [0xffffb92632c8]

Backstrace not use api:
        0x400bdc
        0x400c18
        0x400c54
        0x400c94
        0xffffb92632c8

Backstrace using builtin func:
the return address of back_builtin: 0x400be0
        0x400be0
        0x400c18
        0x400c54
        0x400c94
        0xffffb92632c8
a = 20
b = 20
d = 10

不使用API进行栈回溯的缺点是只能打印出地址,但是却无法打印出具体的符号名字。

 

内核的栈回溯

在Linux内核中可以使用WARN_ON来输出当前的函数调用栈。下面是一个测试程序输出的log:
[   20.419376] ------------[ cut here ]------------
[   20.420062] WARNING: CPU: 0 PID: 1115 at /home/pengdonglin/disk_ext/Qemu/aarch64/demo_driver/demo.c:35 demo_init+0xc/0x1000 [demo]
[   20.420546] Modules linked in: demo(O+)
[   20.421004] CPU: 0 PID: 1115 Comm: insmod Tainted: G           O    4.14.92-ga06114e5 #3
[   20.421371] Hardware name: linux,dummy-virt (DT)
[   20.421600] task: ffff80000831af00 task.stack: ffff00000aef8000
[   20.421899] PC is at demo_init+0xc/0x1000 [demo]
[   20.422142] LR is at do_one_initcall+0x44/0x130
[   20.422360] pc : [<ffff000000c5500c>] lr : [<ffff000008083cc4>] pstate: 40000145
[   20.422657] sp : ffff00000aefbc40
[   20.422853] x29: ffff00000aefbc40 x28: ffff000000c520d0 
[   20.423140] x27: 00000000014000c0 x26: ffff80000836d800 
[   20.423367] x25: ffff0000081b3848 x24: ffff800079596080 
[   20.423570] x23: 0000000000000001 x22: ffff800079596200 
[   20.423787] x21: ffff80000831af00 x20: 0000000000000000 
[   20.423989] x19: ffff000000c55000 x18: 0000000000000000 
[   20.424189] x17: 0000000000002571 x16: 0000000000002570 
[   20.424390] x15: ffffffffffffffff x14: ffffffffffffffff 
[   20.424591] x13: ffffffffffffffff x12: 00000000a49cf051 
[   20.424791] x11: ffff80000831b7f0 x10: 0000000000000001 
[   20.425019] x9 : ffff00000aefba10 x8 : ffff0000091fc000 
[   20.425227] x7 : ffff000008246790 x6 : 0000000000000000 
[   20.425428] x5 : 0000000000000100 x4 : 0000000000000000 
[   20.425629] x3 : 0000000000000000 x2 : 0000000000000001 
[   20.425828] x1 : ffff80000831af00 x0 : 0000000000000000 
[   20.426106] 
[   20.426106] X1: 0xffff80000831ae80:
[   20.426292] ae80  00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
[   20.426774] aea0  00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
[   20.427177] aec0  00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
[   20.427568] aee0  00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
[   20.427973] af00  00000020 00000000 ffffffff ffffffff 00000000 00000000 00000000 00000000
[   20.428389] af20  0aef8000 ffff0000 00000002 00400100 00000000 00000000 00000000 00000000
[   20.428899] af40  00000001 00000000 00000007 00000000 fffeee58 00000000 7c082f00 ffff8000
[   20.429415] af60  00000000 00000001 00000078 00000078 00000078 00000000 08c60b88 ffff0000
[   20.429962] 
[   20.429962] X11: 0xffff80000831b770:
[   20.430199] b770  00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
[   20.430706] b790  00000000 00000000 00000a50 00000000 082c2790 ffff0000 080816dc ffff0000
[   20.431223] b7b0  00000a4f 00000a50 00000000 00000000 080ed6bc ffff0000 08081f18 ffff0000
[   20.431743] b7d0  00000989 000009b8 00000001 00000000 00000000 00000000 00000000 00000000
[   20.432262] b7f0  00000000 00000000 08175f18 ffff0000 09198b88 ffff0000 00000000 00000000
[   20.432779] b810  000c0001 00000000 683da59d 64dfae01 08175c7c ffff0000 09198b88 ffff0000
[   20.433297] b830  00000000 00000000 000c0001 00000000 4ccad968 18f18caf 08175e48 ffff0000
[   20.433813] b850  09198cb8 ffff0000 00000000 00000000 000c0005 00000000 e69cf65f 335c3458
[   20.434334] 
[   20.434334] X21: 0xffff80000831ae80:
[   20.434561] ae80  00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
[   20.435073] aea0  00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
[   20.435598] aec0  00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
[   20.436228] aee0  00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
[   20.436756] af00  00000020 00000000 ffffffff ffffffff 00000000 00000000 00000000 00000000
[   20.437271] af20  0aef8000 ffff0000 00000002 00400100 00000000 00000000 00000000 00000000
[   20.437789] af40  00000001 00000000 00000007 00000000 fffeee58 00000000 7c082f00 ffff8000
[   20.438466] af60  00000000 00000001 00000078 00000078 00000078 00000000 08c60b88 ffff0000
[   20.439138] 
[   20.439138] X22: 0xffff800079596180:
[   20.439386] 6180  746f6e2e 6e672e65 75622e75 2d646c69 00006469 00000000 00000000 00000000
[   20.440006] 61a0  00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
[   20.440529] 61c0  00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
[   20.441098] 61e0  00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
[   20.441608] 6200  79596480 ffff8000 6f6d6564 00000000 00c55000 ffff0000 00000000 00000000
[   20.442065] 6220  00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
[   20.442529] 6240  00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
[   20.443078] 6260  00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
[   20.443652] 
[   20.443652] X24: 0xffff800079596000:
[   20.443856] 6000  7274732e 00626174 00000000 00000000 00000000 00000000 00000000 00000000
[   20.444321] 6020  00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
[   20.444809] 6040  00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
[   20.445273] 6060  00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
[   20.445707] 6080  79596100 ffff8000 00000001 00000000 7a34a700 ffff8000 00000124 00000000
[   20.446208] 60a0  09e94940 ffff0000 00000000 00000000 00000024 00000000 00c51000 ffff0000
[   20.446775] 60c0  081b3720 ffff0000 00000000 00000000 00000000 00000000 00000000 00000000
[   20.447339] 60e0  00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
[   20.447919] 
[   20.447919] X26: 0xffff80000836d780:
[   20.448134] d780  00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
[   20.448637] d7a0  00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
[   20.449144] d7c0  00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
[   20.449641] d7e0  00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
[   20.450148] d800  08eee848 ffff0000 00000000 00000000 00000000 00000000 0836dca8 ffff8000
[   20.450646] d820  00000000 00000000 0000000d 00000000 7a34a700 ffff8000 00000124 00000000
[   20.451149] d840  09e94938 ffff0000 00000000 00000000 081b3848 ffff0000 00000000 00000000
[   20.451652] d860  00000000 00000000 00000000 00000000 00000000 00000000 7a34a700 ffff8000
[   20.452138] 
[   20.452334] Call trace:
[   20.452522] Exception stack(0xffff00000aefbb00 to 0xffff00000aefbc40)
[   20.452823] bb00: 0000000000000000 ffff80000831af00 0000000000000001 0000000000000000
[   20.453076] bb20: 0000000000000000 0000000000000100 0000000000000000 ffff000008246790
[   20.453321] bb40: ffff0000091fc000 ffff00000aefba10 0000000000000001 ffff80000831b7f0
[   20.453544] bb60: 00000000a49cf051 ffffffffffffffff ffffffffffffffff ffffffffffffffff
[   20.453762] bb80: 0000000000002570 0000000000002571 0000000000000000 ffff000000c55000
[   20.453979] bba0: 0000000000000000 ffff80000831af00 ffff800079596200 0000000000000001
[   20.454199] bbc0: ffff800079596080 ffff0000081b3848 ffff80000836d800 00000000014000c0
[   20.454419] bbe0: ffff000000c520d0 ffff00000aefbc40 ffff000008083cc4 ffff00000aefbc40
[   20.454639] bc00: ffff000000c5500c 0000000040000145 0000000000000001 ffff0000081b951c
[   20.454880] bc20: 0000ffffffffffff ffff00000818d420 ffff00000aefbc40 ffff000000c5500c
[   20.455190] [] demo_init+0xc/0x1000 [demo]
[   20.455419] [] do_one_initcall+0x44/0x130
[   20.455645] [] do_init_module+0x64/0x1d4
[   20.455859] [] load_module+0x1e1c/0x24f0
[   20.456096] [] SyS_init_module+0x180/0x218
[   20.456299] Exception stack(0xffff00000aefbec0 to 0xffff00000aefc000)
[   20.456522] bec0: 0000ffffa3ec4010 000000000003b850 00000000005cad68 0000000000000000
[   20.456776] bee0: 00000000ffffffff 0000000000000000 000000001bd34680 00000000005e2bf8
[   20.457026] bf00: 0000000000000069 00000000005e4dc0 0000000000000003 0101010101010101
[   20.457288] bf20: 0000000000000038 0000000000000001 00000000005e9000 00000000005e3000
[   20.457539] bf40: 0000000000000000 0000000000000001 0000000000000000 0000000000000000
[   20.457791] bf60: 0000000000000069 0000fffff0d52878 0000000000000008 0000fffff0d52880
[   20.458052] bf80: 0000fffff0d52888 00000000005cad68 00000000005b9b24 0000000000000000
[   20.458295] bfa0: 0000000000000000 0000fffff0d52720 000000000045bc74 0000fffff0d52440
[   20.458546] bfc0: 0000000000409bf8 0000000040000000 0000ffffa3ec4010 0000000000000069
[   20.458776] bfe0: 0000000000000000 0000000000000000 0000000000000000 0000000000000000
[   20.459002] [] el0_svc_naked+0x34/0x38
[   20.459211] ---[ end trace 1d225ceb44d51601 ]---

 

分析:

include/asm-generic/bug.h:

#define WARN_ON(condition) ({                                           \
        int __ret_warn_on = !!(condition);                              \
        if (unlikely(__ret_warn_on))                                    \
                __WARN();                                               \
        unlikely(__ret_warn_on);                                        \
})

 

当给WARN_ON传递非0的参数时,就会调用__WARN()打印栈:

 1 // 定义在include/asm-generic/bug.h中
 2 #define __WARN()   __WARN_TAINT(TAINT_WARN)  // TAINT_WARN是9
 3 #define __WARN_TAINT(taint)  __WARN_FLAGS(BUGFLAG_TAINT(taint))
 4 
 5 // 定义在arch/arm64/include/asm/bug.h中
 6 #define __WARN_FLAGS(flags) __BUG_FLAGS(BUGFLAG_WARNING|(flags))   // flags是0x900
 7 #define __BUG_FLAGS(flags)                \
 8     asm volatile (__stringify(ASM_BUG_FLAGS(flags)));  // flags是0x901
 9 
10 #define _BUGVERBOSE_LOCATION(file, line) __BUGVERBOSE_LOCATION(file, line)
11 #define __BUGVERBOSE_LOCATION(file, line)            \
12         .pushsection .rodata.str,"aMS",@progbits,1;    \
13     2:    .string file;                    \
14         .popsection;                    \
15                                 \
16         .long 2b - 0b;                    \
17         .short line;
18 
19 #define __BUG_ENTRY(flags)                 \
20         .pushsection __bug_table,"aw";        \
21         .align 2;                \
22     0:    .long 1f - 0b;                \
23 _BUGVERBOSE_LOCATION(__FILE__, __LINE__)        \
24         .short flags;                 \
25         .popsection;                \
26     1:
27 
28 #define ASM_BUG_FLAGS(flags)                \
29     __BUG_ENTRY(flags)                \
30     brk    BUG_BRK_IMM

第29行,在__bug_table段中增加一项(bug_entry),其中存放的数据(如brk指令的地址,以及文件名和行号等)在异常处理程序中会被访问,可以参考find_bug函数

第30行的brk BUG_BRK_IMM中的宏 BUG_BRK_IMM定义如下:
/*
 * #imm16 values used for BRK instruction generation
 * Allowed values for kgdb are 0x400 - 0x7ff
 * 0x100: for triggering a fault on purpose (reserved)
 * 0x400: for dynamic BRK instruction
 * 0x401: for compile time BRK instruction
 * 0x800: kernel-mode BUG() and WARN() traps
 */
#define FAULT_BRK_IMM            0x100
#define KGDB_DYN_DBG_BRK_IMM        0x400
#define KGDB_COMPILED_DBG_BRK_IMM    0x401
#define BUG_BRK_IMM            0x800

也就是在WARN_ON的最后会执行brk 0x800,这条指令会触发一个debug同步异常:

内核中dump_stack的实现原理(1) —— 栈回溯_第5张图片
Linux内核的异常入口在arch/arm64/kernel/entry.S中,这里对应的是el1_sync:
 1 /*
 2  * EL1 mode handlers.
 3  */
 4     .align    6
 5 el1_sync:
 6     kernel_entry 1
 7     mrs    x1, esr_el1            // read the syndrome register
 8     lsr    x24, x1, #ESR_ELx_EC_SHIFT    // exception class
 9     cmp    x24, #ESR_ELx_EC_DABT_CUR    // data abort in EL1
10     b.eq    el1_da
11     cmp    x24, #ESR_ELx_EC_IABT_CUR    // instruction abort in EL1
12     b.eq    el1_ia
13     cmp    x24, #ESR_ELx_EC_SYS64        // configurable trap
14     b.eq    el1_undef
15     cmp    x24, #ESR_ELx_EC_SP_ALIGN    // stack alignment exception
16     b.eq    el1_sp_pc
17     cmp    x24, #ESR_ELx_EC_PC_ALIGN    // pc alignment exception
18     b.eq    el1_sp_pc
19     cmp    x24, #ESR_ELx_EC_UNKNOWN    // unknown exception in EL1
20     b.eq    el1_undef
21     cmp    x24, #ESR_ELx_EC_BREAKPT_CUR    // debug exception in EL1
22     b.ge    el1_dbg
23     b    el1_inv
24     
25 ... ...
26         
27 el1_dbg:
28     /*
29      * Debug exception handling
30      */
31     cmp    x24, #ESR_ELx_EC_BRK64        // if BRK64
32     cinc    x24, x24, eq            // set bit '0'
33     tbz    x24, #0, el1_inv        // EL1 only
34     mrs    x0, far_el1
35     mov    x2, sp                // struct pt_regs
36     bl    do_debug_exception
37     kernel_exit 1
 
 
第36行,调用do_debug_exception时,x0存放的是异常指令的地址,x1存放的是ESR_EL1,其中存放了异常的类型,x2存放的是struct pt_regs的首地址,在kernel_entry宏中会将通用寄存器的值存放到其中,struct pt_regs的定义如下:
 1 /*
 2  * This struct defines the way the registers are stored on the stack during an
 3  * exception. Note that sizeof(struct pt_regs) has to be a multiple of 16 (for
 4  * stack alignment). struct user_pt_regs must form a prefix of struct pt_regs.
 5  */
 6 struct pt_regs {
 7     union {
 8         struct user_pt_regs user_regs;
 9         struct {
10             u64 regs[31]; // 存放发生异常时X0~X30的值
11             u64 sp;  // 存放异常发生时的栈指针(对EL1异常而言,即struct pt_regs结束处的地址)
12             u64 pc;  // 用于存放发生异常的指令的地址(后面在bug_handler中会加4来修正,就可以从发生异常的指令的下一条指令继续执行)
13             u64 pstate; // 用于存放异常发生时的PSTATE的状态
14         };
15     };
16     u64 orig_x0;
17 #ifdef __AARCH64EB__
18     u32 unused2;
19     s32 syscallno;
20 #else
21     s32 syscallno;
22     u32 unused2;
23 #endif
24 
25     u64 orig_addr_limit;  // 用于备份异常进程的thread_info.addr_limit
26     u64 unused;    // maintain 16 byte alignment
27     u64 stackframe[2]; // 用于存放x29和异常指令的地址
28 };

 

 
这里重点关注一下pt_regs的stackframe[2],它用于将进程栈和异常栈连接在一起,这样就可以从异常栈里一直回溯到进程栈。连接过程是在arch/arm64/kernel/entry.S中:
 mrs    x22, elr_el1
 mrs    x23, spsr_el1
 stp    lr, x21, [sp, #S_LR]

 /*
  * In order to be able to dump the contents of struct pt_regs at the
  * time the exception was taken (in case we attempt to walk the call
  * stack later), chain it together with the stack frames.
  */
 .if \el == 0
 stp    xzr, xzr, [sp, #S_STACKFRAME]
 .else
 stp    x29, x22, [sp, #S_STACKFRAME]
 .endif
 add    x29, sp, #S_STACKFRAME

 

下面分析do_debug_exception:

 1 asmlinkage int __exception do_debug_exception(unsigned long addr,
 2                           unsigned int esr,
 3                           struct pt_regs *regs)
 4 {
 5     const struct fault_info *inf = debug_fault_info + DBG_ESR_EVT(esr);
 6     struct siginfo info;
 7     int rv;
 8 
 9     /*
10      * Tell lockdep we disabled irqs in entry.S. Do nothing if they were
11      * already disabled to preserve the last enabled/disabled addresses.
12      */
13     if (interrupts_enabled(regs))
14         trace_hardirqs_off();
15 
16     if (user_mode(regs) && instruction_pointer(regs) > TASK_SIZE)
17         arm64_apply_bp_hardening();
18 
19     if (!inf->fn(addr, esr, regs)) {
20         rv = 1;
21     } else {
22         pr_alert("Unhandled debug exception: %s (0x%08x) at 0x%016lx\n",
23              inf->name, esr, addr);
24 
25         info.si_signo = inf->sig;
26         info.si_errno = 0;
27         info.si_code  = inf->code;
28         info.si_addr  = (void __user *)addr;
29         arm64_notify_die("", regs, &info, 0);
30         rv = 0;
31     }
32 
33     if (interrupts_enabled(regs))
34         trace_hardirqs_on();
35 
36     return rv;
37 }

 

 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
第5行,esr的值是0x3C,所以DBG_ESR_EVT(esr)的值就是(0x3C>>1)&0x7 = 6,所以就是debug_fault_info数组的第6项: { early_brk64, SIGTRAP, TRAP_BRKPT, "aarch64 BRK" }
第19行,执行 early_brk64
 
1 /*
2  * Initial handler for AArch64 BRK exceptions
3  * This handler only used until debug_traps_init().
4  */
5 int __init early_brk64(unsigned long addr, unsigned int esr,
6         struct pt_regs *regs)
7 {
8     return bug_handler(regs, esr) != DBG_HOOK_HANDLED;
9 }

 

接着又调用了bug_handler
 1 static int bug_handler(struct pt_regs *regs, unsigned int esr)
 2 {
 3     if (user_mode(regs))
 4         return DBG_HOOK_ERROR;
 5 
 6     switch (report_bug(regs->pc, regs)) {
 7     case BUG_TRAP_TYPE_BUG:
 8         die("Oops - BUG", regs, 0);
 9         break;
10 
11     case BUG_TRAP_TYPE_WARN:
12         break;
13 
14     default:
15         /* unknown/unrecognised bug trap type */
16         return DBG_HOOK_ERROR;
17     }
18 
19     /* If thread survives, skip over the BUG instruction and continue: */
20     regs->pc += AARCH64_INSN_SIZE;    /* skip BRK and resume */
21     return DBG_HOOK_HANDLED;
22 }
 
第3行,检查异常发生时是否是EL0,如果是的话,直接返回,即这个函数只能被特权模式调用
第6行,调用report_bug,将异常指令的地址和pt_regs传递给它
下面分析report_bug:
 1 enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs)
 2 {
 3     struct bug_entry *bug;
 4     const char *file;
 5     unsigned line, warning, once, done;
 6 
 7     if (!is_valid_bugaddr(bugaddr))
 8         return BUG_TRAP_TYPE_NONE;
 9 
10     bug = find_bug(bugaddr);
11     if (!bug)
12         return BUG_TRAP_TYPE_NONE;
13 
14     file = NULL;
15     line = 0;
16     warning = 0;
17 
18     if (bug) {
19 #ifdef CONFIG_DEBUG_BUGVERBOSE
20 #ifndef CONFIG_GENERIC_BUG_RELATIVE_POINTERS
21         file = bug->file;
22 #else
23         file = (const char *)bug + bug->file_disp;
24 #endif
25         line = bug->line;
26 #endif
27         warning = (bug->flags & BUGFLAG_WARNING) != 0;
28         once = (bug->flags & BUGFLAG_ONCE) != 0;
29         done = (bug->flags & BUGFLAG_DONE) != 0;
30 
31         if (warning && once) {
32             if (done)
33                 return BUG_TRAP_TYPE_WARN;
34 
35             /*
36              * Since this is the only store, concurrency is not an issue.
37              */
38             bug->flags |= BUGFLAG_DONE;
39         }
40     }
41 
42     if (warning) {
43         /* this is a WARN_ON rather than BUG/BUG_ON */
44         __warn(file, line, (void *)bugaddr, BUG_GET_TAINT(bug), regs,
45                NULL);
46         return BUG_TRAP_TYPE_WARN;
47     }
48 
49     printk(KERN_DEFAULT "------------[ cut here ]------------\n");
50 
51     if (file)
52         pr_crit("kernel BUG at %s:%u!\n", file, line);
53     else
54         pr_crit("Kernel BUG at %p [verbose debug info unavailable]\n",
55             (void *)bugaddr);
56 
57     return BUG_TRAP_TYPE_BUG;
58 }

第10行,在__bug_table中找到bugaddr对应的那一项bug_entry,每个WARN_ON都会增加一项,这个函数首先在kernel的__bug_table段进行搜索,如果没有找到的话,就会在module的bug_table中进行搜索,这个很好理解,如果是静态编译到内核里的,那么就会在kernel的__bug_table里找到,如果编译到了内核模块中,那么就会在module的bug_table中找到。可以参考前面对__BUG_ENTRY的分析

第27行,bug->flags的值是0x901,所以warning是1,once和done都是0
第44行,调用__warn,file表示文件名,line表示行号,bugaddr表示brk指令的地址,regs为pt_regs
 
下面分析__warn:
 1 void __warn(const char *file, int line, void *caller, unsigned taint,
 2         struct pt_regs *regs, struct warn_args *args)
 3 {
 4     disable_trace_on_warning();
 5 
 6     pr_warn("------------[ cut here ]------------\n");
 7 
 8     if (file)
 9         pr_warn("WARNING: CPU: %d PID: %d at %s:%d %pS\n",
10             raw_smp_processor_id(), current->pid, file, line,
11             caller);
12     else
13         pr_warn("WARNING: CPU: %d PID: %d at %pS\n",
14             raw_smp_processor_id(), current->pid, caller);
15 
16     if (args)
17         vprintk(args->fmt, args->args);
18 
19     if (panic_on_warn) {
20         /*
21          * This thread may hit another WARN() in the panic path.
22          * Resetting this prevents additional WARN() from panicking the
23          * system on this thread.  Other threads are blocked by the
24          * panic_mutex in panic().
25          */
26         panic_on_warn = 0;
27         panic("panic_on_warn set ...\n");
28     }
29 
30     print_modules();
31 
32     if (regs)
33         show_regs(regs);
34     else
35         dump_stack();
36 
37     print_oops_end_marker();
38 
39     /* Just a warning, don't kill lockdep. */
40     add_taint(taint, LOCKDEP_STILL_OK);
41 }
 
 
第8~14,对应上面内核log的第2行,输出当前的cpu编号,当前的进程号,WARN_ON所在的文件名和行号,以及caller,即WARN_ON在被调用函数中的位置,就是demo_init+0xc/0x1000 [demo]
第16~17,输出args中的内容,这里是NULL,如果没有定义__WARN_TAINT,并且使用的是__WARN_printf,那么这里的args就不是NULL了。参考include/asm-generic/bug.h
第19~28,panic_on_warn表示调用__warn时是否触发panic。可以通过修改/proc/sys/kernel/panic_on_warn来改变panic_on_warn的值
第30行,输出module信息,对应内核log的第3行:demo(O+)。 其中demo表示module的名字,'O'表示TAINT_OOT_MODULE,'+'表示模块正在被加载,'-'表示模块正在被卸载
第33行,调用show_regs输出寄存器和栈信息,定义在arch/arm64/kernel/process.c中:
1 void show_regs(struct pt_regs * regs)
2 {
3     __show_regs(regs);
4     dump_backtrace(regs, NULL);
5 }
 
下面分析一下__show_regs和dump_backtrace。
__show_regs:
 1 void __show_regs(struct pt_regs *regs)
 2 {
 3     int i, top_reg;
 4     u64 lr, sp;
 5 
 6     if (compat_user_mode(regs)) {
 7         lr = regs->compat_lr;
 8         sp = regs->compat_sp;
 9         top_reg = 12;
10     } else {
11         lr = regs->regs[30];
12         sp = regs->sp;
13         top_reg = 29;
14     }
15 
16     show_regs_print_info(KERN_DEFAULT);
17     print_symbol("PC is at %s\n", instruction_pointer(regs));
18     print_symbol("LR is at %s\n", lr);
19     printk("pc : [<%016llx>] lr : [<%016llx>] pstate: %08llx\n",
20            regs->pc, lr, regs->pstate);
21     printk("sp : %016llx\n", sp);
22 
23     i = top_reg;
24 
25     while (i >= 0) {
26         printk("x%-2d: %016llx ", i, regs->regs[i]);
27         i--;
28 
29         if (i % 2 == 0) {
30             pr_cont("x%-2d: %016llx ", i, regs->regs[i]);
31             i--;
32         }
33 
34         pr_cont("\n");
35     }
36     if (!user_mode(regs))
37         show_extra_register_data(regs, 128);
38     printk("\n");
39 }

 

 
 
第6~14,判断是不是compat_user_mode,即发生异常是从Aarch32到Aarch64,即从ARMv7切到ARMv8,这俩在寄存器上需要做一下映射。参考arch/arm64/include/asm/ptrace.h:
1 /* Architecturally defined mapping between AArch32 and AArch64 registers */
2 #define compat_fp    regs[11]
3 #define compat_sp    regs[13]
4 #define compat_lr    regs[14]
 
 
第17行,对应的就是log里的第7行:PC is at demo_init+0xc/0x1000 [demo],print_symbol函数将地址转换为内核的符号,这个后面分析。
第25~35,输出通用寄存器信息, 对于Aarch32切到Aarch64的情况,只输出x0~x12寄存器的值。这里可以学习一下printk和pr_cont的用法。
第36~37,如果发生异常时处于特权模式,则会找出含有有效地址的寄存器,并输出这些地址周围的空间的内容
 
dump_backtrace:
 1 void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
 2 {
 3     struct stackframe frame;
 4     int skip;
 5 
 6     pr_debug("%s(regs = %p tsk = %p)\n", __func__, regs, tsk);
 7 
 8     if (!tsk)
 9         tsk = current;
10 
11     if (!try_get_task_stack(tsk))
12         return;
13 
14     if (tsk == current) {
15         frame.fp = (unsigned long)__builtin_frame_address(0);
16         frame.pc = (unsigned long)dump_backtrace;
17     } else {
18         /*
19          * task blocked in __switch_to
20          */
21         frame.fp = thread_saved_fp(tsk);
22         frame.pc = thread_saved_pc(tsk);
23     }
24 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
25     frame.graph = tsk->curr_ret_stack;
26 #endif
27 
28     skip = !!regs;
29     printk("Call trace:\n");
30     while (1) {
31         unsigned long stack;
32         int ret;
33 
34         /* skip until specified stack frame */
35         if (!skip) {
36             dump_backtrace_entry(frame.pc);
37         } else if (frame.fp == regs->regs[29]) {
38             skip = 0;
39             /*
40              * Mostly, this is the case where this function is
41              * called in panic/abort. As exception handler's
42              * stack frame does not contain the corresponding pc
43              * at which an exception has taken place, use regs->pc
44              * instead.
45              */
46             dump_backtrace_entry(regs->pc);
47         }
48         ret = unwind_frame(tsk, &frame);
49         if (ret < 0)
50             break;
51         if (in_entry_text(frame.pc)) {
52             stack = frame.fp - offsetof(struct pt_regs, stackframe);
53 
54             if (on_accessible_stack(tsk, stack))
55                 dump_mem("", "Exception stack", stack,
56                      stack + sizeof(struct pt_regs));
57         }
58     }
59 
60     put_task_stack(tsk);
61 }
 
第15行,获取当前帧寄存器x29的值
第28行,skip为1
第30~58,栈回溯。第37行控制输出栈回溯信息的起始点,如果从regs->pc开始全部输出栈回溯信息的话,会得到如下的栈信息:
[   16.097764] [] demo_init+0xc/0x1000 [demo]
[   16.098018] [] show_regs+0x2c/0x38
[   16.098212] [] __warn+0xb4/0x118
[   16.098410] [] report_bug+0xbc/0x140
[   16.098681] [] bug_handler.part.2+0x24/0x78
[   16.098893] [] bug_handler+0x3c/0x48
[   16.099087] [] brk_handler+0xe0/0x1a0
[   16.099289] [] do_debug_exception+0xa4/0x15c
[   16.102487] [] el1_dbg+0x18/0x74
[   16.102699] [] demo_init+0xc/0x1000 [demo]
[   16.102915] [] do_one_initcall+0x44/0x130
[   16.103125] [] do_init_module+0x64/0x1d4
[   16.103322] [] load_module+0x1e1c/0x24f0
[   16.103535] [] SyS_init_module+0x180/0x218

上面的栈回溯信息中,我们只关心从demo_init开始的:

[   16.102699] [] demo_init+0xc/0x1000 [demo]
[   16.102915] [] do_one_initcall+0x44/0x130
[   16.103125] [] do_init_module+0x64/0x1d4
[   16.103322] [] load_module+0x1e1c/0x24f0
[   16.103535] [] SyS_init_module+0x180/0x218
 
 
这就是第37行的作用。这里还应该注意之前说的,将异常栈和进程栈连接起来,实现从异常栈一直回溯到进程栈。
 
dump_backtrace_entry函数定义如下:
 1 static void dump_backtrace_entry(unsigned long where)
 2 {
 3     /*
 4      * Note that 'where' can have a physical address, but it's not handled.
 5      */
 6     print_ip_sym(where);
 7 }
 8 
 9 static inline void print_ip_sym(unsigned long ip)
10 {
11     printk("[<%p>] %pS\n", (void *) ip, (void *) ip);
12 }

上面%pS的作用就是将地址转换成内核符号。

第51~57,输出异常栈信息,主要就是struct pt_regs的内容:
内核中dump_stack的实现原理(1) —— 栈回溯_第6张图片
 
完。
 
 
 
 

转载于:https://www.cnblogs.com/pengdonglin137/p/11109427.html

你可能感兴趣的:(内核中dump_stack的实现原理(1) —— 栈回溯)