本文汇编代码的平台及编译器:arm/gcc。分析函数调用栈的规则对于理解程序运行的基本原理很有帮助,对通过汇编代码分析crash问题也大有裨益。本文示例代码通过C语言函数调用一个汇编函数,再从汇编函数跳转回C函数,分析该示例的汇编代码就可以理解stack frame的创建过程和arm函数调用的传参规则。
问题:
2.1 传参规则
2.2 stack frame基本结构
示例代码包括两个源文件:transferParam.c和transferParam.S
transferParam.c:
#include <stdio.h>
#include
#include
extern void bionic_clone(int flags, int* child_stack, int* parent_tid, int* tls, int* child_tid, int (*fn)(void*), int* arg);
/*
 * Placeholder thread entry point handed to bionic_clone() as the `fn`
 * argument. It ignores its argument and always reports success (0).
 */
int child(void *v)
{
	(void)v;	/* the argument is intentionally unused */

	return 0;
}
/*
 * Print all seven arguments so the reader can verify that they survived the
 * trip through the assembly trampoline bionic_clone().
 *
 * flags, child_stack, ptid and tls are the first four arguments; child_tid,
 * child and args are the 5th, 6th and 7th. args is dereferenced, so it must
 * point to a valid int.
 *
 * The "%p" conversion requires an argument of type `void *`; passing an
 * `int *` directly is undefined behaviour per the C standard, so every
 * pointer is cast explicitly.
 */
void my_fork(int flags, int *child_stack, int *ptid, int *tls, int *child_tid, int *child, int *args) {
	printf("flags:%d sp:%p ptid:%p tls:%p child_tid:%p child:%p args:%d\n",
	       flags, (void *)child_stack, (void *)ptid, (void *)tls,
	       (void *)child_tid, (void *)child, *args);
}
/*
 * Demo driver: calls the assembly routine bionic_clone() with seven
 * arguments and prints a marker line before and after the call.
 *
 * The pointer arguments are small dummy constants (0x01..0x04) that are
 * never dereferenced here; only the address of arg_value is a real pointer.
 */
int main(){
	int clone_flags = 0;
	int *stack_ptr = (int *)0x01;
	int *ptid = (int *)0x02;
	int *tls_ptr = (int *)0x03;
	int *ctid = (int *)0x04;
	int arg_value = 5;

	printf("%s\n", "before bionic_clone");

	bionic_clone(clone_flags, stack_ptr, ptid, tls_ptr, ctid,
		     child, &arg_value);

	printf("%s\n", "after bionic_clone");

	return 0;
}
c代码中调用了汇编函数bionic_clone,且参数超过4个,需要使用栈传递参数。
transferParam.S:
@ bionic_clone: assembly trampoline called from C with 7 arguments.
@ It leaves r0-r3 untouched, copies the three stack-passed arguments (5th, 6th, 7th)
@ of its caller onto its own stack, and then calls my_fork with them.
@ NOTE(review): r4-r6 are callee-saved under the ARM procedure call standard, but they are
@ overwritten here without being preserved (the save below is commented out) - confirm this
@ is acceptable for the demo.
.globl bionic_clone
bionic_clone:
push {fp, lr} @ save the caller's fp and the return address (lr) on the stack
add fp, sp, #4 @ fp = sp + 4, so fp points at the slot holding the saved lr
@stmfd sp!, {r4, r5, r6}
ldr r4, [fp, #4] @ load the 5th argument (first stack-passed one) into r4
ldr r5, [fp, #8] @ load the 6th argument into r5
ldr r6, [fp, #12] @ load the 7th argument into r6
stmfd sp!, {r4, r5, r6} @ push r4, r5, r6 so that they become the stack-passed arguments of my_fork
bl my_fork
sub sp, fp, #4 @ sp = fp - 4: discard the pushed arguments, sp points at the saved fp again
pop {fp, pc} @ restore fp and pop the saved lr into pc, which returns to the caller
.type bionic_clone,%function
代码执行到ldr r6, [fp, #12] stack frame图示:
正如main函数通过栈给bionic_clone函数传递第5,6,7三个参数,bionic_clone也将r4 r5 r6入栈给my_fork传递参数
/*
 * Leaf-level example function: prints "bar", then prints the sum of its two
 * arguments on its own line.
 */
void bar(int a , int b ) {
printf("bar\n");
/* a is a by-value parameter, so only this function's copy is updated */
a = a + b;
printf("%d\n",a);
}
/*
 * Middle function of the main -> foo -> bar call chain: sets up two locals
 * (0 and 1) and passes them to bar().
 */
void foo() {
int a = 0;
int b = 1;
bar(a, b);
}
/*
 * Entry point of the arm64 example: simply calls foo(). argc and argv are
 * not used. There is no explicit return statement.
 */
int main(int argc, char *argv[]) {
foo();
}
~
反汇编代码:
000000000040072c <bar>:
40072c: a9be7bfd stp x29, x30, [sp,#-32]!
400730: 910003fd mov x29, sp
400734: b9001fa0 str w0, [x29,#28]
400738: b9001ba1 str w1, [x29,#24]
40073c: 90000000 adrp x0, 400000 <_init-0x598>
400740: 91216000 add x0, x0, #0x858
400744: 97ffffaf bl 400600
400748: b9401fa1 ldr w1, [x29,#28]
40074c: b9401ba0 ldr w0, [x29,#24]
400750: 0b000020 add w0, w1, w0
400754: b9001fa0 str w0, [x29,#28]
400758: 90000000 adrp x0, 400000 <_init-0x598>
40075c: 91218000 add x0, x0, #0x860
400760: b9401fa1 ldr w1, [x29,#28]
400764: 97ffffab bl 400610
400768: d503201f nop
40076c: a8c27bfd ldp x29, x30, [sp],#32
400770: d65f03c0 ret
0000000000400774 <foo>:
400774: a9be7bfd stp x29, x30, [sp,#-32]!
400778: 910003fd mov x29, sp
40077c: b9001fbf str wzr, [x29,#28]
400780: 52800020 mov w0, #0x1 // #1
400784: b9001ba0 str w0, [x29,#24]
400788: b9401ba1 ldr w1, [x29,#24]
40078c: b9401fa0 ldr w0, [x29,#28]
400790: 97ffffe7 bl 40072c <bar>
400794: d503201f nop
400798: a8c27bfd ldp x29, x30, [sp],#32
40079c: d65f03c0 ret
00000000004007a0 <main>:
4007a0: a9be7bfd stp x29, x30, [sp,#-32]!
4007a4: 910003fd mov x29, sp
4007a8: b9001fa0 str w0, [x29,#28]
4007ac: f9000ba1 str x1, [x29,#16]
4007b0: 97fffff1 bl 400774 <foo>
4007b4: 52800000 mov w0, #0x0 // #0
4007b8: a8c27bfd ldp x29, x30, [sp],#32
4007bc: d65f03c0 ret
arm64栈帧结构:
5. 实战,内核如何dump backtrace
为了加深对stack frame的理解,可以分析arm64内核如何dump backtrace。内核配置CONFIG_FRAME_POINTER后可以基于fp进行栈回溯。基本原理可以参考上面的栈帧结构:以arm64小节的示例代码为例,main调用foo,foo调用bar,我们从bar开始回溯栈帧。如果先得到bar的x29值,那么x29 + 8处保存的是x30,即返回到调用者(caller)foo的地址;而x29指向的内存保存的是foo的x29,由此又可以回溯到foo函数的栈帧,依次类推就可以回溯整个函数调用链。
kernel-4.14/arch/arm64/kernel/traps.c:
/*
 * dump_backtrace() - print the call trace of a task by walking its chain of
 * frame pointers.
 *
 * @regs: optional register snapshot. When non-NULL, entries are skipped
 *        until the walk reaches the frame whose fp equals regs->regs[29];
 *        at that point regs->pc is printed and normal printing resumes.
 * @tsk:  task to dump; NULL means the current task.
 *
 * The walk ends when unwind_frame() returns a negative value.
 */
void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
{
struct stackframe frame;
int skip;
pr_debug("%s(regs = %p tsk = %p)\n", __func__, regs, tsk);
if (!tsk)
tsk = current;
/* NOTE(review): presumably pins the task's stack until put_task_stack() below - confirm */
if (!try_get_task_stack(tsk))
return;
/* Case 1: dumping the backtrace of the task that is currently running. */
if (tsk == current) {
/*
 * __builtin_frame_address(0) is a compiler builtin that returns the frame
 * address of this function, i.e. the value of x29.
 */
frame.fp = (unsigned long)__builtin_frame_address(0);
frame.pc = (unsigned long)dump_backtrace;
} else {
/*
* task blocked in __switch_to
*/
frame.fp = thread_saved_fp(tsk);
frame.pc = thread_saved_pc(tsk);
}
/* skip is 1 only when a register snapshot was supplied */
skip = !!regs;
printk("Call trace:\n");
while (1) {
unsigned long stack;
int ret;
/* dump_backtrace_entry() (defined elsewhere) prints the entry for the given pc */
/* skip until specified stack frame */
if (!skip) {
dump_backtrace_entry(frame.pc);
} else if (frame.fp == regs->regs[29]) {
skip = 0;
/*
* Mostly, this is the case where this function is
* called in panic/abort. As exception handler's
* stack frame does not contain the corresponding pc
* at which an exception has taken place, use regs->pc
* instead.
*/
dump_backtrace_entry(regs->pc);
}
/* step from the current frame to its caller's frame */
ret = unwind_frame(tsk, &frame);
if (ret < 0)
break;
/*
 * When the new pc lies in the entry text, treat frame.fp as pointing at
 * the stackframe member of a struct pt_regs and dump that whole structure.
 */
if (in_entry_text(frame.pc)) {
stack = frame.fp - offsetof(struct pt_regs, stackframe);
if (on_accessible_stack(tsk, stack))
dump_mem("", "Exception stack", stack,
stack + sizeof(struct pt_regs));
}
}
put_task_stack(tsk);
}
/*
 * AArch64 PCS assigns the frame pointer to x29.
 *
 * A simple function prologue looks like this:
 *	sub	sp, sp, #0x10
 *	stp	x29, x30, [sp]
 *	mov	x29, sp
 *
 * A simple function epilogue looks like this:
 *	mov	sp, x29
 *	ldp	x29, x30, [sp]
 *	add	sp, sp, #0x10
 *
 * unwind_frame() - advance @frame from a callee to its caller.
 *
 * @tsk:   task whose stack is being walked; NULL means the current task.
 * @frame: in: fp of the frame to step out of; out: fp and pc of the caller.
 *
 * Returns 0 on success. Returns -EINVAL when the fp is not 16-byte aligned,
 * is not on an accessible stack of the task, or when both the fp and pc
 * that were read back are zero.
 */
int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame)
{
	unsigned long cur_fp = frame->fp;
	unsigned long *record;

	/* Reject a frame pointer that is not 16-byte aligned. */
	if (cur_fp & 0xf)
		return -EINVAL;

	/* The frame pointer must lie on a stack this task may access. */
	if (!on_accessible_stack(tsk ? tsk : current, cur_fp))
		return -EINVAL;

	/*
	 * The two words at fp are read as a pair: the word at fp is the
	 * caller's frame pointer, and the word at fp + 8 is the return
	 * address into the caller.
	 */
	record = (unsigned long *)cur_fp;
	frame->fp = READ_ONCE_NOCHECK(record[0]);
	frame->pc = READ_ONCE_NOCHECK(record[1]);

	/*
	 * Frames created upon entry from EL0 have NULL FP and PC values, so
	 * don't bother reporting these. Frames created by __noreturn functions
	 * might have a valid FP even if PC is bogus, so only terminate where
	 * both are NULL.
	 */
	if (frame->fp == 0 && frame->pc == 0)
		return -EINVAL;

	return 0;
}