arm/arm64函数栈帧(stackframe)结构和传参规则

1. 概述和问题

本文汇编代码的平台及编译器:arm/gcc。分析函数调用栈的规则对于理解程序运行的基本原理很有帮助,对通过汇编代码分析crash问题也大有裨益。本文示例代码通过C语言函数调用一个汇编函数,再从汇编函数跳转回C函数,分析该示例的汇编代码就可以理解stack frame的创建过程和arm函数调用的传参规则。

问题:

  • arm32使用哪些寄存器传参,如果参数超过4个怎么传参?
  • arm32/gcc中函数调用stack frame的创建,以及函数返回stack frame的销毁过程是怎样的?

2.arm32函数传参规则和stack frame基本结构

2.1 传参规则

  • r0-r3传递第1-第4个参数;如果超过4个参数使用栈传递参数,且当前函数栈顶(sp指向的地址)放置第5个参数,sp+4处放置第6个参数。
  • r0存放返回值

2.2 stack frame基本结构

arm/arm64函数栈帧(stackframe)结构和传参规则_第1张图片

3.示例代码

示例代码包括两个源文件:transferParam.c和transferParam.S

transferParam.c:

#include 
#include 
#include 
                                                                                                                                                           
extern void bionic_clone(int flags, int* child_stack, int* parent_tid, int* tls, int* child_tid, int (*fn)(void*), int* arg);

/*
 * child - placeholder entry point handed to bionic_clone as its `fn` argument.
 *
 * The argument is ignored; the function always reports success (0).
 */
int child(void* v)
{
    (void)v;    /* intentionally unused */

    return 0;
}

/*
 * my_fork - print the seven arguments it receives, one line on stdout.
 *
 * flags       printed as a decimal int.
 * child_stack, ptid, tls, child_tid, child
 *             printed as raw pointer values; none of them is dereferenced.
 * args        must point to a readable int: it is dereferenced and the
 *             pointed-to value is printed.
 *
 * The pointer arguments are converted to void * before being passed to
 * printf because the %p conversion is only defined for void * arguments.
 *
 * NOTE(review): `child` is declared as int * here, while the bionic_clone
 * prototype in this file passes an int (*)(void *) in that position; the
 * value is only printed, never called.
 */
void my_fork(int flags, int *child_stack, int *ptid, int *tls, int *child_tid, int *child, int *args) {
    printf("flags:%d sp:%p ptid:%p tls:%p child_tid:%p child:%p args:%d\n",
        flags, (void *)child_stack, (void *)ptid, (void *)tls,
        (void *)child_tid, (void *)child, *args);
}

/*
 * Driver for the argument-passing demo: calls the assembly routine
 * bionic_clone with seven arguments and prints a marker line before and
 * after the call.
 */
int main(){
    /* The 7th argument is the address of this local. */
    int seventh = 5;

    /* Arguments 2-5 are the fake addresses 1..4; main never dereferences them. */
    int *const stack_arg = (int*)0x01;
    int *const ptid_arg  = (int*)0x02;
    int *const tls_arg   = (int*)0x03;
    int *const ctid_arg  = (int*)0x04;

    printf("%s\n", "before bionic_clone");
    bionic_clone(0, stack_arg, ptid_arg, tls_arg, ctid_arg, child, &seventh);
    printf("%s\n", "after bionic_clone");

    return 0;
}

c代码中调用了汇编函数bionic_clone,且参数超过4个,需要使用栈传递参数。

transferParam.S:

@ bionic_clone: forward all seven incoming arguments to my_fork.
@ r0-r3 (arguments 1-4) are left untouched; arguments 5-7 are read from the
@ caller's stack area through fp and pushed again as outgoing stack arguments.
@ NOTE(review): r4-r6 are overwritten and never restored; AAPCS treats them
@ as callee-saved - confirm the caller does not rely on them.
@ NOTE(review): sp is lowered by 8 + 12 = 20 bytes before the bl, so it is not
@ 8-byte aligned at the call if it was on entry - check against AAPCS.
.globl bionic_clone
bionic_clone:
    push {fp, lr}                @ save the caller's fp and the return address (lr)
    add fp, sp, #4               @ fp = sp + 4, i.e. fp points at the saved lr slot
    @stmfd   sp!, {r4, r5, r6}
    ldr r4, [fp, #4]             @ load the 5th argument (at the caller's sp) into r4
    ldr r5, [fp, #8]             @ load the 6th argument into r5
    ldr r6, [fp, #12]            @ load the 7th argument into r6
    stmfd sp!, {r4, r5, r6}      @ push r4, r5, r6: r4 ends up at [sp], so they become arguments 5-7 of my_fork
    bl      my_fork
    sub sp, fp, #4               @ sp = fp - 4, discarding the three pushed arguments
    pop {fp, pc}                 @ restore fp and load the saved lr into pc, i.e. return

.type bionic_clone,%function

代码执行到ldr r6, [fp, #12] stack frame图示:

arm/arm64函数栈帧(stackframe)结构和传参规则_第2张图片

 正如main函数通过栈给bionic_clone函数传递第5,6,7三个参数,bionic_clone也将r4 r5 r6入栈给my_fork传递参数

4. arm64栈帧结构

/*
 * bar - print "bar", add b to a, then print the sum.
 *
 * Kept statement-for-statement as written: the disassembly shown later in
 * the article was generated from this exact source.
 */
void bar(int a , int b ) {  
    printf("bar\n");    
    a = a + b;          /* the sum overwrites the local copy of a */
    printf("%d\n",a);   
}

/*
 * foo - call bar(0, 1) through two locals, so that the locals show up as
 * stack slots in the disassembly shown later in the article.
 */
void foo() {
   int a = 0;
   int b = 1;
   bar(a, b);
} 
  
/*
 * Entry point of the arm64 sample: calls foo() and nothing else.
 * argc and argv are unused. There is no explicit return statement; the
 * disassembly of main in the article shows w0 being set to 0 before ret.
 */
int main(int argc, char *argv[]) {
   foo();   
} 
~ 

反汇编代码:


000000000040072c <bar>:
  40072c:   a9be7bfd    stp x29, x30, [sp,#-32]!
  400730:   910003fd    mov x29, sp
  400734:   b9001fa0    str w0, [x29,#28]
  400738:   b9001ba1    str w1, [x29,#24]
  40073c:   90000000    adrp    x0, 400000 <_init-0x598>
  400740:   91216000    add x0, x0, #0x858
  400744:   97ffffaf    bl  400600 <puts@plt>
  400748:   b9401fa1    ldr w1, [x29,#28]
  40074c:   b9401ba0    ldr w0, [x29,#24]
  400750:   0b000020    add w0, w1, w0
  400754:   b9001fa0    str w0, [x29,#28]
  400758:   90000000    adrp    x0, 400000 <_init-0x598>
  40075c:   91218000    add x0, x0, #0x860
  400760:   b9401fa1    ldr w1, [x29,#28]
  400764:   97ffffab    bl  400610 <printf@plt>
  400768:   d503201f    nop
  40076c:   a8c27bfd    ldp x29, x30, [sp],#32
  400770:   d65f03c0    ret

0000000000400774 <foo>:
  400774:   a9be7bfd    stp x29, x30, [sp,#-32]!
  400778:   910003fd    mov x29, sp
  40077c:   b9001fbf    str wzr, [x29,#28]
  400780:   52800020    mov w0, #0x1                    // #1
  400784:   b9001ba0    str w0, [x29,#24]
  400788:   b9401ba1    ldr w1, [x29,#24]
  40078c:   b9401fa0    ldr w0, [x29,#28]
  400790:   97ffffe7    bl  40072c <bar>
  400794:   d503201f    nop
  400798:   a8c27bfd    ldp x29, x30, [sp],#32
  40079c:   d65f03c0    ret

00000000004007a0 <main>:
  4007a0:   a9be7bfd    stp x29, x30, [sp,#-32]!
  4007a4:   910003fd    mov x29, sp
  4007a8:   b9001fa0    str w0, [x29,#28]
  4007ac:   f9000ba1    str x1, [x29,#16]
  4007b0:   97fffff1    bl  400774 <foo>
  4007b4:   52800000    mov w0, #0x0                    // #0
  4007b8:   a8c27bfd    ldp x29, x30, [sp],#32
  4007bc:   d65f03c0    ret

arm64栈帧结构:

arm/arm64函数栈帧(stackframe)结构和传参规则_第3张图片

 

 5. 实战,内核如何dump backtrace

为了加深对stack frame的理解,可以分析arm64内核如何dump backtrace。内核配置CONFIG_FRAME_POINTER后可以基于fp进行栈回溯。基本原理可以对照上面的栈帧结构来看:比如arm64小节的示例代码中,main调用foo,foo调用bar,我们从bar开始回溯栈帧。如果先得到bar的x29值,那么x29 + 8处保存的是x30,即返回到调用者(caller)foo中的地址;x29指向的地址处保存的是调用者foo的x29,由此又可以回溯到foo函数的栈帧,依次类推就可以回溯整个函数调用链。

kernel-4.14/arch/arm64/kernel/traps.c:

/*
 * dump_backtrace - print a "Call trace:" by repeatedly stepping a
 * struct stackframe with unwind_frame().
 * @regs: optional register snapshot. When non-NULL, entries are suppressed
 *        until the frame whose fp equals regs->regs[29] is reached, and
 *        regs->pc is printed for that frame instead of frame.pc.
 * @tsk:  task to unwind; NULL means current.
 *
 * Returns without printing anything if try_get_task_stack() fails; otherwise
 * the task stack reference is dropped with put_task_stack() once the walk
 * ends (unwind_frame() returning a negative value).
 */
void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
{
    struct stackframe frame;
    int skip;

    pr_debug("%s(regs = %p tsk = %p)\n", __func__, regs, tsk);

    if (!tsk)
        tsk = current;

    if (!try_get_task_stack(tsk))
        return;

    /* Case 1: dumping the backtrace of the current task. */
    if (tsk == current) {
        /*
         * __builtin_frame_address() is a compiler builtin; with argument 0
         * it yields this function's own frame address (x29 on arm64,
         * per the article).
         */
        frame.fp = (unsigned long)__builtin_frame_address(0);
        /* Start the walk at this function's own address. */
        frame.pc = (unsigned long)dump_backtrace;
    } else {
        /*
         * task blocked in __switch_to
         */
        frame.fp = thread_saved_fp(tsk);
        frame.pc = thread_saved_pc(tsk);
    }

    /* skip is 1 when a register snapshot was supplied, 0 otherwise. */
    skip = !!regs;
    printk("Call trace:\n");
    while (1) {
        unsigned long stack;
        int ret;

        /* dump_backtrace_entry() is handed the pc to report (presumably prints it; helper not shown here). */
        /* skip until specified stack frame */
        if (!skip) {
            dump_backtrace_entry(frame.pc);
        } else if (frame.fp == regs->regs[29]) {
            skip = 0;
            /*
             * Mostly, this is the case where this function is
             * called in panic/abort. As exception handler's
             * stack frame does not contain the corresponding pc
             * at which an exception has taken place, use regs->pc
             * instead.
             */
            dump_backtrace_entry(regs->pc);
        }
        /* Step to the caller's frame record; a negative result ends the walk. */
        ret = unwind_frame(tsk, &frame);
        if (ret < 0)
            break;
        if (in_entry_text(frame.pc)) {
            /*
             * Subtract the offset of the stackframe member to get a
             * struct pt_regs base address - presumably frame.fp points at
             * that member inside a saved pt_regs; confirm against entry code.
             */
            stack = frame.fp - offsetof(struct pt_regs, stackframe);

            if (on_accessible_stack(tsk, stack))
                dump_mem("", "Exception stack", stack,
                     stack + sizeof(struct pt_regs));
        }
    }

    put_task_stack(tsk);
}



/*
 * AArch64 PCS assigns the frame pointer to x29.
 *
 * A simple function prologue looks like this:
 *  sub sp, sp, #0x10
 *      stp x29, x30, [sp]
 *  mov x29, sp
 *
 * A simple function epilogue looks like this:
 *  mov sp, x29
 *  ldp x29, x30, [sp]
 *  add sp, sp, #0x10
 *
 * unwind_frame - step @frame from one frame record to its caller's.
 * @tsk:   task whose stack is being walked; NULL means current.
 * @frame: in/out. On success fp is replaced by the word stored at the old
 *         fp, and pc by the word stored at the old fp + 8.
 *
 * Returns 0 on success. Returns -EINVAL when the incoming fp is not 16-byte
 * aligned, when on_accessible_stack() rejects it, or when both values read
 * from the record are zero (end of the chain).
 */
int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame)
{
    unsigned long fp = frame->fp;

    /* Reject an fp whose low four bits are set, i.e. not 16-byte aligned. */
    if (fp & 0xf)
        return -EINVAL;

    if (!tsk)
        tsk = current;

    if (!on_accessible_stack(tsk, fp))
        return -EINVAL;

    /* Read the caller's fp from the record at fp (see the arm64 stack frame layout in the article). */
    frame->fp = READ_ONCE_NOCHECK(*(unsigned long *)(fp));

    /* fp + 8 holds the saved x30, i.e. the return address into the caller. */
    frame->pc = READ_ONCE_NOCHECK(*(unsigned long *)(fp + 8));

    /*
     * Frames created upon entry from EL0 have NULL FP and PC values, so
     * don't bother reporting these. Frames created by __noreturn functions
     * might have a valid FP even if PC is bogus, so only terminate where
     * both are NULL.
     */
    if (!frame->fp && !frame->pc)
        return -EINVAL;

    return 0;
}

你可能感兴趣的:(程序员基本素养,汇编语言,arm,嵌入式硬件)