递归和尾递归汇编层面的差别

Erlang 的标准做法是尽可能把函数写成尾递归的形式,中间结果通过参数(累加器)传递。尾递归对进程堆栈的占用很小,只要一个 WORD;而非尾递归的堆栈占用取决于递归的层数,层数很大时会把堆栈撑得很大。我们在汇编一级看一下这是如何实现的:

root@nd-desktop:~# cat tailcall.erl 
-module(tailcall).
-export([start/1]).
-compile(export_all).

%% start(N): computes N! twice, once with the body-recursive loop/1 and once
%% with the tail-recursive tail_loop/1, and returns `done` when both agree.
start(N)->
    X = loop(N),
    Y = tail_loop(N),
    %% X is already bound here, so this match asserts that both results are
    %% equal; a mismatch raises badmatch.
    X = Y,
    done.

%% loop(N): body-recursive factorial. The multiplication is performed after
%% the recursive call returns, so the call is NOT in tail position and N has
%% to be kept alive across it.
loop(0)->
    1;
loop(N) when N >0 ->
   N * loop(N-1).


%% tail_loop(N): entry point of the tail-recursive factorial; it starts the
%% accumulator at 1 and delegates to tail_loop2/2.
tail_loop(N)->
    tail_loop2(N, 1).
%% tail_loop2(N, R): tail-recursive factorial worker. R is the accumulator
%% holding the product so far; the recursive call is the last expression of
%% the clause, so nothing has to be kept after it returns.
%% NOTE(review): unlike loop/1 there is no `N > 0` guard, so a negative
%% integer N never reaches the base clause.
tail_loop2(0, R)->
    R;
tail_loop2(N, R) ->
    tail_loop2(N-1, N *R).


root@nd-desktop:~# erlc +"'S'" tailcall.erl
root@nd-desktop:~# cat tailcall.S         
{module, tailcall}.  %% version = 0

{exports, [{loop,1},
           {module_info,0},
           {module_info,1},
           {start,1},
           {tail_loop,1},
           {tail_loop2,2}]}.

{attributes, []}.

{labels, 16}.

%% Assembly for start/1 (entry point: label 2).
{function, start, 1, 2}.
  {label,1}.
    {func_info,{atom,tailcall},{atom,start},1}.
  {label,2}.
    %% Reserve one stack slot (y0): N must survive the call to loop/1.
    {allocate,1,1}.
    {move,{x,0},{y,0}}.
    %% {f,5} is the entry label of loop/1.
    {call,1,{f,5}}.
    %% Swap through x1: loop's result (X) ends up in y0 and N goes back
    %% into x0 as the argument for the next call.
    {move,{x,0},{x,1}}.
    {move,{y,0},{x,0}}.
    {move,{x,1},{y,0}}.
    %% {f,8} is the entry label of tail_loop/1.
    {call,1,{f,8}}.
    %% `X = Y`: compare tail_loop's result (x0) with the saved X (y0);
    %% on mismatch jump to label 3.
    {test,is_eq_exact,{f,3},[{x,0},{y,0}]}.
    {move,{atom,done},{x,0}}.
    {deallocate,1}.
    return.
  {label,3}.
    {badmatch,{x,0}}.


%% Assembly for the body-recursive loop/1 (entry point: label 5).
{function, loop, 1, 5}.
  {label,4}.
    {func_info,{atom,tailcall},{atom,loop},1}.
  {label,5}.
    %% Clause loop(0) -> 1.
    {test,is_eq_exact,{f,6},[{x,0},{integer,0}]}.
    {move,{integer,1},{x,0}}.
    return.
  {label,6}.
    %% Guard `N > 0`, encoded as 0 < x0; on failure jump to label 4.
    {test,is_lt,{f,4},[{integer,0},{x,0}]}.
    %% Key instruction: allocate_zero corresponds to AllocateZero in
    %% beam_emu.c. It pushes a frame holding the saved continuation pointer
    %% plus one blanked slot (y0), i.e. two words per recursion level.
    {allocate_zero,1,1}.

    {gc_bif,'-',{f,0},1,[{x,0},{integer,1}],{x,1}}.
    %% Keep N in y0 across the call; pass N-1 in x0.
    {move,{x,0},{y,0}}.
    {move,{x,1},{x,0}}.
    %% call opcode: recurse into label 5; every level of recursion leaves
    %% one more frame on the stack.
    {call,1,{f,5}}.
    %% N (y0) * result of the recursive call (x0).
    {gc_bif,'*',{f,0},1,[{y,0},{x,0}],{x,0}}.
    %% Restore the saved continuation pointer and pop the frame; this
    %% corresponds to the deallocate_I case in beam_emu.c.
    {deallocate,1}.

    return.


%% Assembly for tail_loop/1 (entry point: label 8).
{function, tail_loop, 1, 8}.
  {label,7}.
    {func_info,{atom,tailcall},{atom,tail_loop},1}.
  {label,8}.
    %% Second argument (the accumulator) starts at 1.
    {move,{integer,1},{x,1}}.
    %% Tail call into tail_loop2/2 (label 10); no allocate/deallocate pair
    %% is emitted in this function.
    {call_only,2,{f,10}}.


%% Assembly for the tail-recursive tail_loop2/2 (entry point: label 10).
{function, tail_loop2, 2, 10}.
  {label,9}.
    {func_info,{atom,tailcall},{atom,tail_loop2},2}.
  {label,10}.
    %% Clause tail_loop2(0, R) -> R.
    {test,is_eq_exact,{f,11},[{x,0},{integer,0}]}.
    {move,{x,1},{x,0}}.
    return.
  {label,11}.
    %% x2 = N - 1, x1 = N * R, then x0 = x2: both arguments are rebuilt in
    %% registers only.
    {gc_bif,'-',{f,0},2,[{x,0},{integer,1}],{x,2}}.
    {gc_bif,'*',{f,0},3,[{x,0},{x,1}],{x,1}}.
    {move,{x,2},{x,0}}.
    %% call_only opcode: jumps back to label 10 without setting up a stack
    %% frame (there is no allocate/deallocate in this function).
    {call_only,2,{f,10}}.


%% module_info/0 (not present in tailcall.erl; added by the compiler):
%% tail-calls erlang:get_module_info(tailcall).
{function, module_info, 0, 13}.
  {label,12}.
    {func_info,{atom,tailcall},{atom,module_info},0}.
  {label,13}.
    {move,{atom,tailcall},{x,0}}.
    {call_ext_only,1,{extfunc,erlang,get_module_info,1}}.


%% module_info/1 (not present in tailcall.erl; added by the compiler):
%% tail-calls erlang:get_module_info(tailcall, Arg).
{function, module_info, 1, 15}.
  {label,14}.
    {func_info,{atom,tailcall},{atom,module_info},1}.
  {label,15}.
    %% Shift the caller's argument to x1 so the module name can go in x0.
    {move,{x,0},{x,1}}.
    {move,{atom,tailcall},{x,0}}.
    {call_ext_only,2,{extfunc,erlang,get_module_info,2}}.

我们在beam_emu.c中可以看到:

/*
 * AllocateZero(Ns, Live): implementation of the allocate_zero instruction.
 *
 * Reserves a stack frame of Ns slots through AH() (which also stores the
 * continuation pointer at E), then walks from E[Ns] down to E[1] and
 * applies make_blank() to every slot. E[0] is skipped because it holds the
 * saved continuation pointer.
 *
 * Live is forwarded to AH() and is only used if a garbage collection is
 * needed to make room.
 *
 * NOTE(review): make_blank() is defined elsewhere; presumably it writes a
 * placeholder term so the collector never sees uninitialised slots --
 * confirm against the emulator sources.
 *
 * Each continuation backslash must be the last character on its line.
 */
#define AllocateZero(Ns, Live)             \
do { Eterm* ptr;                           \
      int i = (Ns);                        \
      AH(i, 0, Live);                      \
      for (ptr = E + i; ptr > E; ptr--) {  \
         make_blank(*ptr);                 \
      }                                    \
  } while (0)

/*
 * AH(StackNeed, HeapNeed, M): allocate a stack frame (and check heap room).
 *
 * needed is StackNeed plus one extra word for the saved continuation
 * pointer. If the gap between the stack pointer E and the heap top HTOP is
 * smaller than needed + HeapNeed, a garbage collection is run first
 * (register r(0) is copied into reg[0] before the collection and copied
 * back afterwards, and the value returned by erts_garbage_collect() is
 * subtracted from FCALLS). Afterwards E is moved down by `needed` words and
 * the continuation pointer is stored at the new E via SAVE_CP().
 *
 * M is passed to erts_garbage_collect() as its last argument.
 */
#define AH(StackNeed, HeapNeed, M) \
  do { \
     int needed; \
     needed = (StackNeed) + 1; \
     if (E - HTOP < (needed + (HeapNeed))) { \
           SWAPOUT; \
           reg[0] = r(0); \
           PROCESS_MAIN_CHK_LOCKS(c_p); \
           FCALLS -= erts_garbage_collect(c_p, needed + (HeapNeed), reg, (M)); \
           PROCESS_MAIN_CHK_LOCKS(c_p); \
           r(0) = reg[0]; \
           SWAPIN; \
     } \
     E -= needed; \
     SAVE_CP(E); \
  } while (0)

/*
 * SAVE_CP(X): store the process's current continuation pointer (c_p->cp),
 * wrapped by make_cp(), into the word X points to, then clear c_p->cp.
 */
#define SAVE_CP(X)                              \
   do {                                         \
      *(X) = make_cp(c_p->cp);                  \
      c_p->cp = 0;                              \
   } while(0)

/*
 * RESTORE_CP(X): counterpart of SAVE_CP(); unwraps the word at X with
 * cp_val() and hands it to SET_CP() together with c_p.
 * NOTE(review): SET_CP() is defined elsewhere; presumably it writes the
 * value back into c_p->cp -- confirm.
 */
#define RESTORE_CP(X)           SET_CP(c_p, cp_val(*(X)))

/*
 * D(N): pop a stack frame. Restores the continuation pointer saved at E,
 * then moves E up by N slots plus the one word that held the pointer
 * (the inverse of the `E -= needed` done in AH()).
 *
 * NOTE(review): expands to two statements without a do { } while (0)
 * wrapper, so it must not be used as the body of an unbraced if/else.
 */
#define D(N)             \
     RESTORE_CP(E);      \
     E += (N) + 1;

/*
 * deallocate_I: emulator case for the deallocate instruction. Pops the
 * current frame with D(), using the instruction's first operand (Arg(0))
 * as the number of slots.
 * NOTE(review): PreFetch()/NextPF() are defined elsewhere; presumably they
 * load and then dispatch to the following instruction -- confirm.
 */
OpCase(deallocate_I): {
     Eterm* next;

     PreFetch(1, next);
     D(Arg(0));
     NextPF(1, next);
}

结论是:
尾递归的效率要比普通(非尾)递归好很多,包括堆栈的使用和参数的分配,只是代码写起来不那么直观。

你可能感兴趣的:(C++,c,erlang,F#,ext)