Erlang 的惯用写法是尽可能把递归函数写成尾递归的形式,中间结果通过参数(累加器)传递。尾递归对进程堆栈的占用很小,只要一个 WORD;而非尾递归的堆栈占用取决于递归的层数,层数很大时会把堆栈撑得很大。我们在汇编一级看一下这是如何实现的:
root@nd-desktop:~# cat tailcall.erl
%% Demo module: computes N! twice, once with a body-recursive function
%% (loop/1) and once with a tail-recursive one (tail_loop/1), so the
%% generated BEAM code of the two styles can be compared.
-module(tailcall).
-export([start/1]).
%% export_all exports every function in the module, which makes the
%% explicit -export above redundant.
-compile(export_all).
%% start(N): runs both implementations on N and checks that they agree.
%% X is already bound when `X = Y` is evaluated, so that line is a match
%% assertion: it raises badmatch if the two results differ.
%% Returns the atom `done`.
start(N)->
X = loop(N),
Y = tail_loop(N),
X = Y,
done.
%% loop(N): body-recursive factorial. The multiplication is performed
%% after the recursive call returns, so N must be kept alive across the
%% call. The guard restricts the second clause to N > 0; other non-zero
%% arguments match no clause.
loop(0)->
1;
loop(N) when N >0 ->
N * loop(N-1).
%% tail_loop(N): tail-recursive factorial entry point; seeds the
%% accumulator with 1.
tail_loop(N)->
tail_loop2(N, 1).
%% tail_loop2(N, R): R is the running product. The recursive call is the
%% last expression of the clause, so nothing has to be kept for after it.
%% NOTE(review): unlike loop/1 there is no `N > 0` guard here, so a
%% negative N never reaches the 0 clause and the recursion does not stop.
tail_loop2(0, R)->
R;
tail_loop2(N, R) ->
tail_loop2(N-1, N *R).
root@nd-desktop:~# erlc +"'S'" tailcall.erl
root@nd-desktop:~# cat tailcall.S
{module, tailcall}. %% version = 0
{exports, [{loop,1},
{module_info,0},
{module_info,1},
{start,1},
{tail_loop,1},
{tail_loop2,2}]}.
{attributes, []}.
{labels, 16}.
%% start/1: calls loop/1 (label 5) and tail_loop/1 (label 8) on the same
%% argument and compares the two results.
{function, start, 1, 2}.
{label,1}.
{func_info,{atom,tailcall},{atom,start},1}.
{label,2}.
%% Reserve one Y (stack) slot for this function.
{allocate,1,1}.
%% Keep N in y0 so it is still available after the first call.
{move,{x,0},{y,0}}.
%% X = loop(N)
{call,1,{f,5}}.
%% Swap: the result of loop/1 goes into y0, N goes back into x0.
{move,{x,0},{x,1}}.
{move,{y,0},{x,0}}.
{move,{x,1},{y,0}}.
%% Y = tail_loop(N)
{call,1,{f,8}}.
%% X = Y: jump to label 3 (badmatch) unless both results are exactly equal.
{test,is_eq_exact,{f,3},[{x,0},{y,0}]}.
{move,{atom,done},{x,0}}.
{deallocate,1}.
return.
{label,3}.
{badmatch,{x,0}}.
%% loop/1: body-recursive factorial. Every recursion level executes
%% allocate_zero ... call ... deallocate, i.e. builds and tears down its
%% own stack frame.
{function, loop, 1, 5}.
{label,4}.
{func_info,{atom,tailcall},{atom,loop},1}.
{label,5}.
%% Clause loop(0) -> 1.
{test,is_eq_exact,{f,6},[{x,0},{integer,0}]}.
{move,{integer,1},{x,0}}.
return.
{label,6}.
%% Guard N > 0 (written as 0 < N); on failure jump to label 4 (func_info).
{test,is_lt,{f,4},[{integer,0},{x,0}]}.
{allocate_zero,1,1}.
%% Key instruction: allocate_zero reserves one Y slot on the stack; in the
%% emulator this is the AllocateZero macro, which also saves the current
%% continuation pointer in the new frame (see the beam_emu.c excerpt).
{gc_bif,'-',{f,0},1,[{x,0},{integer,1}],{x,1}}.
%% x1 = N - 1; N itself is stored in y0 because it is needed after the call.
{move,{x,0},{y,0}}.
{move,{x,1},{x,0}}.
{call,1,{f,5}}.
%% call opcode: recursive call to label 5; each level of recursion adds
%% one more frame to the stack.
{gc_bif,'*',{f,0},1,[{y,0},{x,0}],{x,0}}.
{deallocate,1}.
%% deallocate releases the frame reserved by allocate_zero; in the
%% emulator this is the deallocate_I opcode.
return.
%% tail_loop/1: puts the initial accumulator 1 into x1 and transfers
%% control to tail_loop2/2 (label 10) with call_only; no allocate or
%% deallocate is emitted for this function.
{function, tail_loop, 1, 8}.
{label,7}.
{func_info,{atom,tailcall},{atom,tail_loop},1}.
{label,8}.
{move,{integer,1},{x,1}}.
{call_only,2,{f,10}}.
%% tail_loop2/2: tail-recursive factorial step; x0 = N, x1 = accumulator R.
{function, tail_loop2, 2, 10}.
{label,9}.
{func_info,{atom,tailcall},{atom,tail_loop2},2}.
{label,10}.
%% Clause tail_loop2(0, R) -> R.
{test,is_eq_exact,{f,11},[{x,0},{integer,0}]}.
{move,{x,1},{x,0}}.
return.
{label,11}.
%% x2 = N - 1, x1 = N * R, then x0 = x2: the arguments of the next iteration.
{gc_bif,'-',{f,0},2,[{x,0},{integer,1}],{x,2}}.
{gc_bif,'*',{f,0},3,[{x,0},{x,1}],{x,1}}.
{move,{x,2},{x,0}}.
{call_only,2,{f,10}}.
%% call_only goes back to label 10 without any allocate/deallocate, so no
%% stack frame is built for an iteration.
%% module_info/0: not present in tailcall.erl, so added by the compiler.
%% Loads the module name into x0 and tail-calls erlang:get_module_info/1.
{function, module_info, 0, 13}.
{label,12}.
{func_info,{atom,tailcall},{atom,module_info},0}.
{label,13}.
{move,{atom,tailcall},{x,0}}.
{call_ext_only,1,{extfunc,erlang,get_module_info,1}}.
%% module_info/1: not present in tailcall.erl, so added by the compiler.
%% Moves the caller's argument to x1, loads the module name into x0 and
%% tail-calls erlang:get_module_info/2.
{function, module_info, 1, 15}.
{label,14}.
{func_info,{atom,tailcall},{atom,module_info},1}.
{label,15}.
{move,{x,0},{x,1}}.
{move,{atom,tailcall},{x,0}}.
{call_ext_only,2,{extfunc,erlang,get_module_info,2}}.
我们在beam_emu.c中可以看到:
/*
 * AllocateZero(Ns, Live): reserve Ns stack slots and blank them.
 *
 * AH(i, 0, Live) moves E down by Ns + 1 words and stores the saved
 * continuation pointer at E[0]; the loop then applies make_blank() to
 * E[1] .. E[Ns], walking from the highest slot downwards.
 * Live is only forwarded to AH.
 */
#define AllocateZero(Ns, Live) \
do { Eterm* ptr; \
int i = (Ns); \
AH(i, 0, Live); \
for (ptr = E + i; ptr > E; ptr--) { \
make_blank(*ptr); \
} \
} while (0)
/*
 * AH(StackNeed, HeapNeed, M): make room for a new stack frame.
 *
 * needed is StackNeed + 1: the extra word is the one SAVE_CP(E) writes to.
 * If the gap between E and HTOP is smaller than needed + HeapNeed,
 * erts_garbage_collect() is called first; r(0) is copied into reg[0]
 * before the call and read back afterwards, and the value returned by
 * the collector is subtracted from FCALLS.
 * M is passed through to erts_garbage_collect() -- presumably the number
 * of live X registers; confirm against the collector's signature.
 * Finally E is decremented by needed and the continuation pointer is
 * saved at the new E.
 */
#define AH(StackNeed, HeapNeed, M) \
do { \
int needed; \
needed = (StackNeed) + 1; \
if (E - HTOP < (needed + (HeapNeed))) { \
SWAPOUT; \
reg[0] = r(0); \
PROCESS_MAIN_CHK_LOCKS(c_p); \
FCALLS -= erts_garbage_collect(c_p, needed + (HeapNeed), reg, (M)); \
PROCESS_MAIN_CHK_LOCKS(c_p); \
r(0) = reg[0]; \
SWAPIN; \
} \
E -= needed; \
SAVE_CP(E); \
} while (0)
/*
 * SAVE_CP(X): store the process's current continuation pointer
 * (c_p->cp, wrapped with make_cp()) in the word X points to, then
 * clear c_p->cp.
 */
#define SAVE_CP(X) \
do { \
*(X) = make_cp(c_p->cp); \
c_p->cp = 0; \
} while(0)
/*
 * RESTORE_CP(X): counterpart of SAVE_CP -- unwrap the word at X with
 * cp_val() and install it as the process's continuation pointer via SET_CP.
 */
#define RESTORE_CP(X) SET_CP(c_p, cp_val(*(X)))
/*
 * D(N): drop a stack frame of N slots. Restores the continuation pointer
 * stored at E[0], then moves E up by N + 1 words (the N slots plus the
 * saved-CP word), undoing the `E -= needed` done in AH.
 * NOTE(review): this expands to two statements and is not wrapped in
 * do { } while (0), so it is only safe where a full statement sequence
 * is allowed (not as the body of an unbraced if/else).
 */
#define D(N) \
RESTORE_CP(E); \
E += (N) + 1;
/*
 * Handler for the deallocate_I opcode: releases the current stack frame
 * with D(Arg(0)), where Arg(0) is the instruction's operand (the number
 * of slots). PreFetch/NextPF presumably fetch and then dispatch to the
 * next instruction -- their definitions are not shown here.
 */
OpCase(deallocate_I): {
Eterm* next;
PreFetch(1, next);
D(Arg(0));
NextPF(1, next);
}
结论是:
尾递归的效率要比非尾递归好很多,包括堆栈的使用和参数的分配(尾调用用 call_only,不需要像非尾递归那样每一层都用 allocate_zero/deallocate 建立和释放栈帧),只是代码写起来不那么直观。