recon 是 ferd 大神发布的一个用于在生产环境中诊断 Erlang 问题的工具,它不仅仅是对 Erlang stdlib 接口的封装,还提供了 memory fragmentation(内存碎片)相关的函数.
下面对recon的各个函数进行解读,做个笔记
-module(recon). -export([info/1, info/2, info/3, info/4, proc_count/2, proc_window/3, bin_leak/1, node_stats_print/2, node_stats_list/2, node_stats/4, scheduler_usage/1]). -export([get_state/1, get_state/2]). -export([remote_load/1, remote_load/2, source/1]). -export([tcp/0, udp/0, sctp/0, files/0, port_types/0, inet_count/2, inet_window/3, port_info/1, port_info/2]). -export([rpc/1, rpc/2, rpc/3, named_rpc/1, named_rpc/2, named_rpc/3]).
......
......
info/1,2,3,4 erlang:process_info的封装,默认打印[meta, signals, location, memory_used, work]这几个参数
info(PidTerm, meta) -> info_type(PidTerm, meta, [registered_name, dictionary, group_leader, status]); info(PidTerm, signals) -> info_type(PidTerm, signals, [links, monitors, monitored_by, trap_exit]); info(PidTerm, location) -> info_type(PidTerm, location, [initial_call, current_stacktrace]); info(PidTerm, memory_used) -> info_type(PidTerm, memory_used, [memory, message_queue_len, heap_size, total_heap_size, garbage_collection]); info(PidTerm, work) -> info_type(PidTerm, work, [reductions]);
1个参数是[pid],默认显示5类如上;2个参数是[pid, type],和erlang:process_info/2一样;3个参数A,B,C是3个int,组成pid <A.B.C>,效果同info/1;4个参数是[A, B, C, type],即先由A,B,C组成pid,再按2个参数的方式查询
proc_count/2 第一个参数type是process_info_item()(erlang:process_info/2第二个参数的选项)或者binary_memory(使用的内存和),
第二个参数N是个数,打印的是排序后(从大到小)前N个进程的type值(运行这个命令的本进程不在计算内)
proc_window/3 第一个是type(同上),第二个是N(同上),第3个是Time(ms)时间
表示Time时间内,type值的变化量(增长或减小)
bin_leak/1 第一个参数是N, 表示排序后前N个进程(主动gc前后内存使用的变化值)
node_stats_print/2 第一个参数是N, 表示打印的次数, 第二个是Time(ms)时间,表示打印间隔时间, 打印的内容如下,主要包括进程数,内存,io,gc次数,erlang调度器利用率(调度器线程忙不忙)
Stats = fun({{OldIn,OldOut},{OldGCs,OldWords,_}, SchedWall}) -> %% Absolutes ProcC = erlang:system_info(process_count), RunQ = erlang:statistics(run_queue), {_,LogQ} = process_info(whereis(error_logger), message_queue_len), %% Mem (Absolutes) Mem = erlang:memory(), Tot = proplists:get_value(total, Mem), ProcM = proplists:get_value(processes_used,Mem), Atom = proplists:get_value(atom_used,Mem), Bin = proplists:get_value(binary, Mem), Ets = proplists:get_value(ets, Mem), %% Incremental {{input,In},{output,Out}} = erlang:statistics(io), GC={GCs,Words,_} = erlang:statistics(garbage_collection), BytesIn = In-OldIn, BytesOut = Out-OldOut, GCCount = GCs-OldGCs, GCWords = Words-OldWords, {_, Reds} = erlang:statistics(reductions), SchedWallNew = erlang:statistics(scheduler_wall_time), SchedUsage = recon_lib:scheduler_usage_diff(SchedWall, SchedWallNew), %% Stats Results {{[{process_count,ProcC}, {run_queue,RunQ}, {error_logger_queue_len,LogQ}, {memory_total,Tot}, {memory_procs,ProcM}, {memory_atoms,Atom}, {memory_bin,Bin}, {memory_ets,Ets}], [{bytes_in,BytesIn}, {bytes_out,BytesOut}, {gc_count,GCCount}, {gc_words_reclaimed,GCWords}, {reductions,Reds}, {scheduler_usage, SchedUsage}]}, %% New State {{In,Out}, GC, SchedWallNew}} end,
node_stats_list/2 和node_stats_print/2函数的参数和采样内容一样,区别是一个是把结果一个个打印出来,一个是把结果作为list返回
node_stats/4 前2个参数和node_stats_print/2一样,第3个参数是Fun函数,第4个是初始状态Acc,表示对每次的采样结果调用Fun(采样结果, Acc),其返回值作为下一次的Acc(类似lists:foldl/3),最后返回最终的Acc。
scheduler_usage/1 ,参数表示时间Time, 表示Time时间间隔,erlang调度器利用率(调度器线程忙不忙)的比较,只是node_stats_print/2的第一步。
get_state/1, get_state/2 先使用sys:get_state/2,如果未取得结果使用sys:get_status/2, get_state/1使用默认的5000超时时间
1 recon:get_state(kernel_sup). 2 {state,{local,kernel_sup}, 3 one_for_all, 4 [{child,<0.29.0>,kernel_safe_sup, 5 {supervisor,start_link, 6 [{local,kernel_safe_sup},kernel,safe]}, 7 permanent,infinity,supervisor, 8 [kernel]}, 9 {child,<0.28.0>,kernel_config, 10 {kernel_config,start_link,[]}, 11 permanent,2000,worker, 12 [kernel_config]}, 13 {child,<0.23.0>,user, 14 {user_sup,start,[]}, 15 temporary,2000,supervisor, 16 [user_sup]}, 17 {child,<0.21.0>,standard_error, 18 {standard_error,start_link,[]}, 19 temporary,2000,supervisor, 20 [user_sup]}, 21 {child,<0.20.0>,code_server, 22 {code,start_link,[]}, 23 permanent,2000,worker, 24 [code]}, 25 {child,<0.19.0>,file_server_2, 26 {file_server,start_link,[]}, 27 permanent,2000,worker, 28 [file,file_server,file_io_server,prim_file]}, 29 {child,<0.18.0>,global_group, 30 {global_group,start_link,[]}, 31 permanent,2000,worker, 32 [global_group]}, 33 {child,undefined,net_sup, 34 {erl_distribution,start_link,[]}, 35 permanent,infinity,supervisor, 36 [erl_distribution]}, 37 {child,<0.16.0>,inet_db, 38 {inet_db,start_link,[]}, 39 permanent,2000,worker, 40 [inet_db]}, 41 {child,<0.13.0>,global_name_server, 42 {global,start_link,[]}, 43 permanent,2000,worker, 44 [global]}, 45 {child,<0.12.0>,rex, 46 {rpc,start_link,[]}, 47 permanent,2000,worker, 48 [rpc]}], 49 undefined,0,1,[],kernel,[]}
remote_load/1, remote_load/2 远程热更新节点,remote_load/2的第一个参数是节点或节点列表(如nodes(),不包括node()),第二个参数是模块或模块列表;remote_load/1只传模块,节点默认使用nodes/0的结果。
%% @doc Loads the object code of one or more locally available modules
%% on one or more remote nodes.
%%
%% `Nodes' is either a single node name (an atom) or a list of node names.
%% `Mod' is either a single module name (an atom) or a list of module names.
%%
%% For a single module the result is whatever `rpc:multicall/4' returns for
%% `code:load_binary/3'; for a list of modules the result is a list with one
%% such result per module, in the same order.
%%
%% An empty node list is passed straight to `rpc:multicall/4' instead of being
%% mistaken for a node name, and a module argument that is neither an atom nor
%% a list now fails with `function_clause' instead of recursing forever.
%% A module whose object code cannot be found locally still crashes with
%% `badmatch', since `code:get_object_code/1' does not return a 3-tuple then.
remote_load(Nodes, Mod) when is_list(Nodes), is_atom(Mod) ->
    %% Fetch the beam binary from the local code path; this is what gets shipped.
    {Mod, Bin, File} = code:get_object_code(Mod),
    rpc:multicall(Nodes, code, load_binary, [Mod, File, Bin]);
remote_load(Nodes, Modules) when is_list(Nodes), is_list(Modules) ->
    [remote_load(Nodes, Mod) || Mod <- Modules];
remote_load(Node, Mod) when is_atom(Node) ->
    %% A lone node name is normalised to a one-element node list.
    remote_load([Node], Mod).
source/1 根据模块(.beam)生成(.erl)
tcp/0, udp/0, sctp/0, files/0 各种类型的端口,根据erlang:ports/0的name分开
port_types/0 根据erlang:ports/0的name统计个数
inet_count/2 第一个参数 ['recv_cnt' | 'recv_oct' | 'send_cnt' | 'send_oct'| 'cnt' | 'oct'], cnt表示recv_cnt和send_cnt, oct表示recv_oct和send_oct
第二个参数是个数N,表示all inet ports (TCP, UDP, SCTP)中第一个参数(收发包数)的前N个
inet_window/3 第1,2个参数同上,第3个参数Time时间,表示Time时间内第一个参数(收发包数)的前N个
port_info/1, port_info/2 erlang:port_info/2的封装
%% @doc Collects every category of information about a port.
%%
%% `PortTerm' is converted to a port with `recon_lib:term_to_port/1', then
%% `port_info/2' is called once per category. The result is a list holding
%% one `port_info/2' result per category, in the order
%% `meta', `signals', `io', `memory_used', `specific'.
port_info(PortTerm) ->
    Port = recon_lib:term_to_port(PortTerm),
    Categories = [meta, signals, io, memory_used, specific],
    lists:map(fun(Category) -> port_info(Port, Category) end, Categories).
rpc/1, rpc/2, rpc/3, named_rpc/1, named_rpc/2, named_rpc/3 这些全是rpc的封装,区别如下
1 ...... 2 ...... 3 rpc(Nodes=[_|_], Fun, Timeout) when is_function(Fun,0) -> 4 rpc:multicall(Nodes, erlang, apply, [Fun,[]], Timeout); 5 ...... 6 ...... 7 named_rpc(Nodes=[_|_], Fun, Timeout) when is_function(Fun,0) -> 8 rpc:multicall(Nodes, erlang, apply, [fun() -> {node(),Fun()} end,[]], Timeout); 9 ...... 10 ......