前面我们已经聊过了Erlang的Global模块和Trap机制。这篇Blog,将会讨论下Erlang的节点是怎么互联的,主要是对net_kernel的一些代码分析。由于oschina的编辑器不支持Erlang的语法高亮,请亲们多多见谅吧。
在Erlang整个环境启动的时候,会创建一个叫做net_kernel的Erlang进程,这个进程是一个gen_server。net_kernel主要用来处理Erlang网络协议。下面我们就进入正题,net_kernel中的connect函数。
net_kernel:connect本身就是一个gen_server:call,我们直接看net_kernel:handle_call的代码。
%% handle_call/3 clauses serving {connect, Type, Node} requests (excerpt).
%%
%% - Node is the local node: reply true immediately.
%% - sys_dist holds an entry in state 'up': reply true immediately.
%% - sys_dist holds an entry in state 'pending' or 'up_pending': prepend the
%%   caller to the entry's waiting list and return without replying.
%% - Otherwise: start a new connection attempt through setup/4. On success
%%   the {SetupPid, Node} pair is recorded in conn_owners and no reply is
%%   sent here; on failure the caller gets false.
%%
%% The last clause keeps its trailing ';' exactly as in the excerpt.
handle_call({connect, _, Node}, From, State) when Node =:= node() ->
    async_reply({reply, true, State}, From);
handle_call({connect, Type, Node}, From, State) ->
    verbose({connect, Type, Node}, 1, State),
    case ets:lookup(sys_dist, Node) of
        [#connection{state = up}] ->
            async_reply({reply, true, State}, From);
        [#connection{state = ConnState, waiting = Queue} = Entry]
          when ConnState =:= pending; ConnState =:= up_pending ->
            %% A connection attempt is already in flight: queue the caller.
            ets:insert(sys_dist, Entry#connection{waiting = [From | Queue]}),
            {noreply, State};
        _ ->
            case setup(Node, Type, From, State) of
                {ok, SetupPid} ->
                    PrevOwners = State#state.conn_owners,
                    {noreply,
                     State#state{conn_owners = [{SetupPid, Node} | PrevOwners]}};
                _ ->
                    ?connect_failure(Node, {setup_call, failed}),
                    async_reply({reply, false, State}, From)
            end
    end;
其中,我们可以看出,如果目标节点是自身,那么直接就忽略掉,返回成功。如果目标节点不是自身,先看一下ets中是否有向远程节点连接的进程。当这个进行连接的进程状态是up时,直接返回true;如果状态是pending或up_pending,则将请求进程加入连接等待队列中。如果我们没有向远程节点进行连接的进程,则调用setup函数来建立一个。让我们接着跟踪一下setup这个函数做了什么。
%% Set up a connection to a new node.
%%
%% If State#state.allowed is non-empty and Node is not a member of it, an
%% error is logged and {error, bad_node} is returned. Otherwise a listen
%% entry whose module accepts Node is chosen with select_mod/2, that
%% module's setup/5 is called, and a 'pending' #connection{} entry (with
%% From as the only waiter) is stored in the sys_dist table.
%%
%% Returns {ok, Pid}, where Pid is the value returned by Mod:setup/5,
%% {error, bad_node}, or whatever non-ok value select_mod/2 returned.
setup(Node, Type, From, State) ->
    Allowed = State#state.allowed,
    case lists:member(Node, Allowed) of
        false when Allowed =/= [] ->
            error_msg("** Connection attempt with "
                      "disallowed node ~w ** ~n", [Node]),
            {error, bad_node};
        _ ->
            %% Find the module used to connect to the remote node.
            case select_mod(Node, State#state.listen) of
                {ok, L} ->
                    Mod = L#listen.module,
                    LAddr = L#listen.address,
                    MyNode = State#state.node,
                    Pid = Mod:setup(Node,
                                    Type,
                                    MyNode,
                                    State#state.type,
                                    State#state.connecttime),
                    %% The remote address and host are not known yet, so
                    %% they are cleared in the copied listen address.
                    Addr = LAddr#net_address{address = undefined,
                                             host = undefined},
                    ets:insert(sys_dist, #connection{node = Node,
                                                     state = pending,
                                                     owner = Pid,
                                                     waiting = [From],
                                                     address = Addr,
                                                     type = normal}),
                    {ok, Pid};
                Error ->
                    Error
            end
    end.

%%
%% Find a module that is willing to handle connection setup to Node.
%%
%% Walks the list of #listen{} entries in order and returns {ok, L} for the
%% first entry whose module's select/1 returns true, or
%% {error, {unsupported_address_type, Node}} when the list is exhausted.
%%
select_mod(Node, [L|Ls]) ->
    Mod = L#listen.module,
    case Mod:select(Node) of
        true -> {ok, L};
        false -> select_mod(Node, Ls)
    end;
select_mod(Node, []) ->
    {error, {unsupported_address_type, Node}}.
在setup函数中,我们需要先找出连接远程节点所使用的模块名称,一般情况下是inet_tcp_dist这个模块。我们下面假定是使用inet_tcp_dist这个模块,这个时候net_kernel会调用inet_tcp_dist:setup,并将成功后的Erlang进程ID放入ets中。
让我们看下inet_tcp_dist:setup函数
%% Start a connection attempt towards Node.
%%
%% Spawns a process, linked to the caller and running at max priority, that
%% executes do_setup/6 with the caller's pid as Kernel. Returns the pid of
%% the spawned process.
setup(Node, Type, MyNode, LongOrShortNames, SetupTime) ->
    spawn_opt(?MODULE, do_setup,
              [self(), Node, Type, MyNode, LongOrShortNames, SetupTime],
              [link, {priority, max}]).

%% Body of the connection setup process.
%%
%% Resolves the host part of Node, asks erl_epmd for the node's port,
%% opens a TCP connection and hands a populated #hs_data{} record to
%% dist_util:handshake_we_started/1. Every failure path goes through the
%% ?shutdown macro (NOTE(review): presumably terminates this process;
%% the macro is defined outside this excerpt).
do_setup(Kernel, Node, Type, MyNode, LongOrShortNames, SetupTime) ->
    ?trace("~p~n",[{inet_tcp_dist,self(),setup,Node}]),
    [Name, Address] = splitnode(Node, LongOrShortNames),
    case inet:getaddr(Address, inet) of
        {ok, Ip} ->
            Timer = dist_util:start_timer(SetupTime),
            %% Ask epmd for the remote node's listening port.
            case erl_epmd:port_please(Name, Ip) of
                {port, TcpPort, Version} ->
                    ?trace("port_please(~p) -> version ~p~n",
                           [Node,Version]),
                    dist_util:reset_timer(Timer),
                    %% Connect to the remote node.
                    case inet_tcp:connect(Ip, TcpPort,
                                          [{active, false},
                                           {packet,2}]) of
                        %% With the socket in hand, fill in the callbacks
                        %% and state used by the handshake and by the
                        %% connection loop that follows it.
                        {ok, Socket} ->
                            HSData = #hs_data{
                              kernel_pid = Kernel,
                              other_node = Node,
                              this_node = MyNode,
                              socket = Socket,
                              timer = Timer,
                              this_flags = 0,
                              other_version = Version,
                              f_send = fun inet_tcp:send/2,
                              f_recv = fun inet_tcp:recv/3,
                              f_setopts_pre_nodeup =
                                  fun(S) ->
                                          inet:setopts
                                            (S,
                                             [{active, false},
                                              {packet, 4},
                                              nodelay()])
                                  end,
                              f_setopts_post_nodeup =
                                  fun(S) ->
                                          inet:setopts
                                            (S,
                                             [{active, true},
                                              {deliver, port},
                                              {packet, 4},
                                              nodelay()])
                                  end,
                              f_getll = fun inet:getll/1,
                              f_address =
                                  fun(_,_) ->
                                          #net_address{
                                             address = {Ip,TcpPort},
                                             host = Address,
                                             protocol = tcp,
                                             family = inet}
                                  end,
                              mf_tick = fun ?MODULE:tick/1,
                              mf_getstat = fun ?MODULE:getstat/1,
                              request_type = Type
                             },
                            %% Perform the handshake.
                            dist_util:handshake_we_started(HSData);
                        _ ->
                            %% Other Node may have closed since
                            %% port_please !
                            ?trace("other node (~p) "
                                   "closed since port_please.~n",
                                   [Node]),
                            ?shutdown(Node)
                    end;
                _ ->
                    ?trace("port_please (~p) "
                           "failed.~n", [Node]),
                    ?shutdown(Node)
            end;
        _Other ->
            ?trace("inet_getaddr(~p) "
                   "failed (~p).~n", [Node,_Other]),
            ?shutdown(Node)
    end.
顺便说一句,当独立进程epmd发现自己和某个node的连接断了,那么直接将这个node注册的名字和端口从自身缓存中删除掉。从这里面我们可以看出,Erlang依然是使用inet这个模块完成tcp连接,用inet这个模块完成数据收发和节点之间的心跳。
让我们看下dist_util:handshake_we_started以及和它相关的函数
%% Run the handshake for a connection that this node initiated.
%%
%% Steps, in the order they are executed:
%%   1. compute our flags from the request type and send our name;
%%   2. receive the peer's status and its challenge (with its flags);
%%   3. reconcile both flag sets and store them in the handshake data,
%%      marking other_started = false;
%%   4. generate our own challenge and reply with it plus a digest of the
%%      peer's challenge computed with the peer's cookie;
%%   5. reset the timer, then receive the peer's challenge acknowledgement,
%%      which is passed our challenge and our cookie;
%%   6. enter connection/1.
%% The statement order is part of the protocol and must not be changed.
handshake_we_started(#hs_data{request_type=ReqType,
                              other_node=Node}=PreHSData) ->
    PreThisFlags = make_this_flags(ReqType, Node),
    HSData = PreHSData#hs_data{this_flags=PreThisFlags},
    send_name(HSData),
    recv_status(HSData),
    {PreOtherFlags,ChallengeA} = recv_challenge(HSData),
    {ThisFlags,OtherFlags} = adjust_flags(PreThisFlags, PreOtherFlags),
    NewHSData = HSData#hs_data{this_flags = ThisFlags,
                               other_flags = OtherFlags,
                               other_started = false},
    check_dflag_xnc(NewHSData),
    MyChallenge = gen_challenge(),
    {MyCookie,HisCookie} = get_cookies(Node),
    send_challenge_reply(NewHSData,MyChallenge,
                         gen_digest(ChallengeA,HisCookie)),
    reset_timer(NewHSData#hs_data.timer),
    recv_challenge_ack(NewHSData, MyChallenge, MyCookie),
    connection(NewHSData).

%% --------------------------------------------------------------
%% The connection has been established.
%%
%% Cancels the handshake timer, applies the pre-nodeup socket options,
%% calls do_setnode/1 and mark_nodeup/2, applies the post-nodeup socket
%% options and finally enters con_loop/9. If either socket-option callback
%% returns anything other than ok, the ?shutdown / ?shutdown2 macro is
%% invoked instead.
%% --------------------------------------------------------------
connection(#hs_data{other_node = Node,
                    socket = Socket,
                    f_address = FAddress,
                    f_setopts_pre_nodeup = FPreNodeup,
                    f_setopts_post_nodeup = FPostNodeup}= HSData) ->
    cancel_timer(HSData#hs_data.timer),
    PType = publish_type(HSData#hs_data.other_flags),
    case FPreNodeup(Socket) of
        ok ->
            do_setnode(HSData), % Succeeds or exits the process.
            Address = FAddress(Socket,Node),
            mark_nodeup(HSData,Address),
            case FPostNodeup(Socket) of
                ok ->
                    con_loop(HSData#hs_data.kernel_pid,
                             Node,
                             Socket,
                             Address,
                             HSData#hs_data.this_node,
                             PType,
                             #tick{},
                             HSData#hs_data.mf_tick,
                             HSData#hs_data.mf_getstat);
                _ ->
                    ?shutdown2(Node, connection_setup_failed)
            end;
        _ ->
            ?shutdown(Node)
    end.
%% Main loop of an established connection.
%%
%% Waits for one message at a time and either loops with the same or an
%% updated tick state, or leaves the loop through ?shutdown2 with a reason
%% atom (NOTE(review): ?shutdown2 presumably terminates this process; the
%% macro is defined outside this excerpt).
%%
%% Messages handled:
%%   {tcp_closed, Socket}  - shut down with connection_closed.
%%   {Kernel, disconnect}  - shut down with disconnected.
%%   {Kernel, aux_tick}    - read socket statistics and call send_tick/3
%%                           with the pending-write value; any other
%%                           statistics result is ignored. Always loops.
%%   {Kernel, tick}        - call send_tick/5; loops with the new tick
%%                           state on {ok, NewTick}, otherwise shuts down.
%%   {From, get_status}    - reply to From with the read/write statistics
%%                           and loop, or shut down if they cannot be read.
con_loop(Kernel, Node, Socket, TcpAddress,
         MyNode, Type, Tick, MFTick, MFGetstat) ->
    receive
        {tcp_closed, Socket} ->
            ?shutdown2(Node, connection_closed);
        {Kernel, disconnect} ->
            ?shutdown2(Node, disconnected);
        {Kernel, aux_tick} ->
            case MFGetstat(Socket) of
                {ok, _, _, PendWrite} ->
                    send_tick(Socket, PendWrite, MFTick);
                _ ->
                    ignore_it
            end,
            con_loop(Kernel, Node, Socket, TcpAddress, MyNode, Type,
                     Tick, MFTick, MFGetstat);
        {Kernel, tick} ->
            case send_tick(Socket, Tick, Type,
                           MFTick, MFGetstat) of
                {ok, NewTick} ->
                    con_loop(Kernel, Node, Socket, TcpAddress,
                             MyNode, Type, NewTick, MFTick,
                             MFGetstat);
                {error, not_responding} ->
                    %% send_tick/5 reported the peer as not responding.
                    error_msg("** Node ~p not responding **~n"
                              "** Removing (timedout) connection **~n",
                              [Node]),
                    ?shutdown2(Node, net_tick_timeout);
                _Other ->
                    ?shutdown2(Node, send_net_tick_failed)
            end;
        {From, get_status} ->
            case MFGetstat(Socket) of
                {ok, Read, Write, _} ->
                    From ! {self(), get_status, {ok, Read, Write}},
                    con_loop(Kernel, Node, Socket, TcpAddress,
                             MyNode,
                             Type, Tick,
                             MFTick, MFGetstat);
                _ ->
                    ?shutdown2(Node, get_status_failed)
            end
    end.
在这里面,handshake_we_started和远程节点进行一次验证。验证过程非常简单,远程节点生成一个随机数,然后将这个随机数发给当前节点,然后当前节点用它所知道的远程节点的cookie加上这个随机数生成一个MD5,并将这个MD5返回给远程节点,本端节点对远程节点的验证也是如此。当完成了验证,我们会进入connection这个函数,这个时候,函数首先会执行do_setnode,告诉Erts我们已经和远程节点连接上了。同时通知net_kernel我们已经连上了远程节点,需要它改变ets中连接的状态并进行后续的操作。接着这个进程进入了和远程节点心跳监控的状态。