ranch 是一个用 Erlang 实现的 TCP 连接管理池,它有如下特点(摘抄自 git 库):
Ranch aims to provide everything you need to accept TCP connections with a small code base and low latency while being easy to use directly as an application or to embed into your own.
Ranch provides a modular design, letting you choose which transport and protocol are going to be used for a particular listener. Listeners accept and manage connections on one port, and include facilities to limit the number of concurrent connections. Connections are sorted into pools, each pool having a different configurable limit.
Ranch also allows you to upgrade the acceptor pool without having to close any of the currently opened sockets.
简单来说就是:ranch 以很小的代码量提供了低延迟、模块化、可限制并发连接数的 TCP 连接接受与管理能力,既可以作为独立 application 使用,也可以嵌入到自己的项目中。
项目用一个简单的 echo 服务演示了 ranch 的基本使用方式。
tcp_echo_app.erl
%% application callback for the tcp_echo example:
%% start one ranch listener named 'tcp_echo' with 1 acceptor,
%% ranch_tcp transport on port 5555, connections handled by
%% echo_protocol (empty ProtoOpts), then start the app's own
%% top-level supervisor.
start(_Type, _Args) ->
{ok, _} = ranch:start_listener(tcp_echo, 1,
ranch_tcp, [{port, 5555}], echo_protocol, []),
tcp_echo_sup:start_link().
echo_protocol.erl
%% Minimal ranch_protocol implementation: echoes every received
%% packet back to the peer.
-module(echo_protocol).
-behaviour(ranch_protocol).
-export([start_link/4]).
-export([init/4]).
%% ranch_protocol callback: spawn the per-connection process.
%% Ref is the listener name, Socket the accepted socket,
%% Transport the transport module (e.g. ranch_tcp), Opts the ProtoOpts.
start_link(Ref, Socket, Transport, Opts) ->
Pid = spawn_link(?MODULE, init, [Ref, Socket, Transport, Opts]),
{ok, Pid}.
init(Ref, Socket, Transport, _Opts = []) ->
%% Block until socket ownership has been handed to this process;
%% no socket operation is allowed before this returns.
ok = ranch:accept_ack(Ref),
loop(Socket, Transport).
%% Echo loop: recv with a 5s timeout and send the data straight back;
%% on timeout or any error, close the socket and let the process end.
loop(Socket, Transport) ->
case Transport:recv(Socket, 0, 5000) of
{ok, Data} ->
Transport:send(Socket, Data),
loop(Socket, Transport);
_ ->
ok = Transport:close(Socket)
end.
当然要使用这些逻辑的基础前提是首先application:ensure_all_started(ranch).
直接看的话还是有点难懂,主要体现在:start_listener 的各个参数分别是什么含义?echo_protocol 是如何被调用的?tcp_echo_sup 又起什么作用?现在我们带着这些问题开始学习 ranch 的逻辑
要解答这个问题,我们得先知道ranch的application启动后都做了哪些事情
ranch_app.erl
-module(ranch_app).
-behaviour(application).
%% application callback: optionally enable profiling (config-driven,
%% result deliberately ignored), then start the top-level supervisor.
start(_, _) ->
_ = consider_profiling(),
ranch_sup:start_link().
我们暂时忽略consider_profiling(实际上这部分是用来做prof分析的,根据app配置决定是否启动,和功能逻辑无关),也就是说启动时只启动了 ranch_sup 这样一个貌似 supervisor 的管理者
ranch_sup.erl
-module(ranch_sup).
-behaviour(supervisor).
-spec start_link() -> {ok, pid()}.
start_link() ->
supervisor:start_link({local, ?MODULE}, ?MODULE, []).
init([]) ->
%% The shared ETS table is created here, owned by the supervisor,
%% rather than inside ranch_server — presumably so entries survive a
%% ranch_server restart (ranch_server:init/1 re-reads existing rows).
ranch_server = ets:new(ranch_server, [
ordered_set, public, named_table]),
%% Single permanent worker child: the ranch_server gen_server.
Procs = [
{ranch_server, {ranch_server, start_link, []},
permanent, 5000, worker, [ranch_server]}
],
{ok, {{one_for_one, 10, 10}, Procs}}.
看来并没有猜错,ranch_sup就是一个supervisor:它建立了一张ets表,然后定义了自己child的描述。再回到例子里的ranch:start_listener,就可以大胆猜测一下:其实就是启动一个supervisor,用来管理之前特性说明里提到的、同时支持多个tcp端口监听的实例,而这些实例统一由ranch_sup来管理。当然这还只是猜测,还要看下ranch_server的具体实现才能确定。
ranch_server.erl
-module(ranch_server).
-behaviour(gen_server).
%% ?TAB aliases the module name: the ETS table created by ranch_sup.
-define(TAB, ?MODULE).
start_link() ->
gen_server:start_link({local, ?MODULE}, ?MODULE, [], []).
init([]) ->
%% The ETS table outlives this process (it is owned by ranch_sup),
%% so on a restart there may already be registered connection
%% supervisors: re-monitor each {conns_sup, Ref} -> Pid entry found.
%% NOTE(review): the #state record is defined elsewhere in this module.
Monitors = [{{erlang:monitor(process, Pid), Pid}, Ref} ||
[Ref, Pid] <- ets:match(?TAB, {{conns_sup, '$1'}, '$2'})],
{ok, #state{monitors=Monitors}}.
看起来并不是我们猜测的那样,ranch_server并没有要和某一个端口进行关联之类的逻辑,再看看它的handle_call回调内容
%% Store the initial max_conns and protocol opts for a new listener Ref.
handle_call({set_new_listener_opts, Ref, MaxConns, Opts}, _, State) ->
ets:insert(?TAB, {{max_conns, Ref}, MaxConns}),
ets:insert(?TAB, {{opts, Ref}, Opts}),
{reply, ok, State};
%% Register the connections supervisor pid for Ref. insert_new returns
%% false if one is already registered; on success the pid is monitored
%% so a dead entry can later be cleaned up.
handle_call({set_connections_sup, Ref, Pid}, _,
State=#state{monitors=Monitors}) ->
case ets:insert_new(?TAB, {{conns_sup, Ref}, Pid}) of
true ->
MonitorRef = erlang:monitor(process, Pid),
{reply, true,
State#state{monitors=[{{MonitorRef, Pid}, Ref}|Monitors]}};
false ->
{reply, false, State}
end;
%% Record the actual listening port for Ref.
handle_call({set_port, Ref, Port}, _, State) ->
true = ets:insert(?TAB, {{port, Ref}, Port}),
{reply, ok, State};
%% Update max_conns and notify the running connections supervisor.
handle_call({set_max_conns, Ref, MaxConns}, _, State) ->
ets:insert(?TAB, {{max_conns, Ref}, MaxConns}),
ConnsSup = get_connections_sup(Ref),
ConnsSup ! {set_max_conns, MaxConns},
{reply, ok, State};
%% Update protocol opts and notify the running connections supervisor.
handle_call({set_opts, Ref, Opts}, _, State) ->
ets:insert(?TAB, {{opts, Ref}, Opts}),
ConnsSup = get_connections_sup(Ref),
ConnsSup ! {set_opts, Opts},
{reply, ok, State};
原来ranch_server起的是一个管理者的角色,它管理着当前所有的实例,相当于ranch_server这张ets表的代理者
总结一下就是ranch的application使用ranch_sup管理了ranch_server进程,ranch_server进程作为ranch_server这张ets表的代理者保存记录着当前所有的实例的一些信息。有了这些基础工作才能实现多实例的管理。
ranch.erl
-spec start_listener(ref(), non_neg_integer(), module(), any(), module(), any())
-> {ok, pid()} | {error, badarg}.
%% Public entry point: start a new listener named Ref under ranch_sup.
%% Validates that Transport looks like a transport module (exports
%% name/0) before starting the ranch_listener_sup child.
start_listener(Ref, NbAcceptors, Transport, TransOpts, Protocol, ProtoOpts)
when is_integer(NbAcceptors) andalso is_atom(Transport)
andalso is_atom(Protocol) ->
_ = code:ensure_loaded(Transport),
case erlang:function_exported(Transport, name, 0) of
false ->
{error, badarg};
true ->
Res = supervisor:start_child(ranch_sup, child_spec(Ref, NbAcceptors,
Transport, TransOpts, Protocol, ProtoOpts)),
%% A pre-opened listen socket may be passed in via TransOpts.
Socket = proplists:get_value(socket, TransOpts),
case Res of
{ok, Pid} when Socket =/= undefined ->
%% Give ownership of the socket to ranch_acceptors_sup
%% to make sure the socket stays open as long as the
%% listener is alive. If the socket closes however there
%% will be no way to recover because we don't know how
%% to open it again.
Children = supervisor:which_children(Pid),
{_, AcceptorsSup, _, _}
= lists:keyfind(ranch_acceptors_sup, 1, Children),
%%% Note: the catch is here because SSL crashes when you change
%%% the controlling process of a listen socket because of a bug.
%%% The bug will be fixed in R16.
catch Transport:controlling_process(Socket, AcceptorsSup);
_ ->
ok
end,
Res
end.
-spec child_spec(ref(), non_neg_integer(), module(), any(), module(), any())
-> supervisor:child_spec().
%% Build the ranch_sup child spec for one listener instance: a
%% permanent ranch_listener_sup keyed by {ranch_listener_sup, Ref},
%% so several listeners can coexist under ranch_sup.
child_spec(Ref, NbAcceptors, Transport, TransOpts, Protocol, ProtoOpts)
when is_integer(NbAcceptors) andalso is_atom(Transport)
andalso is_atom(Protocol) ->
{{ranch_listener_sup, Ref}, {ranch_listener_sup, start_link, [
Ref, NbAcceptors, Transport, TransOpts, Protocol
]}, permanent, infinity, supervisor, [ranch_listener_sup]}.
start_listener函数的所有参数都被传给了ranch_listener_sup,所以我们只能再去ranch_listener_sup里看看了
ranch_listener_sup.erl
-module(ranch_listener_sup).
-behaviour(supervisor).
%% Supervisor for one listener instance.
start_link(Ref, NbAcceptors, Transport, TransOpts, Protocol, ProtoOpts) ->
MaxConns = proplists:get_value(max_connections, TransOpts, 1024),
%% Persist max_conns and ProtoOpts into ranch_server BEFORE starting
%% the children, so ranch_conns_sup can read them back in its init.
ranch_server:set_new_listener_opts(Ref, MaxConns, ProtoOpts),
supervisor:start_link(?MODULE, {
Ref, NbAcceptors, Transport, TransOpts, Protocol
}).
init({Ref, NbAcceptors, Transport, TransOpts, Protocol}) ->
AckTimeout = proplists:get_value(ack_timeout, TransOpts, 5000),
ConnType = proplists:get_value(connection_type, TransOpts, worker),
Shutdown = proplists:get_value(shutdown, TransOpts, 5000),
%% Start order matters: ranch_conns_sup first (acceptors look up its
%% pid), then ranch_acceptors_sup. rest_for_one ensures that if the
%% conns sup dies, the acceptors sup is restarted as well.
ChildSpecs = [
{ranch_conns_sup, {ranch_conns_sup, start_link,
[Ref, ConnType, Shutdown, Transport, AckTimeout, Protocol]},
permanent, infinity, supervisor, [ranch_conns_sup]},
{ranch_acceptors_sup, {ranch_acceptors_sup, start_link,
[Ref, NbAcceptors, Transport, TransOpts]},
permanent, infinity, supervisor, [ranch_acceptors_sup]}
],
{ok, {{rest_for_one, 10, 10}, ChildSpecs}}.
好吧,ProtoOpts倒是被set之后就没有再继续传递了;TransOpts被get出来几个值之后,又和其他参数原模原样地传给了ranch_conns_sup和ranch_acceptors_sup。在这里先看下ProtoOpts的用法
ranch_server.erl
%% Client API: synchronously store initial listener options.
set_new_listener_opts(Ref, MaxConns, Opts) ->
gen_server:call(?MODULE, {set_new_listener_opts, Ref, MaxConns, Opts}).
%% Writes go through the gen_server, which serializes table updates.
handle_call({set_new_listener_opts, Ref, MaxConns, Opts}, _, State) ->
ets:insert(?TAB, {{max_conns, Ref}, MaxConns}),
ets:insert(?TAB, {{opts, Ref}, Opts}),
{reply, ok, State};
%% Reads go straight to the public ETS table, bypassing the gen_server.
get_protocol_options(Ref) ->
ets:lookup_element(?TAB, {opts, Ref}, 2).
ranch_server对opts提供了基于ets的 get和set接口
ranch_conns_sup.erl
%% Custom (proc_lib-based) supervisor init: registers itself with
%% ranch_server, reads back max_conns and protocol opts, acks the
%% parent, then enters its hand-rolled receive loop.
init(Parent, Ref, ConnType, Shutdown, Transport, AckTimeout, Protocol) ->
process_flag(trap_exit, true),
ok = ranch_server:set_connections_sup(Ref, self()),
MaxConns = ranch_server:get_max_connections(Ref),
Opts = ranch_server:get_protocol_options(Ref), %% get_protocol_options is called here
ok = proc_lib:init_ack(Parent, {ok, self()}),
loop(#state{parent=Parent, ref=Ref, conn_type=ConnType,
shutdown=Shutdown, transport=Transport, protocol=Protocol,
opts=Opts, ack_timeout=AckTimeout, max_conns=MaxConns}, 0, 0, []).
%% Main loop; CurConns/NbChildren are counters, Sleepers are acceptors
%% blocked because max_conns was reached.
%% NOTE(review): this excerpt is elided — the '{ok, Pid} ->' success
%% pattern that should precede controlling_process, and the lines
%% between the two clauses, are missing from the quote (see the full
%% quote of this loop further below in the article).
loop(State=#state{parent=Parent, ref=Ref, conn_type=ConnType,
transport=Transport, protocol=Protocol, opts=Opts,
ack_timeout=AckTimeout, max_conns=MaxConns},
CurConns, NbChildren, Sleepers) ->
receive
{?MODULE, start_protocol, To, Socket} ->
case Protocol:start_link(Ref, Socket, Transport, Opts) of
Transport:controlling_process(Socket, Pid),
...
Ret ->
To ! self(),
error_logger:error_msg(
"Ranch listener ~p connection process start failure; "
"~p:start_link/4 returned: ~999999p~n",
[Ref, Protocol, Ret]),
Transport:close(Socket),
也就是说ProtoOpts被原模原样地传给了一开始Protocol所指定的模块。ranch本身不需要这部分,Opts是提供给使用者的、针对单个实例的一份配置,可以在自己的Protocol启动逻辑里使用。而Transport在这里被用作Transport:controlling_process(Socket, Pid)和Transport:close(Socket),看起来很像gen_tcp模块这类东西,结合echo例子用的ranch_tcp,这时候就没法猜了,只能看文档:
A transport defines the interface to interact with a socket.
TCP transport
The TCP transport is a thin wrapper around gen_tcp.
SSL transport
The SSL transport is a thin wrapper around ssl.
ranch库本身提供了基于gen_tcp实现的ranch_tcp和基于ssl实现的ranch_ssl,当然也可以自定义transport,只需要实现一个符合ranch_transport behavior 的模块即可。
现在我们解决了参数中的四个
Ref , NbAcceptors, Transport , TransOpts, Protocol , ProtoOpts
Ref是用来标记这个实例的名字,用atom来描述
Transport 指定当前数据传输的方式,例子里用的ranch_tcp
Protocol 指定tcp消息的具体处理模块,一般来说是一个gen_server描述,这里具体的逻辑还要在细看
ProtoOpts 则是传给这个模块的配置,供用户模块自己使用,ranch会根据Ref标记来存储这些值
接着看代码
ranch_acceptors_sup
-module(ranch_acceptors_sup).
-behaviour(supervisor).
-spec start_link(ranch:ref(), non_neg_integer(), module(), any())
-> {ok, pid()}.
start_link(Ref, NbAcceptors, Transport, TransOpts) ->
supervisor:start_link(?MODULE, [Ref, NbAcceptors, Transport, TransOpts]).
init([Ref, NbAcceptors, Transport, TransOpts]) ->
%% The conns sup was started first by ranch_listener_sup, so its pid
%% can be looked up here and handed to every acceptor.
ConnsSup = ranch_server:get_connections_sup(Ref),
%% Use a caller-supplied listen socket if given, otherwise open one.
LSocket = case proplists:get_value(socket, TransOpts) of
undefined ->
{ok, Socket} = Transport:listen(TransOpts),
Socket;
Socket ->
Socket
end,
%% Publish the real port (relevant when port 0 was requested).
{ok, {_, Port}} = Transport:sockname(LSocket),
ranch_server:set_port(Ref, Port),
%% One permanent ranch_acceptor worker per requested acceptor slot,
%% all accepting on the same listen socket.
Procs = [
{{acceptor, self(), N}, {ranch_acceptor, start_link, [
LSocket, Transport, ConnsSup
]}, permanent, brutal_kill, worker, []}
|| N <- lists:seq(1, NbAcceptors)],
{ok, {{one_for_one, 10, 10}, Procs}}.
ranch_tcp.erl
%% Open a listen socket. User options are merged with overridable
%% defaults (backlog/send_timeout/send_timeout_close), then filtered
%% against an allow-list; the remaining options in the last list are
%% forced by ranch_tcp and cannot be changed by the user.
listen(Opts) ->
Opts2 = ranch:set_option_default(Opts, backlog, 1024),
Opts3 = ranch:set_option_default(Opts2, send_timeout, 30000),
Opts4 = ranch:set_option_default(Opts3, send_timeout_close, true),
%% We set the port to 0 because it is given in the Opts directly.
%% The port in the options takes precedence over the one in the
%% first argument.
gen_tcp:listen(0, ranch:filter_options(Opts4,
[backlog, ip, linger, nodelay, port, raw,
send_timeout, send_timeout_close],
[binary, {active, false}, {packet, raw},
{reuseaddr, true}, {nodelay, true}])).
NbAcceptors比较容易理解,这是一个int值,指定了有多少个acceptor用来处理对LSocket发起的连接请求。
TransOpts的用法相对复杂:首先读取socket字段的值,如果有就直接把这个值当做LSocket进入后续逻辑;如果没有,就调用Transport:listen监听端口来生成LSocket
对于gen_tcp,它允许用户指定backlog、ip、linger、nodelay、port、raw、send_timeout、send_timeout_close这些字段(其中backlog、send_timeout、send_timeout_close带有默认值),其余选项由ranch_tcp强制指定,不允许用户设定。
这些参数都是gen_tcp模块的原生选项,参考gen_tcp模块的文档即可,这里不再做解释
也就是说TransOpts描述的是LSocket:既可以直接通过socket字段指定一个现成的LSocket来完成后续逻辑,也可以通过上面说的那些监听选项来指定要监听的端口和tcp连接的属性。
至此,我们探究完了ranch:start_listener的所有参数。
现在我们已经可以直接回答这个问题的第二部分,echo_protocol就是上面我们谈到的Protocol,他是用来指定Socket的具体处理逻辑模块的,他的参数全部由ranch:start_listener 提供,这里我们看下echo_protocol的具体定义
echo_protocol.erl
%% The example protocol module again, in full: a ranch_protocol
%% implementation that echoes input back to the client.
-module(echo_protocol).
-behaviour(ranch_protocol).
%% Spawn the connection process; Opts comes from ProtoOpts given to
%% ranch:start_listener/6.
start_link(Ref, Socket, Transport, Opts) ->
Pid = spawn_link(?MODULE, init, [Ref, Socket, Transport, Opts]),
{ok, Pid}.
init(Ref, Socket, Transport, _Opts = []) ->
%% Must be called before any socket operation: waits until this
%% process owns the socket.
ok = ranch:accept_ack(Ref),
loop(Socket, Transport).
%% Echo loop with a 5s recv timeout; close on timeout/error.
loop(Socket, Transport) ->
case Transport:recv(Socket, 0, 5000) of
{ok, Data} ->
Transport:send(Socket, Data),
loop(Socket, Transport);
_ ->
ok = Transport:close(Socket)
end.
ranch_protocol.erl
%% Behaviour definition: a protocol module only needs start_link/4.
-module(ranch_protocol).
%% Start a new connection process for the given socket.
%% Ref identifies the listener, Socket is the accepted socket,
%% Transport the transport module, ProtocolOptions the ProtoOpts.
-callback start_link(
Ref::ranch:ref(),
Socket::any(),
Transport::module(),
ProtocolOptions::any())
-> {ok, ConnectionPid::pid()}.
ranch_protocol只需要实现一个回调start_link/4即可,入参分别为监听标识Ref、accept之后生成的Socket、传输方式Transport,以及ProtocolOptions
例子中的内容也很清楚,启动一个进程,进入loop循环,每次将从socket收到的内容原模原样返回回去,需要注意的是ranch:accept_ack(Ref)这步,原因的话看下官方文档:
The newly started process can then freely initialize itself. However,
it must callranch:accept_ack/1
before doing any socket operation.
This will ensure the connection process is the owner of the socket.
It expects the listener’s name as argument.
要理解这部分就必须完整的看完accept到具体的逻辑处理的过程,我们从accept找起
ranch_acceptor.erl
%% Acceptor loop (excerpt): block forever on accept, then hand the
%% new socket to the connections supervisor.
loop(LSocket, Transport, ConnsSup) ->
_ = case Transport:accept(LSocket, infinity) of
{ok, CSocket} ->
%% Transfer socket ownership to the conns sup before notifying it.
Transport:controlling_process(CSocket, ConnsSup),
%% This call will not return until process has been started
%% AND we are below the maximum number of connections.
ranch_conns_sup:start_protocol(ConnsSup, CSocket);
之前我们启动了NbAcceptors个ranch_acceptor用来处理LSocket的accept请求,它们每个都阻塞在Transport:accept/2这里;当有连接请求被accept之后,先把Socket控制权交给ConnsSup,然后向这个sup发出通知
ranch_conns_sup.erl
-spec start_protocol(pid(), inet:socket()) -> ok.
%% Called from the acceptor: notify the conns sup and block until it
%% replies with its own pid — this is the back-pressure point that
%% stops an acceptor while the connection limit is exceeded.
start_protocol(SupPid, Socket) ->
SupPid ! {?MODULE, start_protocol, self(), Socket},
receive SupPid -> ok end.
%% Conns sup loop (excerpt): on a new socket, start the protocol
%% process, hand it the socket, then tell it via 'shoot' that the
%% ownership transfer is complete.
loop(State=#state{parent=Parent, ref=Ref, conn_type=ConnType,
transport=Transport, protocol=Protocol, opts=Opts,
ack_timeout=AckTimeout, max_conns=MaxConns},
CurConns, NbChildren, Sleepers) ->
receive
{?MODULE, start_protocol, To, Socket} ->
case Protocol:start_link(Ref, Socket, Transport, Opts) of
{ok, Pid} ->
Transport:controlling_process(Socket, Pid),
Pid ! {shoot, Ref, Transport, Socket, AckTimeout},
put(Pid, true),
CurConns2 = CurConns + 1,
%% Below the limit: release the acceptor immediately;
%% at/over the limit: park it in Sleepers instead.
if CurConns2 < MaxConns ->
To ! self(),
loop(State, CurConns2, NbChildren + 1,
Sleepers);
true ->
loop(State, CurConns2, NbChildren + 1,
[To|Sleepers])
end;
...
可以看到是先调用Protocol:start_link/4启动了Protocol模块指定的进程,然后将Socket的控制权交给这个新启动的进程,之后再发送一条消息告知处理进程:控制权已经交接完毕,这个Socket可以使用了。反过来说,在收到这条shoot消息之前,我们并不能确定Socket的控制权已经交接完毕,所以在echo_protocol里我们首先调用了ranch:accept_ack/1,确保进程init完成后Socket就是可用的。之前看ranch_tcp:listen/1的时候我们也看到了socket被强制设置为{active, false},因此也不用担心消息丢失。在拿到Socket控制权之后就可以随意更改Socket设置了
If your protocol code requires specific socket options, you should
set them while initializing your connection process, after
callingranch:accept_ack/1
. You can useTransport:setopts/2
for that purpose.
这里还有一点要注意就是,如果Protocol是一个gen_server描述,那么gen_server:start_link是一个阻塞调用,意味着Protocol:start_link(Ref, Socket, Transport, Opts) 在你的init函数里有ranch:accept_ack/1的情况下是永远无法返回的,所以得专门处理下,官方文档里提供了两种解决方案
Special processes like the ones that use the
gen_server
orgen_fsm
behaviours have the particularity of having theirstart_link
call not
return until theinit
function returns. This is problematic, because
you won’t be able to callranch:accept_ack/1
from theinit
callback
as this would cause a deadlock to happen.
There are two ways of solving this problem.
The first, and probably the most elegant one, is to make use of the
gen_server:enter_loop/3
function. It allows you to start your process
normally (although it must be started withproc_lib
like all special
processes), then perform any needed operations before falling back into
the normalgen_server
execution loop.
%% Official example, method 1: proc_lib + gen_server:enter_loop/3.
%% The process acks its starter immediately, does the ranch handshake,
%% then drops into the normal gen_server loop — avoiding the deadlock
%% that calling ranch:accept_ack/1 inside gen_server init/1 would cause.
-module(my_protocol).
-behaviour(gen_server).
-behaviour(ranch_protocol).
-export([start_link/4]).
-export([init/4]).
%% Exports of other gen_server callbacks here.
start_link(Ref, Socket, Transport, Opts) ->
%% Special processes must be started with proc_lib to use enter_loop.
proc_lib:start_link(?MODULE, init, [Ref, Socket, Transport, Opts]).
init(Ref, Socket, Transport, _Opts = []) ->
%% Unblock the caller of start_link right away.
ok = proc_lib:init_ack({ok, self()}),
%% Perform any required state initialization here.
ok = ranch:accept_ack(Ref),
ok = Transport:setopts(Socket, [{active, once}]),
gen_server:enter_loop(?MODULE, [], {state, Socket, Transport}).
%% Other gen_server callbacks here.
The second method involves triggering a timeout just after
gen_server:init
ends. If you return a timeout value of0
then thegen_server
will call
handle_info(timeout, _, _)
right away.
%% Official example, method 2: return a 0 timeout from init/1 so the
%% gen_server calls handle_info(timeout, ...) immediately after start,
%% and do the ranch handshake there instead of in init/1.
-module(my_protocol).
-behaviour(gen_server).
-behaviour(ranch_protocol).
%% Exports go here.
init([Ref, Socket, Transport]) ->
%% The trailing 0 triggers an immediate timeout message.
{ok, {state, Ref, Socket, Transport}, 0}.
handle_info(timeout, State={state, Ref, Socket, Transport}) ->
ok = ranch:accept_ack(Ref),
ok = Transport:setopts(Socket, [{active, once}]),
{noreply, State};
%% ...
就是要么通过使用gen_server:enter_loop/3
,和proc_lib
的配合,强制让启动gen_server
的时候直接调用Mod:init
而不是gen_server:init_it/6
来避免进程启动的阻塞。
或者就是确保进程启动后执行的第一个动作一定是ranch:accept_ack/1
,通过gen_server
的启动超时机制,在超时回调里加入ranch:accept_ack/1
来确保ack的执行
乍一看tcp_echo_sup好像确实没用到。事实上就这个例子来说它确实没用到,不过例子这么写也是有原因的
tcp_echo.app.src
%% Application resource file for the tcp_echo example.
%% 'ranch' is listed in applications, so it must be started first
%% (e.g. via application:ensure_all_started/1); tcp_echo_app is the
%% application callback module.
{application, tcp_echo, [
{description, "Ranch TCP echo example."},
{vsn, "1"},
{modules, []},
{registered, [tcp_echo_sup]},
{applications, [
kernel,
stdlib,
ranch
]},
{mod, {tcp_echo_app, []}},
{env, []}
]}.
其实tcp_echo_sup代表的就是你自己项目的主sup,也就是下面引文里的your application supervisors,它就是用来挂载、启动ranch实例的
To embed Ranch in your application you can simply add the child specs
to your supervision tree. This can all be done in theinit/1
function
of one of your application supervisors.
Ranch requires at the minimum two kinds of child specs for embedding.
First, you need to addranch_sup
to your supervision tree, only once,
regardless of the number of listeners you will use. Then you need to
add the child specs for each listener.
ranch_listener_sup是一个实例的主持者,它会根据入参初始化ranch_acceptors_sup,决定启动多少个ranch_acceptor来进行accept工作;同时启动ranch_conns_sup等待连接到来。以后每当有一个连接到来,都会启动一个用户定义的Protocol模块对应的进程来处理Socket消息,这个进程挂载在ranch_conns_sup下被统一管理
而ranch_server则管理、记录着这些实例的绝大多数基础信息
篇幅原因下一篇再来谈如何将ranch集成到自己的项目中