参考资料:http://erlangcentral.org/wiki/index.php/Building_a_Non-blocking_TCP_server_using_OTP_principles
服务器设计
tcp_server_app下的根监控树使用one_for_one重启策略。两个子树应用,第一个是一个tcp套接字监听服务器,使用gen_server模式来实现,采用异步监听的客户端连接的模式。第二个是一个客户端应用,使用gen_fsm模式实现,使用标准SASL错误报告接口,记录客户端消息处理的日志以及非正常与服务器断开连接日志。
整体应用架构:
+----------------+
| tcp_server_app |
+--------+-------+
| (one_for_one)
+----------------+---------+
| |
+-------+------+ +-------+--------+
| tcp_listener | + tcp_client_sup |
+--------------+ +-------+--------+
| (simple_one_for_one)
+-----|---------+
+-------|--------+|
+--------+-------+|+
| tcp_echo_fsm |+
+----------------+
tcp_server代码如下:
1 %% TCP Server Application (tcp_server_app.erl) 2 -module(tcp_server_app). 3 -author('[email protected]'). 4 5 %% 实现application模式 6 -behaviour(application). 7 8 -export([start_client/0]). 9 10 %% 应用程序启动以及监控树回调函数 11 -export([start/2, stop/1, init/1]). 12 13 %% 宏变量定义 14 -define(MAX_RESTART, 5). 15 -define(MAX_TIME, 60). 16 -define(DEF_PORT, 2222). 17 18 %% 启动客户端进程的接口 19 %% 在监听程序建立连接时调用 20 start_client() -> 21 %% 回调第二个init函数,因为第二个是动态添加监控树子节点 22 %% 也就是说这里是两颗不同的监控树,使用了一个模块两个 init 函数来实现 23 supervisor:start_child(tcp_client_sup, []). 24 25 %%---------------------------------------------------------------------- 26 %% Application behaviour callbacks 27 %%---------------------------------------------------------------------- 28 start(_Type, _Args) -> 29 %% 获取端口配置参数,找不到时返回默认端口 ?DEF_PORT 30 ListenPort = get_app_env(listen_port, ?DEF_PORT), 31 32 %% 启动应用程序,回调函数为 第一个 init 函数,根据参数匹配,参数为 [端口,客户端回调模块] 33 %% 第一个 init 函数仅仅是启动了两个监控树 34 supervisor:start_link({local, ?MODULE}, ?MODULE, [ListenPort, tcp_echo_fsm]). 35 36 stop(_S) -> 37 ok. 38 39 %%---------------------------------------------------------------------- 40 %% Supervisor behaviour callbacks 41 %%---------------------------------------------------------------------- 42 init([Port, Module]) -> 43 {ok, 44 %% 监控树策略参数,ono_for_one策略,设置MAX_TIME最多重启的MAX_RESTART次 45 {_SupFlags = {one_for_one, ?MAX_RESTART, ?MAX_TIME}, 46 [ 47 % TCP Listener 48 { tcp_server_sup, % Id = internal id 49 {tcp_listener,start_link,[Port,Module]}, % StartFun = {M, F, A} 50 permanent, % Restart = permanent | transient | temporary 51 2000, % Shutdown = brutal_kill | int() >= 0 | infinity 52 worker, % Type = worker | supervisor 53 [tcp_listener] % Modules = [Module] | dynamic 54 }, 55 % Client instance supervisor 56 { 57 %% Module参数初始化了tcp_client_sup监控树的 init 函数, init 函数在下面 58 tcp_client_sup, 59 %% 子节点启动策略 60 {supervisor,start_link,[{local, tcp_client_sup}, ?MODULE, [Module]]}, 61 permanent, % Restart = permanent | transient | temporary 62 infinity, % Shutdown = brutal_kill | int() >= 0 | infinity 63 supervisor, % Type = worker | supervisor 64 [] % Modules = [Module] | dynamic 65 } 66 ] 67 } 68 }; 69 70 %% 在服务器接收连接时,创建客户端进程时会回调到这个函数,使用simple_one_for_one启动策略 71 %% 参数 Module 在第一个 72 init([Module]) -> 73 {ok, 74 %% 另外一种根监督树模式,simple_one_for_one策略子节点只能动态添加 75 {_SupFlags = {simple_one_for_one, ?MAX_RESTART, ?MAX_TIME}, 76 [ 77 % TCP Client 78 { undefined, % Id = internal id 79 {Module,start_link,[]}, % StartFun = {M, F, A} 80 temporary, % Restart = permanent | transient | temporary 81 2000, % Shutdown = brutal_kill | int() >= 0 | infinity 82 worker, % Type = worker | supervisor 83 [] % Modules = [Module] | dynamic 84 } 85 ] 86 } 87 }. 88 89 %%---------------------------------------------------------------------- 90 %% Internal functions 91 %%---------------------------------------------------------------------- 92 %% 获取配置文件xxx.app文件中的配置变量 93 get_app_env(Opt, Default) -> 94 case application:get_env(application:get_application(), Opt) of 95 {ok, Val} -> Val; 96 _ -> 97 case init:get_argument(Opt) of 98 [[Val | _]] -> Val; 99 error -> Default 100 end 101 end.
下面是服务端socket监听程序,这里使用了一个不具有官方文档的 api
prim_inet:async_accept/2 来实现一个异步监听套接字的服务器程序,代码如下:
1 % TCP Listener Process (tcp_listener.erl) 2 -module(tcp_listener). 3 -author('[email protected]'). 4 5 %% 实现 gen_server 模式 6 -behaviour(gen_server). 7 8 %% 内部接口 9 -export([start_link/2]). 10 11 %% gen_server 回调函数 12 -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, 13 code_change/3]). 14 15 %% 定义了一个 record 记录 gen_server 进程的状态 16 -record(state, { 17 listener, % Listening socket 18 acceptor, % Asynchronous acceptor's internal reference 19 module % FSM handling module 20 }). 21 22 %%-------------------------------------------------------------------- 23 %% @spec (Port::integer(), Module) -> {ok, Pid} | {error, Reason} 24 %% @doc 监控树调用并开始进行tcp套接字监听 25 %% @end 26 %%---------------------------------------------------------------------- 27 start_link(Port, Module) when is_integer(Port), is_atom(Module) -> 28 gen_server:start_link({local, ?MODULE}, ?MODULE, [Port, Module], []). 29 30 %%%------------------------------------------------------------------------ 31 %%% Callback functions from gen_server 32 %%%------------------------------------------------------------------------ 33 34 %%---------------------------------------------------------------------- 35 %% @spec (Port::integer()) -> {ok, State} | 36 %% {ok, State, Timeout} | 37 %% ignore | 38 %% {stop, Reason} 39 %% 40 %% @doc gen_server启动时回调,并创建 tcp 监听 41 %% @end 42 %%---------------------------------------------------------------------- 43 init([Port, Module]) -> 44 process_flag(trap_exit, true), 45 Opts = [binary, {packet, 2}, {reuseaddr, true}, 46 {keepalive, true}, {backlog, 30}, {active, false}], 47 %% 使用 gen_tcp 模块启动套接字监听,这是一个阻塞动作 48 case gen_tcp:listen(Port, Opts) of 49 {ok, Listen_socket} -> %% 创建监听成功返回监听socket 50 %% 创建第一个接受连接的进程 51 %% prim_inet:async_accept/2开启异步监听 52 %% 之后有客户端连接时会向此进程发送一个异步消息inet_async到进程消息队列 53 %% Ref 存储接受进程的引用 54 {ok, Ref} = prim_inet:async_accept(Listen_socket, -1), 55 {ok, #state{listener = Listen_socket, 56 acceptor = Ref, 57 module = Module}}; 58 {error, Reason} -> 59 {stop, Reason} 60 end. 61 62 %%------------------------------------------------------------------------- 63 %% @spec (Request, From, State) -> {reply, Reply, State} | 64 %% {reply, Reply, State, Timeout} | 65 %% {noreply, State} | 66 %% {noreply, State, Timeout} | 67 %% {stop, Reason, Reply, State} | 68 %% {stop, Reason, State} 69 %% @doc 服务进程被同步调用时的回调函数 70 %% @end 71 %% @private 72 %%------------------------------------------------------------------------- 73 handle_call(Request, _From, State) -> 74 {stop, {unknown_call, Request}, State}. 75 76 %%------------------------------------------------------------------------- 77 %% @spec (Msg, State) ->{noreply, State} | 78 %% {noreply, State, Timeout} | 79 %% {stop, Reason, State} 80 %% @doc 服务进程被异步调用时的回调函数 81 %% @end 82 %% @private 83 %%------------------------------------------------------------------------- 84 handle_cast(_Msg, State) -> 85 {noreply, State}. 86 87 %%------------------------------------------------------------------------- 88 %% @spec (Msg, State) ->{noreply, State} | 89 %% {noreply, State, Timeout} | 90 %% {stop, Reason, State} 91 %% @doc 回调函数,处理那些直接发消息到进程邮箱的事件 92 %% 这里处理的是 {inet_async, ListSock, Ref, {ok, CliSocket}}事件, 93 %% inet_async 表示是一个异步事件,服务器端接收连接采用异步的方式, 94 %% 客户端连接最终会被转化成一个 inet_async 消息发送到进程邮箱等待处理 95 %% {{ok, CliSocket}} 里的CliSocket表示客户端建立的连接套接口 96 %% @end 97 %% @private 98 %%------------------------------------------------------------------------- 99 100 %% 注意这里 ListSock 以及 Ref 做了匹配,只有匹配了才是该监听口接收的连接 101 handle_info({inet_async, ListSock, Ref, {ok, CliSocket}}, 102 #state{listener=ListSock, acceptor=Ref, module=Module} = State) -> 103 try 104 case set_sockopt(ListSock, CliSocket) of 105 ok -> ok; 106 {error, Reason} -> exit({set_sockopt, Reason}) 107 end, 108 109 %% 接收新的客户端连接,启动一个新的客户端状态机进程,动态添加到 tcp_client_sup 客户端监控树 110 {ok, Pid} = tcp_server_app:start_client(), 111 112 %% 绑定 CliSocet 到客户端进程 Pid, 这样CliSocket接收数据都会被转化成Pid代表进程的邮箱消息 113 gen_tcp:controlling_process(CliSocket, Pid), 114 %% Instruct the new FSM that it owns the socket. 115 116 Module:set_socket(Pid, CliSocket), 117 118 %% Signal the network driver that we are ready to accept another connection 119 %% 重新设置异步监听下一个客户端连接的消息,设置新的监听引用 120 %% 必须重新设置才能监听到 {inet_async,S,Ref,Status} 消息 121 case prim_inet:async_accept(ListSock, -1) of 122 {ok, NewRef} -> ok; 123 {error, NewRef} -> exit({async_accept, inet:format_error(NewRef)}) 124 end, 125 126 %% 更新新的监听引用 127 {noreply, State#state{acceptor=NewRef}} 128 catch exit:Why -> 129 error_logger:error_msg("Error in async accept: ~p.\n", [Why]), 130 {stop, Why, State} 131 end; 132 133 %%客户端建立连接的容错处理 134 handle_info({inet_async, ListSock, Ref, Error}, #state{listener=ListSock, acceptor=Ref} = State) -> 135 error_logger:error_msg("Error in socket acceptor: ~p.\n", [Error]), 136 {stop, Error, State}; 137 138 handle_info(_Info, State) -> 139 {noreply, State}. 140 141 %%------------------------------------------------------------------------- 142 %% @spec (Reason, State) -> any 143 %% @doc Callback executed on server shutdown. It is only invoked if 144 %% `process_flag(trap_exit, true)' is set by the server process. 145 %% The return value is ignored. 146 %% @end 147 %% @private 148 %%------------------------------------------------------------------------- 149 terminate(_Reason, State) -> 150 gen_tcp:close(State#state.listener), 151 ok. 152 153 %%------------------------------------------------------------------------- 154 %% @spec (OldVsn, State, Extra) -> {ok, NewState} 155 %% @doc Convert process state when code is changed. 156 %% @end 157 %% @private 158 %%------------------------------------------------------------------------- 159 code_change(_OldVsn, State, _Extra) -> 160 {ok, State}. 161 162 %%%------------------------------------------------------------------------ 163 %%% Internal functions 164 %%%------------------------------------------------------------------------ 165 166 %% 设置客户端socket的参数选项,只是简单的复制了监听服务器的配置选项 167 set_sockopt(ListSock, CliSocket) -> 168 true = inet_db:register_socket(CliSocket, inet_tcp), 169 case prim_inet:getopts(ListSock, [active, nodelay, keepalive, delay_send, priority, tos]) of 170 {ok, Opts} -> 171 case prim_inet:setopts(CliSocket, Opts) of 172 ok -> ok; 173 Error -> gen_tcp:close(CliSocket), Error 174 end; 175 Error -> 176 gen_tcp:close(CliSocket), Error 177 end.
下面是客户端处理输出的状态机:
1 %% TCP Client Socket Handling FSM (tcp_echo_fsm.erl) 2 %% 客户端输出处理状态机,这里其实就是一个 echo_server 的客户端版本 3 4 -module(tcp_echo_fsm). 5 -author('[email protected]'). 6 7 %% 实现 gen_fsm 模式,事实上状态机应用场景没有 gen_server 多 8 %% 不过能用的场景都比较特殊,比如游戏客户端,服务端战斗模块 9 -behaviour(gen_fsm). 10 11 -export([start_link/0, set_socket/2]). 12 13 %% gen_fsm 回调函数 14 -export([init/1, handle_event/3, 15 handle_sync_event/4, handle_info/3, terminate/3, code_change/4]). 16 17 %% FSM States FSM 状态机的状态 18 -export([ 19 'WAIT_FOR_SOCKET'/2, %% 等待socket 20 'WAIT_FOR_DATA'/2 %% 等待socket数据 21 ]). 22 23 -record(state, { 24 socket, % client socket 25 addr % client address 26 }). 27 28 -define(TIMEOUT, 120000). 29 30 %%%------------------------------------------------------------------------ 31 %%% API 32 %%%------------------------------------------------------------------------ 33 34 %%------------------------------------------------------------------------- 35 %% @spec (Socket) -> {ok,Pid} | ignore | {error,Error} 36 %% @doc To be called by the supervisor in order to start the server. 37 %% If init/1 fails with Reason, the function returns {error,Reason}. 38 %% If init/1 returns {stop,Reason} or ignore, the process is 39 %% terminated and the function returns {error,Reason} or ignore, 40 %% respectively. 41 %% @end 42 %%------------------------------------------------------------------------- 43 start_link() -> 44 gen_fsm:start_link(?MODULE, [], []). 45 46 set_socket(Pid, Socket) when is_pid(Pid), is_port(Socket) -> 47 gen_fsm:send_event(Pid, {socket_ready, Socket}). 48 49 %%%------------------------------------------------------------------------ 50 %%% Callback functions from gen_server 51 %%%------------------------------------------------------------------------ 52 53 %%------------------------------------------------------------------------- 54 %% Func: init/1 55 %% Returns: {ok, StateName, StateData} | 56 %% {ok, StateName, StateData, Timeout} | 57 %% ignore | 58 %% {stop, StopReason} 59 %% @private 60 %%------------------------------------------------------------------------- 61 init([]) -> 62 process_flag(trap_exit, true), 63 64 %% 状态机启动之后的初始化状态 65 {ok, 'WAIT_FOR_SOCKET', #state{}}. 66 67 %%------------------------------------------------------------------------- 68 %% Func: StateName/2 69 %% Returns: {next_state, NextStateName, NextStateData} | 70 %% {next_state, NextStateName, NextStateData, Timeout} | 71 %% {stop, Reason, NewStateData} 72 %% @private 73 %%------------------------------------------------------------------------- 74 75 %% 创建客户端之后 set_socket 函数发送消息之后在这里被处理了 76 %% 大致逻辑是:收到通知,客户端连接socket到手,可以设置套接字选项并开始接收数据 77 'WAIT_FOR_SOCKET'({socket_ready, Socket}, State) when is_port(Socket) -> 78 % Now we own the socket 79 inet:setopts(Socket, [{active, once}, {packet, 2}, binary]), 80 {ok, {IP, _Port}} = inet:peername(Socket), 81 82 %% 确定了socket之后,状态机的下一个状态就是等着接收数据了 83 {next_state, 'WAIT_FOR_DATA', State#state{socket=Socket, addr=IP}, ?TIMEOUT}; 84 'WAIT_FOR_SOCKET'(Other, State) -> 85 error_logger:error_msg("State: 'WAIT_FOR_SOCKET'. Unexpected message: ~p\n", [Other]), 86 %% Allow to receive async messages 87 {next_state, 'WAIT_FOR_SOCKET', State}. 88 89 %% 显示来自客户端的事件 90 'WAIT_FOR_DATA'({data, Data}, #state{socket=S} = State) -> 91 ok = gen_tcp:send(S, Data), 92 {next_state, 'WAIT_FOR_DATA', State, ?TIMEOUT}; 93 94 'WAIT_FOR_DATA'(timeout, State) -> 95 error_logger:error_msg("~p Client connection timeout - closing.\n", [self()]), 96 {stop, normal, State}; 97 98 'WAIT_FOR_DATA'(Data, State) -> 99 io:format("~p Ignoring data: ~p\n", [self(), Data]), 100 {next_state, 'WAIT_FOR_DATA', State, ?TIMEOUT}. 101 102 %%------------------------------------------------------------------------- 103 %% Func: handle_event/3 104 %% Returns: {next_state, NextStateName, NextStateData} | 105 %% {next_state, NextStateName, NextStateData, Timeout} | 106 %% {stop, Reason, NewStateData} 107 %% @private 108 %%------------------------------------------------------------------------- 109 handle_event(Event, StateName, StateData) -> 110 {stop, {StateName, undefined_event, Event}, StateData}. 111 112 %%------------------------------------------------------------------------- 113 %% Func: handle_sync_event/4 114 %% Returns: {next_state, NextStateName, NextStateData} | 115 %% {next_state, NextStateName, NextStateData, Timeout} | 116 %% {reply, Reply, NextStateName, NextStateData} | 117 %% {reply, Reply, NextStateName, NextStateData, Timeout} | 118 %% {stop, Reason, NewStateData} | 119 %% {stop, Reason, Reply, NewStateData} 120 %% @private 121 %%------------------------------------------------------------------------- 122 handle_sync_event(Event, _From, StateName, StateData) -> 123 {stop, {StateName, undefined_event, Event}, StateData}. 124 125 %%------------------------------------------------------------------------- 126 %% Func: handle_info/3 127 %% Returns: {next_state, NextStateName, NextStateData} | 128 %% {next_state, NextStateName, NextStateData, Timeout} | 129 %% {stop, Reason, NewStateData} 130 %% @private 131 %%------------------------------------------------------------------------- 132 handle_info({tcp, Socket, Bin}, StateName, #state{socket=Socket} = StateData) -> 133 % Flow control: enable forwarding of next TCP message 134 inet:setopts(Socket, [{active, once}]), 135 ?MODULE:StateName({data, Bin}, StateData); 136 137 handle_info({tcp_closed, Socket}, _StateName, 138 #state{socket=Socket, addr=Addr} = StateData) -> 139 error_logger:info_msg("~p Client ~p disconnected.\n", [self(), Addr]), 140 {stop, normal, StateData}; 141 142 handle_info(_Info, StateName, StateData) -> 143 {noreply, StateName, StateData}. 144 145 %%------------------------------------------------------------------------- 146 %% Func: terminate/3 147 %% Purpose: Shutdown the fsm 148 %% Returns: any 149 %% @private 150 %%------------------------------------------------------------------------- 151 terminate(_Reason, _StateName, #state{socket=Socket}) -> 152 (catch gen_tcp:close(Socket)), 153 ok. 154 155 %%------------------------------------------------------------------------- 156 %% Func: code_change/4 157 %% Purpose: Convert process state when code is changed 158 %% Returns: {ok, NewState, NewStateData} 159 %% @private 160 %%------------------------------------------------------------------------- 161 code_change(_OldVsn, StateName, StateData, _Extra) -> 162 {ok, StateName, StateData}.
最后是app文件了:
1 %% tcp_server.app 文件 2 3 {application, tcp_server, 4 [ 5 {description, "Demo TCP server"}, 6 {vsn, "1.0"}, 7 {id, "tcp_server"}, 8 {modules, [tcp_listener, tcp_echo_fsm]}, 9 {registered, [tcp_server_sup, tcp_listener]}, 10 {applications, [kernel, stdlib]}, 11 %% 12 %% mod: 指定应用启动初始化的模块 13 %% 14 {mod, {tcp_server_app, []}}, 15 {env, []} 16 ] 17 }.
以上基本上都是个人查找资料过程的笔记,有理解错误的地方请评论指出,谢谢!