过程,由master的tick定时器定期往所有worker发送定时事件,systemstat和membasestat订阅这些事件然后定时上报状态
该tick守候进程只在master节点上启动
参与的类有
采集执行模块:
system_stats_collector 采集CPU,内存,磁盘情况
stats_collector 通过二进制命令往membase采集bucket数据
stats_archiver把采集的数据存储到mnesia中
stats_reader 读取模块
它们当中除了stats_reader以外,其他的都是gen_server
辅助模块:
ns_tick 定时器守候进程,只在master运行
mb_master 逻辑模块,根据情况决定当前节点是master,candidate或者worker
mb_master_sup master节点的监控模块,把master节点需要的服务启动,如ns_tick
ns_pubsub 订阅辅助模块,采集执行体中相关的gen_server模块都是通过它从而获得订阅通知功能。
代码细节分析
mb_master判断当前节点是否成为master,并在成为master时启动相关服务
Master节点启动
%% Start-up callback of mb_master.
%%
%% Subscribes to ns_config_events so that every {nodes_wanted, Nodes} change
%% is forwarded to this process as {peers, Nodes}, enables exit trapping,
%% arms a periodic send_heartbeat message and then picks the initial state:
%%   master    - this node is the only wanted node;
%%   candidate - this node is one of several wanted nodes;
%%   worker    - this node is not in the wanted list.
%% NOTE(review): the {ok, StateName, StateData} return shape suggests a
%% gen_fsm callback module - confirm against the full source.
init([]) ->
    Self = self(),
    ns_pubsub:subscribe(
      ns_config_events,
      fun ({nodes_wanted, Nodes}, State) ->
              Self ! {peers, Nodes},
              State;
          (_, State) ->
              State
      end, empty),
    erlang:process_flag(trap_exit, true),
    {ok, _} = timer:send_interval(?HEARTBEAT_INTERVAL, send_heartbeat),
    case ns_node_disco:nodes_wanted() of
        [N] = P when N == node() ->
            ?log_info("I'm the only node, so I'm the master.", []),
            {ok, master, start_master(#state{last_heard=now(), peers=P})};
        Peers when is_list(Peers) ->
            case lists:member(node(), Peers) of
                false ->
                    %% We're a worker, but don't know who the master is yet
                    ?log_info("Starting as worker. Peers: ~p", [Peers]),
                    {ok, worker, #state{last_heard=now()}};
                true ->
                    %% We're a candidate
                    ?log_info("Starting as candidate. Peers: ~p", [Peers]),
                    {ok, candidate, #state{last_heard=now(), peers=Peers}}
            end
    end.
自动接管:
%% Heartbeat tick while in the candidate state.
%%
%% Sends a heartbeat to the known peers, then checks how long ago
%% last_heard was updated. Once that age reaches ?TIMEOUT the node starts
%% mb_master_sup and moves to the master state; otherwise it stays a
%% candidate.
handle_info(send_heartbeat, candidate, #state{peers=Peers} = StateData) ->
    send_heartbeat(Peers, candidate, StateData),
    case timer:now_diff(now(), StateData#state.last_heard) >= ?TIMEOUT of
        true ->
            %% Take over
            ?log_info("Haven't heard from a higher priority node or "
                      "a master, so I'm taking over.", []),
            {ok, Pid} = mb_master_sup:start_link(),
            {next_state, master, StateData#state{child=Pid, master=node()}};
        false ->
            {next_state, candidate, StateData}
    end;
系统运行一个tick服务(gen_server),它定义了一个定时器(timer:send_interval(Interval, tick)),在定时器触发时对所有的node发出tick事件
%% Timer callback of the tick server.
%%
%% On every tick message it re-checks the process registration, converts
%% the current time with misc:time_to_epoch_ms_int/1 and notifies the
%% ?EVENT_MANAGER event manager on the local node and on every connected
%% node with {tick, Now}. The broadcast timestamp is stored in the state.
handle_info(tick, State) ->
    misc:verify_name(?MODULE), % MB-3180: make sure we're still registered
    Now = misc:time_to_epoch_ms_int(now()),
    lists:foreach(fun (Node) ->
                          gen_event:notify({?EVENT_MANAGER, Node}, {tick, Now})
                  end, [node() | nodes()]),
    {noreply, State#state{time=Now}};
-module(ns_pubsub).
-behaviour(gen_event).

%% Subscribe the calling process to the event manager Name. Each event is
%% delivered to the caller as an ordinary message.
subscribe(Name) ->
    Forwarder = msg_fun(self()),
    subscribe(Name, Forwarder, ignored).

%% Install a handler on event manager Name that runs Fun(Event, FunState)
%% for every event, threading the returned value through as the next
%% FunState. Returns the reference that identifies the installed handler.
subscribe(Name, Fun, State) ->
    HandlerRef = make_ref(),
    HandlerState = #state{func=Fun, func_state=State},
    ok = gen_event:add_sup_handler(Name, {?MODULE, HandlerRef}, HandlerState),
    HandlerRef.

%% Build the callback used by subscribe/1: it relays every event to Pid and
%% keeps the placeholder state 'ignored'.
msg_fun(Pid) ->
    fun (Msg, ignored) ->
            Pid ! Msg,
            ignored
    end.

%% gen_event callback: apply the subscriber's fun to the event and remember
%% the state it returns.
handle_event(Event, #state{func=Callback, func_state=Current} = State) ->
    Next = Callback(Event, Current),
    {ok, State#state{func_state=Next}};
系统监控:依赖于portsigar(sigar system-level stats for erlang);作为对比,RabbitMQ则是直接调用/usr/bin/vm_stat、/usr/sbin/prtconf,并读取‘文件’"/proc/meminfo"
%% Start-up callback of the system stats collector.
%%
%% Tries to launch the external sigar_port executable as a port. On success
%% the process subscribes to ns_tick_event; if the executable is missing
%% (error:enoent) a warning is logged, no subscription is made and the port
%% is recorded as undefined.
init([]) ->
    Path = path_config:component_path(bin, "sigar_port"),
    Port =
        try open_port({spawn_executable, Path},
                      [stream, use_stdio, exit_status,
                       binary, eof,
                       {arg0, lists:flatten(io_lib:format("portsigar for ~s", [node()]))}]) of
            X ->
                ns_pubsub:subscribe(ns_tick_event),
                X
        catch error:enoent ->
                ?log_warning("~s is missing. Will not collect system-level stats", [Path]),
                undefined
        end,
    {ok, #state{port = Port}}.

%% Tick handler: request one stats block from the port, decode it and
%% publish the result on ns_stats_event under the "@system" name.
%%
%% Ticks that piled up in the mailbox are flushed first and reported as
%% lost. Nothing is published while unpack_data/2 returns undefined, i.e.
%% until a previous sample is available for computing CPU deltas.
handle_info({tick, TS}, #state{port = Port, prev_sample = PrevSample}) ->
    case flush_ticks(0) of
        0 -> ok;
        N -> ?log_warning("lost ~p ticks", [N])
    end,
    port_command(Port, <<0:32/native>>),
    Binary = recv_data(Port, [], ?STATS_BLOCK_SIZE),
    {Stats0, NewPrevSample} = unpack_data(Binary, PrevSample),
    case Stats0 of
        undefined -> ok;
        _ ->
            %% Sorted once; the original sorted the already-sorted list again.
            Stats = lists:sort(Stats0),
            gen_event:notify(ns_stats_event,
                             {stats, "@system", #stat_entry{timestamp = TS,
                                                            values = Stats}})
    end,
    {noreply, #state{port = Port, prev_sample = NewPrevSample}};
协议解析
%% Decode one binary stats block received from the port.
%%
%% The block is a sequence of native-endian fields: a 32-bit version (must
%% be 0), a 32-bit struct size (must equal the size of the whole block) and
%% ten 64-bit counters. Returns {NowSamples, RawStats}:
%%   RawStats   - the raw counters as a key/value list, meant to be passed
%%                back as PrevSample on the next call;
%%   NowSamples - undefined when PrevSample is undefined; otherwise RawStats
%%                with the two CPU counters replaced by their difference
%%                from PrevSample, plus mem_free and cpu_utilization_rate.
unpack_data(Bin, PrevSample) ->
    <<Version:32/native,
      StructSize:32/native,
      CPULocalMS:64/native,
      CPUIdleMS:64/native,
      SwapTotal:64/native,
      SwapUsed:64/native,
      _SwapPageIn:64/native,
      _SwapPageOut:64/native,
      MemTotal:64/native,
      MemUsed:64/native,
      MemActualUsed:64/native,
      MemActualFree:64/native>> = Bin,
    %% Sanity checks: both lines are assertive matches on bound variables.
    StructSize = byte_size(Bin),
    Version = 0,
    RawStats = [{cpu_local_ms, CPULocalMS},
                {cpu_idle_ms, CPUIdleMS},
                {swap_total, SwapTotal},
                {swap_used, SwapUsed},
                %% {swap_page_in, SwapPageIn},
                %% {swap_page_out, SwapPageOut},
                {mem_total, MemTotal},
                {mem_used, MemUsed},
                {mem_actual_used, MemActualUsed},
                {mem_actual_free, MemActualFree}],
    NowSamples =
        case PrevSample of
            undefined ->
                undefined;
            _ ->
                {_, OldCPULocal} = lists:keyfind(cpu_local_ms, 1, PrevSample),
                {_, OldCPUIdle} = lists:keyfind(cpu_idle_ms, 1, PrevSample),
                LocalDiff = CPULocalMS - OldCPULocal,
                IdleDiff = CPUIdleMS - OldCPUIdle,
                RV1 = lists:keyreplace(cpu_local_ms, 1, RawStats,
                                       {cpu_local_ms, LocalDiff}),
                RV2 = lists:keyreplace(cpu_idle_ms, 1, RV1,
                                       {cpu_idle_ms, IdleDiff}),
                %% badarith covers LocalDiff =:= 0 (division by zero); the
                %% rate is reported as 0 in that case.
                [{mem_free, MemTotal - MemUsed},
                 {cpu_utilization_rate,
                  try 100 * (LocalDiff - IdleDiff) / LocalDiff
                  catch error:badarith -> 0
                  end}
                 | RV2]
        end,
    {NowSamples, RawStats}.
stats_collector
连接本地memcached采集memcached状态数据。
持久化,stats_archiver
%% Persist one stats sample for this archiver's bucket.
%%
%% The clause only matches when the bucket in the event equals the bucket
%% held in the state. The sample is written to the bucket's minute table
%% inside a mnesia transaction (retried up to ?RETRIES times), after which
%% a sample_archived event is published on ns_stats_event.
do_handle_info({stats, Bucket, Sample}, #state{bucket=Bucket} = State) ->
    MinuteTab = table(Bucket, minute),
    WriteSample = fun () -> mnesia:write(MinuteTab, Sample, write) end,
    {atomic, ok} = mnesia:transaction(WriteSample, ?RETRIES),
    gen_event:notify(ns_stats_event, {sample_archived, Bucket, Sample}),
    {noreply, State};
%% Re-sample the archived stats of Bucket/Period into slots of Step.
%%
%% Takes the newest key of the archive table, selects every entry whose
%% timestamp is newer than `Newest - N * Step * 1000 + 500`, groups
%% consecutive entries by misc:trunc_ts(Timestamp, Step) and collapses each
%% group with avg/2.
%% NOTE(review): the * 1000 suggests Step is in seconds and timestamps are
%% in milliseconds - confirm against stats_archiver.
%%
%% Returns {ok, Averages} in table traversal order, {ok, []} when the table
%% is empty or nothing matches, or the {error, _, _} tuple produced by the
%% fold.
resample(Bucket, Period, Step, N) ->
    Table = stats_archiver:table(Bucket, Period),
    case mnesia:dirty_last(Table) of
        '$end_of_table' ->
            {ok, []};
        Newest ->
            WindowSeconds = N * Step,
            Cutoff = Newest - WindowSeconds * 1000 + 500,
            Query = qlc:q([Entry || #stat_entry{timestamp=EntryTS} = Entry
                                        <- mnesia:table(Table),
                                    EntryTS > Cutoff]),
            %% Accumulator: {slot of the open group, finished averages
            %% (newest first), entries of the open group}.
            Collect =
                fun (#stat_entry{timestamp = SampleTS} = Entry,
                     {OpenSlot, Finished, Pending}) ->
                        case {misc:trunc_ts(SampleTS, Step), OpenSlot} of
                            {Slot, Slot} ->
                                %% Same slot: extend the open group.
                                {Slot, Finished, [Entry | Pending]};
                            {Slot, undefined} ->
                                %% Very first entry: open the first group.
                                {Slot, Finished, [Entry]};
                            {Slot, _} ->
                                %% Slot changed: close the open group.
                                {Slot, [avg(OpenSlot, Pending) | Finished], [Entry]}
                        end
                end,
            Initial = {undefined, [], []},
            case mnesia:activity(async_dirty, fun qlc:fold/3,
                                 [Collect, Initial, Query]) of
                {error, _, _} = Error ->
                    Error;
                {undefined, [], []} ->
                    {ok, []};
                {LastSlot, Averages, LastChunk} ->
                    {ok, lists:reverse([avg(LastSlot, LastChunk) | Averages])}
            end
    end.