erlang 全局名字服务global_name_server

global_name_server

global_name_server 是 kernel 应用中继 code_server、rex 之后第 3 个启动的 gen_server

1. 主要用途:

  1. 注册全局名称服务 register_name/2,register_name/3
  2. 全局锁 set_lock/1,set_lock/2,set_lock/3
  3. 保证一个函数在加锁情况下执行trans/2,trans/3,trans/4

2. 声明

声明部分见 kernel.erl

% kernel.erl
% Child specification (map form) for the global name server.
% The child is identified as global_name_server and is started by calling
% global:start_link(). It is declared as a permanent worker with a shutdown
% value of 2000, and `global` is listed as its callback module.
% NOTE(review): this is an excerpt; the trailing comma shows that the
% enclosing expression continues in the original file.
Global = #{id => global_name_server,
          start => {global, start_link, []},
          restart => permanent,
          shutdown => 2000,
          type => worker,
          modules => [global]},

3. global的自旋锁set_lock过程

% global.erl
%% Try to take the lock `Id` ({ResourceId, LockRequesterId}) on `Nodes`,
%% retrying with a sleep between attempts.
%%
%% Id      - {ResourceId, LockRequesterId} identifying the lock and its owner.
%% Nodes   - nodes on which the lock must be set; an empty list succeeds
%%           immediately.
%% Retries - remaining attempts; when it reaches 0 a failed attempt is
%%           returned to the caller instead of being retried.
%%           (presumably dec/1 leaves `infinity` untouched - confirm in dec/1)
%% Times   - number of attempts made so far, passed to random_sleep/1.
%%
%% Returns true when the lock was set, false when retries ran out.
set_lock({_ResourceId, _LockRequesterId}, [], _Retries, _Times) ->
  true;
set_lock({_ResourceId, _LockRequesterId} = Id, Nodes, Retries, Times) ->
  ?trace({set_lock, {me, self()}, Id, {nodes, Nodes},
          {retries, Retries}, {times, Times}}),
  case set_lock_on_nodes(Id, Nodes) of
    true ->
      ?trace({set_lock_true, Id}),
      true;
    false = Reply when Retries =:= 0 ->
      % Out of retries: hand the failure back to the caller.
      Reply;
    false ->
      random_sleep(Times),
      % Keep trying: this retry loop is the "spin lock" behaviour.
      set_lock(Id, Nodes, dec(Retries), Times + 1)
  end.
  
%% Make one attempt to set the lock `Id` on every node in `Nodes`.
%%
%% An empty node list succeeds trivially. Otherwise the lock is first checked
%% through local_lock_check/2; if that check fails its result (false) is
%% returned unchanged. If it passes, a {set_lock, Id} request is sent to the
%% global_name_server process on all nodes and the collected replies are
%% handed to check_replies/3, whose result is returned.
set_lock_on_nodes(_Id, []) ->
  true;
set_lock_on_nodes(Id, Nodes) ->
  case local_lock_check(Id, Nodes) of
    false = Denied ->
      Denied;
    true ->
      % Ask every node's global_name_server for the lock in a single
      % multi_call; the list of non-responding nodes is ignored here.
      {Replies, _BadNodes} =
        gen_server:multi_call(Nodes, global_name_server, {set_lock, Id}),
      ?trace({set_lock, {me, self()}, Id, {nodes, Nodes}, {replies, Replies}}),
      check_replies(Replies, Id, Replies)
  end.
  
% Server-side implementation of set_lock.
% gen_server call clause for a {set_lock, Lock} request: the pid of the
% calling process is taken from the From tuple and the work is delegated to
% handle_set_lock/3, whose reply and updated state are returned as-is.
% NOTE(review): this is one clause of handle_call/3; the trailing `;` shows
% that further clauses follow in the original file.
handle_call({set_lock, Lock}, {Pid, _Tag}, S0) ->
    {Reply, S} = handle_set_lock(Lock, Pid, S0),
    {reply, Reply, S};

%% Handle a set_lock request for lock `Id` made by process `Pid`.
%%
%% Returns {true, State} when the lock is (or already was) held on behalf of
%% `Pid`, and {false, State} with the state unchanged when can_set_lock/1
%% refuses the request.
handle_set_lock(Id, Pid, S) ->
  ?trace({handle_set_lock, Id, Pid}),
  % Step 1: may this requester take the lock at all?
  case can_set_lock(Id) of
    false = Denied ->
      {Denied, S};
    {true, PidRefs} ->
      % Step 2: only insert a new lock entry when Pid is not already
      % recorded among the current holders.
      NewS =
        case pid_is_locking(Pid, PidRefs) of
          true -> S;
          false -> insert_lock(Id, Pid, PidRefs, S)
        end,
      {true, NewS}
  end.

%% Decide whether `LockRequesterId` may lock `ResourceId`, based on the
%% global_locks ETS table.
%%
%% Returns {true, PidRefs} when the resource is unlocked (PidRefs = []) or is
%% already locked by the same requester (PidRefs = the stored holders), and
%% false when a different requester holds the lock.
can_set_lock({ResourceId, LockRequesterId}) ->
  case ets:lookup(global_locks, ResourceId) of
    [] ->
      % Nobody holds the lock yet.
      {true, []};
    [{ResourceId, Owner, Holders}] when Owner =:= LockRequesterId ->
      % Same requester again: the lock is re-entrant.
      {true, Holders};
    [{ResourceId, _OtherOwner, _Holders}] ->
      false
  end.

4. register_name 过程

假设一个分布式系统有N个非hidden节点(erlang:length(nodes()) = N)

  1. boss节点加锁 gen_server:multi_call([Boss], global_name_server, {set_lock,{global,pid()}})
  2. 在所有节点上加锁gen_server:multi_call(Nodes, global_name_server, {set_lock,{global,pid()}})
  3. 在所有节点上注册 gen_server:multi_call(Nodes,global_name_server,{register, Name, Pid, Method})
  4. 在其余N个节点删除锁 gen_server:multi_call(Nodes, global_name_server, {del_lock, Id})
  5. boss节点删除锁 gen_server:multi_call([Boss], global_name_server, {del_lock, Id})

一共要 gen_server:call N x 3(加锁,注册,删锁) 次,效率喜人

% global.erl
%% Entry point: run `Fun` while holding the global lock.
%%
%% The lock id is {?GLOBAL_RID, self()}. set_lock_known/2 acquires it and
%% returns the list of nodes, which is passed to `Fun`. The lock is released
%% in the `after` section, so it is removed whether `Fun` returns or raises.
%% The result of `Fun` is the result of this function.
trans_all_known(Fun) ->
    LockId = {?GLOBAL_RID, self()},
    % Steps 1 and 2: lock the boss node, then the remaining nodes.
    LockedNodes = set_lock_known(LockId, 0),
    try
        % Step 3: do the actual work under the lock.
        Fun(LockedNodes)
    after
        % Steps 4 and 5: release the lock on the other nodes, then the boss.
        delete_global_lock(LockId, LockedNodes)
    end.

%% Acquire lock `Id` on the local node plus all known nodes, retrying until
%% it succeeds, and return the list of nodes used for the lock.
%%
%% Each attempt recomputes the node list from get_known/0. `Times` counts
%% the attempts made so far and is passed to random_sleep/1 before a retry.
set_lock_known(Id, Times) ->
    Known = get_known(),
    Nodes = [node() | Known],
    Boss = the_boss(Nodes),
    %% Use the same convention (a boss) as lock_nodes_safely. Optimization.
    Locked =
        case set_lock_on_nodes(Id, [Boss]) of
            false ->
                % Step 1 failed: the boss node refused the lock.
                false;
            true ->
                % Step 2: the boss is locked, now lock the remaining nodes.
                case lock_on_known_nodes(Id, Known, Nodes) of
                    true ->
                        true;
                    false ->
                        % Roll back the boss lock before retrying; this
                        % costs an extra round of messages.
                        del_lock(Id, [Boss]),
                        false
                end
        end,
    case Locked of
        true ->
            Nodes;
        false ->
            random_sleep(Times),
            set_lock_known(Id, Times + 1)
    end.

%% Register `Pid` under the global name `Name`.
%%
%% Name    - the global name to register.
%% Pid     - the process to register; must be a pid (guard).
%% Method0 - resolve method, normalised through allow_tuple_fun/1.
%%
%% The registration itself is wrapped in a fun that is sent to the local
%% global_name_server in a {registrar, Fun} call with an infinite timeout.
%% The fun answers `yes` after broadcasting the registration to the nodes it
%% is given, or `no` when the name is already taken or check_dupname/2 does
%% not return true.
%% NOTE(review): presumably the server runs the fun under the global lock
%% (see trans_all_known/1) - confirm in the {registrar, Fun} handler.
register_name(Name, Pid, Method0) when is_pid(Pid) ->
    Method = allow_tuple_fun(Method0),
    Registrar =
        fun(Nodes) ->
            Available =
                (where(Name) =:= undefined) andalso check_dupname(Name, Pid),
            case Available of
                true ->
                    % Step 3: with the lock held, register Name on all nodes.
                    gen_server:multi_call(Nodes,
                                          global_name_server,
                                          {register, Name, Pid, Method}),
                    yes;
                _ ->
                    no
            end
        end,
    ?trace({register_name, self(), Name, Pid, Method}),
    gen_server:call(global_name_server, {registrar, Registrar}, infinity).
    
%% Release lock `LockId` on `Nodes`, leaving the boss node for last.
%%
%% The boss is chosen with the_boss/1. The lock is deleted on every other
%% node first and on the boss afterwards; the result of the final del_lock/2
%% call is returned.
delete_global_lock(LockId, Nodes) ->
    Boss = the_boss(Nodes),
    Others = lists:delete(Boss, Nodes),
    % Step 4: remove the lock from all nodes except the boss.
    del_lock(LockId, Others),
    % Step 5: finally remove the lock from the boss node.
    del_lock(LockId, [Boss]).
    

5. 优缺点

缺点
  1. 不能对hidden节点进行register_name加锁操作。
  2. 自旋锁对多个节点操作,IO次数太高,不如redis分布式锁来的轻巧。
优点
  1. 语言层面的实现,不用借助第三方
  2. 分布式的存储,所有的节点上都保留一份,所以在读方面会占优势

6.总结

了解系统全局锁的实现方式与缺陷,在使用过程中避免一些性能瓶颈。

7. 参考文献

  1. https://github.com/erlang/otp/blob/master/lib/kernel/src/global.erl
  2. https://github.com/erlang/otp/blob/master/lib/kernel/src/kernel.erl

你可能感兴趣的:(erlang 全局名字服务global_name_server)