我们在程序开发过程中,存在如下的一段代码:
%% Problematic pattern: an indexed read followed by a write inside one
%% transaction. As the analysis below shows, index_read takes a TABLE-level
%% read lock ({Tab, ?ALL}), so concurrent transactions serialize on it.
F = fun () ->
Recs = mnesia:index_read(table, Index, indexfield),
NewRecs = some_ops_on(Recs),
%% NOTE(review): mnesia:write/1 takes a single record — if some_ops_on/1
%% returns a list this call would abort; presumably illustrative pseudocode.
mnesia:write(NewRecs)
end,
mnesia:transaction(F).
这段代码在运行时并发一直上不去,读者是否清楚其原因呢?
文章的末尾总结了一些mnesia的使用注意事项,对源码没有兴趣的读者可以直接看末尾。
仔细分析一下mnesia:index_read/3的工作过程,就豁然开朗了,代码版本R15B03:
mnesia.erl
%% mnesia:index_read/3 — public API entry point for an indexed read.
%% Fetches the current activity context from the process dictionary and
%% dispatches to the matching access-module implementation; aborts with
%% no_transaction when called outside any activity.
index_read(Tab, Key, Attr) ->
case get(mnesia_activity_state) of
{?DEFAULT_ACCESS, Tid, Ts} ->
%% Default access module: use mnesia's own implementation below.
index_read(Tid, Ts, Tab, Key, Attr, read);
{Mod, Tid, Ts} ->
%% Custom access module: delegate to it.
Mod:index_read(Tid, Ts, Tab, Key, Attr, read);
_ ->
abort(no_transaction)
end.
%% mnesia:index_read/6 — activity-aware implementation of the indexed read.
%% For an ets (dirty) activity it falls through to dirty_index_read/3; for a
%% real transaction (tid) it resolves the attribute name to a tuple position
%% and, for LockKind == read with a fully bound key, performs a locking
%% indexed read via mnesia_index:read/5.
index_read(Tid, Ts, Tab, Key, Attr, LockKind)
when is_atom(Tab), Tab /= schema ->
case element(1, Tid) of
ets ->
dirty_index_read(Tab, Key, Attr); % Should be optimized?
tid ->
%% Map the attribute name to its position in the record tuple.
Pos = mnesia_schema:attr_tab_to_pos(Tab, Attr),
case LockKind of
read ->
case has_var(Key) of
false ->
Store = Ts#tidstore.store,
%% Enter mnesia_index:read/5 to do the locking indexed read.
Objs = mnesia_index:read(Tid, Store, Tab, Key, Pos),
Pat = setelement(Pos, val({Tab, wild_pattern}), Key),
%% Presumably merges in matching writes already buffered in this
%% transaction's store — see add_written_match.
add_written_match(Store, Pat, Tab, Objs);
true ->
%% Key contains unbound variables — not allowed here.
abort({bad_type, Tab, Attr, Key})
end;
_ ->
%% Only read locks are supported for index reads.
abort({bad_type, Tab, LockKind})
end;
_Protocol ->
dirty_index_read(Tab, Key, Attr)
end;
%% Non-atom table name (or schema table) — reject.
index_read(_Tid, _Ts, Tab, _Key, _Attr, _LockKind) ->
abort({bad_type, Tab}).
mnesia_index.erl
%% mnesia_index:read/5 — locking indexed read: the lock is taken and the
%% matching records are fetched in a single request to the lock manager.
read(Tid, Store, Tab, IxKey, Pos) ->
%% The records are read at the same time the lock is acquired.
ResList = mnesia_locker:ixrlock(Tid, Store, Tab, IxKey, Pos),
%% Remove all tuples which don't include Ixkey, happens when Tab is a bag
case val({Tab, setorbag}) of
bag ->
mnesia_lib:key_search_all(IxKey, Pos, ResList);
_ ->
ResList
end.
mnesia_locker.erl
%% mnesia_locker:ixrlock/5 — acquire the lock needed for an indexed read and
%% return the matching records. Note the lock checked/requested is on
%% {Tab, ?ALL}, i.e. the whole table.
ixrlock(Tid, Store, Tab, IxKey, Pos) ->
case val({Tab, where_to_read}) of
nowhere ->
mnesia:abort({no_exists, Tab});
Node ->
%%% Old code
%% R = l_request(Node, {ix_read, Tid, Tab, IxKey, Pos}, Store),
%% rlock_get_reply(Node, Store, Tab, R)
case need_lock(Store, Tab, ?ALL, read) of
no when Node =:= node() ->
%% Table lock already held and data is local: read directly.
ix_read_res(Tab,IxKey,Pos);
_ -> %% yes or need to get the result from other node
%% First time through, the table-level lock must be requested.
R = l_request(Node, {ix_read, Tid, Tab, IxKey, Pos}, Store),
%% The row records are delivered together with the lock grant.
rlock_get_reply(Node, Store, Tab, R)
end
end.
%% l_request/3 — send a lock request to the lock manager process on Node and
%% wait synchronously for the reply.
l_request(Node, X, Store) ->
%% Ask the lock manager for the lock; here the request payload is ix_read.
{?MODULE, Node} ! {self(), X},
l_req_rec(Node, Store).
%% Synchronously wait for the lock reply to come back.
%% NOTE: this is where large numbers of processes end up blocked, waiting
%% for the lock reply. The blocking is caused by the write that follows
%% index_read: the write's lock conflicts with the table-level read lock
%% taken here.
%% l_req_rec/2 — record Node in the transaction store, then block until the
%% lock manager replies or the node is reported down.
l_req_rec(Node, Store) ->
?ets_insert(Store, {nodes, Node}),
receive
%% Wait for the lock manager's reply message.
{?MODULE, Node, Reply} ->
Reply;
{mnesia_down, Node} ->
{not_granted, {node_not_running, Node}}
end.
再来分析ix_read锁请求到达锁管理器后的处理:
mnesia_locker.erl
%% Lock manager main loop (excerpt): handling of an incoming ix_read lock
%% request from a transaction process.
loop(State) ->
receive
…
{From, {ix_read, Tid, Tab, IxKey, Pos}} ->
case ?ets_lookup(mnesia_sticky_locks, Tab) of
[] ->
%% No sticky lock on the table: take the read lock on ALL keys.
%% (Sticky locks are ignored in this analysis; this is the path
%% relevant to the scenario at hand.)
set_read_lock_on_all_keys(Tid,From,Tab,IxKey,Pos),
loop(State);
[{_,N}] when N == node() ->
set_read_lock_on_all_keys(Tid,From,Tab,IxKey,Pos),
loop(State);
[{_,N}] ->
%% Sticky lock held by another node: redirect the request there.
Req = {From, {ix_read, Tid, Tab, IxKey, Pos}},
From ! {?MODULE, node(), {switch, N, Req}},
loop(State)
end;
…
%% set_read_lock_on_all_keys/5 — grant, reject, or queue a table-wide read
%% lock for an indexed read. The lock object is {Tab, ?ALL}: the whole table.
set_read_lock_on_all_keys(Tid, From, Tab, IxKey, Pos) ->
Oid = {Tab,?ALL},
Op = {ix_read,IxKey, Pos},
Lock = read,
case can_lock(Tid, Lock, Oid, {no, bad_luck}) of
{yes, Default} ->
%% Lock can be granted now: grant it and reply with the records.
Reply = grant_lock(Tid, Op, Lock, Oid, Default),
reply(From, Reply);
{{no, Lucky},_} ->
%% Cannot be granted: reject with a cyclic-dependency record so the
%% requester can resolve the potential deadlock.
C = #cyclic{op = Op, lock = Lock, oid = Oid, lucky = Lucky},
?dbg("Rejected ~p ~p ~p ~p ~n", [Tid, Oid, Lock, Lucky]),
reply(From, {not_granted, C});
{{queue, Lucky},_} ->
?dbg("Queued ~p ~p ~p ~p ~n", [Tid, Oid, Lock, Lucky]),
%% Append to queue: Nice place for trace output
?ets_insert(mnesia_lock_queue,
#queue{oid = Oid, tid = Tid, op = Op,
pid = From, lucky = Lucky}),
?ets_insert(mnesia_tid_locks, {Tid, Oid, {queued, Op}})
end.
…
%% grant_lock clause for ix_read (excerpt) — read the matching rows from the
%% index and register the lock.
grant_lock(Tid, {ix_read,IxKey,Pos}, Lock, Oid = {Tab, _}, Default) ->
try
%% Read the row records out of the index table.
Res = ix_read_res(Tab, IxKey,Pos),
set_lock(Tid, Oid, Lock, Default),
{granted, Res, [?ALL]}
%% Clearly visible here: what is actually granted is a TABLE lock,
%% since Oid is {Tab, ?ALL}.
catch _:_ ->
{not_granted, {no_exists, Tab, {index, [Pos]}}}
end;
…
锁请求返回时,事务线程的处理:
mnesia_locker.erl
%% rlock_get_reply clause for a granted lock (excerpt) — record the locked
%% keys (here [?ALL]) and the node in the transaction store, then return the
%% records that were read.
rlock_get_reply(Node, Store, Tab, {granted, V, RealKeys}) ->
%% Kept for backwards compatibility, keep until no old nodes
%% are available
L = fun(K) -> ?ets_insert(Store, {{locks, Tab, K}, read}) end,
lists:foreach(L, RealKeys),
?ets_insert(Store, {nodes, Node}),
V;
返回值即为查找到的行记录。
由此可见mnesia索引读存在的问题:index_read实际加的是表级读锁,极大地影响并发,应慎用。
这里总结一些mnesia的使用注意事项:
1.一次事务读的行记录越少越好,跨越的表越少越好,因为每一次读都会产生一个读锁,记录和表越多,与写锁冲突的几率就越大,阻塞写的几率就越大;
2.如果多个表的主键相同,应该尽量将这些表合并,除非:
a)表的规模可能很大,导致一个ets表存不下这些数据,此时可以考虑拆字段或按id切分;
b)表只有很少的字段会频繁读到,一次读出全部内容的几率很小;
c)关于此条,也可以结合数据库表设计原则进行,但设计时一定要注意,mnesia只是一个kv存储;
3.不要用index_read,因为index_read会锁住全表,并严重阻塞写操作,使得读写较为平均的并发受到很大限制。如果需要索引,那么存两张表,一张专门用于索引,索引与主键一一映射;
4.majority表使用majority事务,该事务至少包含两次同步网络请求和一次异步网络请求,代价较大;作为对比,普通事务只有一次同步和一次异步网络请求,同步事务(sync_transaction)则有两次同步网络请求;
5.mnesia事务内部的操作应越短越好,因为访问的记录产生的锁只在事务提交时释放,如果内部无关操作太多,可能会阻塞其它请求;