今天分析RabbitMQ消息的持久化,即客户端发送一条持久化的MQ消息后,服务端做了哪些事情。
下面是客户端的发送代码:
$client = new Client('127.0.0.1', 5672, 'guest', 'guest');
//设置正常交换机、队列
$type = 'topic';
$routingKey = 'hello';
$exchangeName = 'hello_exchange'
$exchange = new Exchange($client, $exchangeName, $type);
$exchange->setDurable(true);
//队列
$queue = new Queue(
$client, $this->queueName, [
new Consumer(
function (AMQPMessage $msg) {
var_dump($msg);
}
),
]
);
$binding = new Binding($exchange, $queue);
$binding->setRoutingKey($routingKey);
$client->register($binding);
$message = new Message("hello" . str_repeat('123456789', 13));
$res = $exchange->publish($message, $routingKey);
分析下网络包,发送消息的时候,实际上是往服务端发送basic.publish命令。
调用链分析
入口在rabbit_channel文件:
handle_method(#'basic.publish'{exchange = ExchangeNameBin,
routing_key = RoutingKey,
mandatory = Mandatory},
Content, State = #ch{virtual_host = VHostPath,
tx = Tx,
channel = ChannelNum,
confirm_enabled = ConfirmEnabled,
trace_state = TraceState,
user = #user{username = Username},
conn_name = ConnName,
delivery_flow = Flow}) ->
……
case rabbit_basic:message(ExchangeName, RoutingKey, DecodedContent) of
{ok, Message} ->
Delivery = rabbit_basic:delivery(
Mandatory, DoConfirm, Message, MsgSeqNo),
QNames = rabbit_exchange:route(Exchange, Delivery),
DQ = {Delivery#delivery{flow = Flow}, QNames},
{noreply, case Tx of
none -> deliver_to_queues(DQ, State1);
{Msgs, Acks} -> Msgs1 = queue:in(DQ, Msgs),
State1#ch{tx = {Msgs1, Acks}}
end};
end;
上面删除了一些非关键代码,这里看是否有事务,如果没事务则 通过 deliver_to_queues发送, 有事务先进队列,今天主要分析无事务的处理过程。
deliver_to_queues({Delivery = #delivery{message = Message = #basic_message{
exchange_name = XName},
mandatory = Mandatory,
confirm = Confirm,
msg_seq_no = MsgSeqNo},
DelQNames}, State = #ch{queue_names = QNames,
queue_monitors = QMons}) ->
Qs = rabbit_amqqueue:lookup(DelQNames),
DeliveredQPids = rabbit_amqqueue:deliver(Qs, Delivery),
后者调用 rabbit_amqqueue:deliver来处理:
deliver(Qs, Delivery = #delivery{flow = Flow}) ->
{MPids, SPids} = qpids(Qs),
QPids = MPids ++ SPids,
MMsg = {deliver, Delivery, false},
SMsg = {deliver, Delivery, true},
delegate:cast(MPids, MMsg),
delegate:cast(SPids, SMsg),
QPids.
deliver的逻辑就 比较简单,分主、从进程ID,如果没有开启镜像队列,从进程ID是空的,今天先不分析镜像队列。
发送deliver消息到主进程,这个进程是rabbit-amqueue-process。
再来看rabbit-amqueue-process是如何处理的:
handle_cast({deliver, Delivery = #delivery{sender = Sender,
flow = Flow}, SlaveWhenPublished},
State = #q{senders = Senders}) ->
%% SlaveWhenPublished 只有在从的时候才为true
noreply(deliver_or_enqueue(Delivery, SlaveWhenPublished, State1));
中间的代码还比较多,就不一一贴了,大概说下,deliver_or_enqueue会调用attempt_delivery,然后调用到rabbit-variable-queue:publish
publish(Msg = #basic_message { is_persistent = IsPersistent, id = MsgId },
MsgProps = #message_properties { needs_confirming = NeedsConfirming },
IsDelivered, _ChPid, _Flow,
State = #vqstate { q1 = Q1, q3 = Q3, q4 = Q4,
qi_embed_msgs_below = IndexMaxSize,
next_seq_id = SeqId,
in_counter = InCount,
durable = IsDurable,
unconfirmed = UC }) ->
IsPersistent1 = IsDurable andalso IsPersistent,
MsgStatus = msg_status(IsPersistent1, IsDelivered, SeqId, Msg, MsgProps, IndexMaxSize),
{MsgStatus1, State1} = maybe_write_to_disk(false, false, MsgStatus, State),
调用maybe_write_to_disk 进行消息的持久化:
maybe_write_to_disk(ForceMsg, ForceIndex, MsgStatus, State) ->
maybe_write_msg_to_disk用来将消息持久化,maybe_write_index_to_disk用来将索引持久化。
maybe_write_to_disk(ForceMsg, ForceIndex, MsgStatus, State) ->
{MsgStatus1, State1} = maybe_write_msg_to_disk(ForceMsg, MsgStatus, State),
maybe_write_index_to_disk(ForceIndex, MsgStatus1, State1).
如果消息大小小于配置文件中的queue_index_embed_msgs_below,
则persist_to返回queue_index,反之返回 msg_store,这个参数默认是4096,即如果消息体大小小于4096,是不会将消息写到消息持久化文件,而是写到索引文件中。
消息的持久化由文件rabbit_msgstore负责,msgstorewrite会调用writemessage进行消息的保存:
maybe_write_msg_to_disk(Force, MsgStatus = #msg_status {
msg = Msg, msg_id = MsgId,
is_persistent = IsPersistent },
State = #vqstate{ msg_store_clients = MSCState,
disk_write_count = Count})
when Force orelse IsPersistent ->
case persist_to(MsgStatus) of
msg_store -> ok = msg_store_write(MSCState, IsPersistent, MsgId,
prepare_to_store(Msg)),
{MsgStatus#msg_status{msg_in_store = true},
State#vqstate{disk_write_count = Count + 1}};
queue_index -> {MsgStatus, State}
end;
这里的逻辑就比较简单了,将消息内容到当前文件,再判断当前文件的大小,如果需要,则创建一个新的持久化文件。
这里讲一下segment,每个segment对应一个文件(所在的目录在mnesia数据目录下的msg_store_persistent)。每个文件最多可以保存SEGMEN_ENTRY_COUNT(16384)个消息索引信息。
这些文件是以整数来命名的,某条消息对应哪个segment文件呢?用消息索引本身对SEGMENT_ENTRY_COUNT取整,相关代码可以看下
rabbit_queue_index:add_to_journal。
最后再看索引的持久化
maybe_write_msg_to_disk(Force, MsgStatus = #msg_status {
msg = Msg, msg_id = MsgId,
is_persistent = IsPersistent },
State = #vqstate{ msg_store_clients = MSCState,
disk_write_count = Count})
when Force orelse IsPersistent ->
case persist_to(MsgStatus) of
msg_store -> ok = msg_store_write(MSCState, IsPersistent, MsgId,
prepare_to_store(Msg)),
{MsgStatus#msg_status{msg_in_store = true},
State#vqstate{disk_write_count = Count + 1}};
queue_index -> {MsgStatus, State}
end;
索引通过rabbit_queue_index:publish 来落盘:
publish(MsgOrId, SeqId, MsgProps, IsPersistent, JournalSizeHint,
State = #qistate{unconfirmed = UC,
unconfirmed_msg = UCM}) ->
MsgId = case MsgOrId of
#basic_message{id = Id} -> Id;
Id when is_binary(Id) -> Id
end,
?MSG_ID_BYTES = size(MsgId),
%%JournalHd1对应journal.jif
{JournalHdl, State1} =
get_journal_handle(
case {MsgProps#message_properties.needs_confirming, MsgOrId} of
{true, MsgId} -> UC1 = gb_sets:add_element(MsgId, UC),
State#qistate{unconfirmed = UC1};
{true, _} -> UCM1 = gb_sets:add_element(MsgId, UCM),
State#qistate{unconfirmed_msg = UCM1};
{false, _} -> State
end),
file_handle_cache_stats:update(queue_index_journal_write),
{Bin, MsgBin} = create_pub_record_body(MsgOrId, MsgProps),
ok = file_handle_cache:append(
JournalHdl, [<<(case IsPersistent of
true -> ?PUB_PERSIST_JPREFIX;
false -> ?PUB_TRANS_JPREFIX
end):?JPREFIX_BITS,
SeqId:?SEQ_BITS, Bin/binary,
(size(MsgBin)):?EMBEDDED_SIZE_BITS>>, MsgBin]),
maybe_flush_journal(
JournalSizeHint,
add_to_journal(SeqId, {IsPersistent, Bin, MsgBin}, State1)).
索引文件会先写到 journal缓存中,再定期刷到磁盘中,相关参数为
queue_index_max_journal_entries,
判断当前写入次数是否达到queue_index_max_journal_entries,是则进行刷盘到索引持久化文件。
实际刷盘是在 rabbit_variable_queue:handle_pre_hibernate中异步去刷的,这里不详述。
索引持久化文件在mnesia目录的queues目录下,文件扩展名为idx。
如何保证消息的不丢呢,即如果写入journal文件成功了,但没有刷新到索引的持久化文件中如何恢复,可以看下代码 rabbit_variable_queue:init, RabbitMQ启动的时候启动每个队列之前会调用它来从journal中恢复索引和消息。
最后总结
持久化分消息体和索引的持久化,如果消息体小于queue_index_embed_msgs_below,则将消息写入到索引文件中,只进行1次磁盘操作,反之要写2次磁盘:消息体+索引,消息体写入到segment文件中,一个segment可以保存16384条消息。
为了加快写入的性能,写入消息体时是追加方式进行的;索引的持久化则是先追加到journal文件中,再异步刷新到索引文件中。
RabbitMQ网络框架代码分析二:命令分发
RabbitMQ网络框架代码分析
从RabbitMQ Channel设计看连接复用