都知道redis有两种持久化方式,一种是AOF(Append Only File),一种是RDB(Redis DataBase),两者的区别这里不做介绍,大家可以去看《Redis 持久化之RDB和AOF对比整理》
你可能会有这个疑问,或者面试官会问你,AOF什么时候执行?RDB什么时候执行?这里从源码角度解释这两个问题
借鉴:
redis 源码探讨–AOF实现
Redis源码剖析之RDB
下面是Redis服务器中处理客户端命令的函数,每一个对缓存数据的操作都要经过这个函数
/* If this function gets called we already read a whole
* command, arguments are in the client argv/argc fields.
* processCommand() execute the command or prepare the
* server for a bulk read from the client.
*
* If C_OK is returned the client is still alive and valid and
* other operations can be performed by the caller. Otherwise
* if C_ERR is returned the client was destroyed (i.e. after QUIT).
*
* NOTE: this is an abridged excerpt; the part before the execution step is
* omitted (see the elision marker below). */
int processCommand(client *c) {
/* Unimportant logic omitted here; most of it is up-front validation. */
.......
/* Exec the command */
/* Decide between queueing and immediate execution: when the client has
* CLIENT_MULTI set and the command's proc is none of execCommand,
* discardCommand, multiCommand, watchCommand, quitCommand or resetCommand,
* the command is only queued; otherwise it is run right away via call(). */
if (c->flags & CLIENT_MULTI &&
c->cmd->proc != execCommand &&
c->cmd->proc != discardCommand &&
c->cmd->proc != multiCommand &&
c->cmd->proc != watchCommand &&
c->cmd->proc != quitCommand &&
c->cmd->proc != resetCommand)
{ /* Queue the command instead of running it, and reply with shared.queued. */
queueMultiCommand(c, cmd_flags);
addReply(c,shared.queued);
} else {/* Execute the command immediately. */
int flags = CMD_CALL_FULL;
if (client_reprocessing_command) flags |= CMD_CALL_REPROCESSING;
call(c,flags);
/* When server.ready_keys is non-empty, hand over to
* handleClientsBlockedOnKeys(). */
if (listLength(server.ready_keys))
handleClientsBlockedOnKeys();
}
return C_OK;
}
这里不关注加入队列执行的情况,只关注直接执行的情况,注意call(c,flags)
这一行代码
下面是call(client *c, int flags)
的代码逻辑
/* Core code (abridged excerpt): the part of call() that decides whether the
* command is queued for AOF / replication propagation. */
void call(client *c, int flags) {
......./* code that gets in the way of understanding omitted */
/* At this point the request has been executed; the AOF-related work starts
* here. The call to focus on is alsoPropagate(). */
/* Propagate the command into the AOF and replication link.
* We never propagate EXEC explicitly, it will be implicitly
* propagated if needed (see propagatePendingCommands).
* (Note on the sentence above: alsoPropagate() only prepares the
* propagation; the actual AOF feed is done by propagatePendingCommands().)
* Also, module commands take care of themselves */
if (flags & CMD_CALL_PROPAGATE &&
(c->flags & CLIENT_PREVENT_PROP) != CLIENT_PREVENT_PROP &&
c->cmd->proc != execCommand &&
!(c->cmd->flags & CMD_MODULE))
{
int propagate_flags = PROPAGATE_NONE;
/* Check if the command operated changes in the data set. If so
* set for replication / AOF propagation. */
/* NOTE(review): 'dirty' is computed in the omitted code above; presumably
* it is non-zero when the command changed the data set -- confirm against
* the full source. */
if (dirty) propagate_flags |= (PROPAGATE_AOF|PROPAGATE_REPL);
/* If the client forced AOF / replication of the command, set
* the flags regardless of the command effects on the data set. */
/* CLIENT_FORCE_REPL / CLIENT_FORCE_AOF on the client switch on the
* matching target even when 'dirty' is zero. */
if (c->flags & CLIENT_FORCE_REPL) propagate_flags |= PROPAGATE_REPL;
if (c->flags & CLIENT_FORCE_AOF) propagate_flags |= PROPAGATE_AOF;
/* However prevent AOF / replication propagation if the command
* implementation called preventCommandPropagation() or similar,
* or if we don't have the call() flags to do so. */
if (c->flags & CLIENT_PREVENT_REPL_PROP ||
c->flags & CLIENT_MODULE_PREVENT_REPL_PROP ||
!(flags & CMD_CALL_PROPAGATE_REPL))
propagate_flags &= ~PROPAGATE_REPL;
if (c->flags & CLIENT_PREVENT_AOF_PROP ||
c->flags & CLIENT_MODULE_PREVENT_AOF_PROP ||
!(flags & CMD_CALL_PROPAGATE_AOF))
propagate_flags &= ~PROPAGATE_AOF;
/* Call alsoPropagate() only if at least one of AOF / replication
* propagation is needed. */
/* A value other than PROPAGATE_NONE does not mean the AOF file is written
* here: alsoPropagate() only appends the command to server.also_propagate. */
if (propagate_flags != PROPAGATE_NONE)
alsoPropagate(c->db->id,c->argv,c->argc,propagate_flags);
}
......./* more code that gets in the way of understanding omitted */
/* Do some maintenance job and cleanup */
/* The queued commands are actually fed to the AOF from inside this call:
* afterCommand() -> postExecutionUnitOperations() ->
* propagatePendingCommands(). */
afterCommand(c);
}
下面是alsoPropagate函数的实现
/* Queue a command for later propagation to the AOF and / or the replicas.
 *
 * dbid   - ID of the database the command must be propagated into.
 * argv   - vector of 'argc' redis object pointers making up the command.
 * argc   - number of entries in 'argv'.
 * target - propagation targets; when shouldPropagate(target) is false the
 *          call does nothing.
 *
 * The 'argv' vector itself is not retained: a private copy of the pointer
 * array is allocated, so the caller remains responsible for its own vector
 * (which is usually stack allocated). The reference count of every object
 * is incremented here, so the caller does not have to do it.
 *
 * The copy is appended to server.also_propagate; nothing is written to the
 * AOF or to the replication link by this function. */
void alsoPropagate(int dbid, robj **argv, int argc, int target) {
    if (shouldPropagate(target)) {
        robj **owned = zmalloc(sizeof(robj*)*argc);
        int i = 0;

        /* Share every argument object with the queued operation. */
        while (i < argc) {
            incrRefCount(argv[i]);
            owned[i] = argv[i];
            i++;
        }
        redisOpArrayAppend(&server.also_propagate,dbid,owned,argc,target);
    }
}
下面是redisOpArrayAppend函数的实现
/* Append one operation to the array 'oa' and return the new operation count.
 *
 * Capacity handling: an empty array (numops == 0) gets a capacity of 16; a
 * full array doubles its capacity. The backing storage is reallocated only
 * when the capacity value actually changed.
 *
 * The 'argv' pointer is stored as-is (no copy is made here). */
int redisOpArrayAppend(redisOpArray *oa, int dbid, robj **argv, int argc, int target) {
    const int old_capacity = oa->capacity;
    redisOp *slot;

    if (oa->numops == 0)
        oa->capacity = 16;
    else if (oa->numops >= oa->capacity)
        oa->capacity *= 2;

    if (oa->capacity != old_capacity)
        oa->ops = zrealloc(oa->ops,sizeof(redisOp)*oa->capacity);

    /* Fill the first free slot. */
    slot = &oa->ops[oa->numops];
    slot->target = target;
    slot->argc = argc;
    slot->argv = argv;
    slot->dbid = dbid;

    oa->numops += 1;
    return oa->numops;
}
/* This is called after a command in call, we can do some maintenance job in it.
* In this abridged excerpt the only visible step is the call to
* postExecutionUnitOperations(); 'c' is marked UNUSED in the shown code. */
void afterCommand(client *c) {
UNUSED(c);
/* Should be done before trackingHandlePendingKeyInvalidations so that we
* reply to client before invalidating cache (makes more sense) */
postExecutionUnitOperations();
....../* unrelated code omitted */
}
/* Abridged excerpt: the visible step is the call to
* propagatePendingCommands(), which propagates what was accumulated in
* server.also_propagate. The checks described by the comment below live in
* the omitted code. */
void postExecutionUnitOperations(void) {
....../* unrelated code omitted */
/* If we are at the top-most call() and not inside a an active module
* context (e.g. within a module timer) we can propagate what we accumulated. */
propagatePendingCommands();
....../* unrelated code omitted */
}
/* Propagate every operation queued through alsoPropagate() (stored in
 * server.also_propagate) and then release the queue.
 *
 * This serves commands that want to propagate several separate commands.
 * Note that alsoPropagate() is not affected by the CLIENT_PREVENT_PROP flag.
 *
 * Reaching this function means an execution unit has finished. When that
 * unit queued more than one command, the commands are wrapped in
 * MULTI ... EXEC so they are propagated as a transaction. */
static void propagatePendingCommands(void) {
    redisOp *rop;
    int as_transaction, idx;

    if (server.also_propagate.numops != 0) {
        as_transaction = server.also_propagate.numops > 1;

        /* A command that may modify random keys and was run *directly*
         * (i.e. not from within a script, MULTI/EXEC, RM_Call, etc.) must
         * not be wrapped in a transaction (much like active-expire). */
        if (server.current_client &&
            server.current_client->cmd &&
            (server.current_client->cmd->flags & CMD_TOUCHES_ARBITRARY_KEYS))
            as_transaction = 0;

        /* dbid=-1 means "do not replicate SELECT": it will be inserted
         * together with the next command (inside the MULTI). */
        if (as_transaction)
            propagateNow(-1,&shared.multi,1,PROPAGATE_AOF|PROPAGATE_REPL);

        idx = 0;
        while (idx < server.also_propagate.numops) {
            rop = server.also_propagate.ops + idx;
            serverAssert(rop->target);
            propagateNow(rop->dbid,rop->argv,rop->argc,rop->target);
            idx++;
        }

        /* Again dbid=-1: no SELECT must be replicated for the EXEC. */
        if (as_transaction)
            propagateNow(-1,&shared.exec,1,PROPAGATE_AOF|PROPAGATE_REPL);

        redisOpArrayFree(&server.also_propagate);
    }
}
propagateNow将指定的命令(在指定数据库 ID 的上下文中)传播到 AOF 和从属服务器。
/* Propagate one command, in the context of database 'dbid', to the targets
 * selected by 'target':
 *   - PROPAGATE_AOF:  feedAppendOnlyFile() is called, unless server.aof_state
 *                     is AOF_OFF.
 *   - PROPAGATE_REPL: replicationFeedSlaves() is called with server.slaves.
 *
 * Nothing happens when shouldPropagate(target) is false. */
static void propagateNow(int dbid, robj **argv, int argc, int target) {
    if (shouldPropagate(target)) {
        /* This needs to be unreachable since the dataset should be fixed
         * during replica pause (otherwise data may be lost during a
         * failover). */
        serverAssert(!(isPausedActions(PAUSE_ACTION_REPLICA) &&
                       (!server.client_pause_in_transaction)));

        const int wants_aof = (target & PROPAGATE_AOF) != 0;
        const int wants_repl = (target & PROPAGATE_REPL) != 0;

        if (wants_aof && server.aof_state != AOF_OFF) {
            feedAppendOnlyFile(dbid,argv,argc);
        }
        if (wants_repl) {
            replicationFeedSlaves(server.slaves,dbid,argv,argc);
        }
    }
}
feedAppendOnlyFile 方法才是实际把命令写入AOF的方法(先追加到AOF缓冲区 server.aof_buf,而不是直接写磁盘)
/* Serialize a command and append it to the in-memory AOF buffer.
 *
 * dictid - id of the database the command applies to. It decides whether a
 *          SELECT command has to be emitted first; -1 means "never emit
 *          SELECT".
 * argv   - the command to write.
 * argc   - number of values in argv.
 *
 * The command is not written to disk here: it is appended to
 * server.aof_buf, and only when server.aof_state is AOF_ON, or when it is
 * AOF_WAIT_REWRITE while the active child is of type CHILD_TYPE_AOF. */
void feedAppendOnlyFile(int dictid, robj **argv, int argc) {
    sds payload = sdsempty();

    serverAssert(dictid == -1 || (dictid >= 0 && dictid < server.dbnum));

    /* Optional timestamp annotation goes first. */
    sds stamp = server.aof_timestamp_enabled
                    ? genAofTimestampAnnotationIfNeeded(0)
                    : NULL;
    if (stamp != NULL) {
        payload = sdscatsds(payload, stamp);
        sdsfree(stamp);
    }

    /* The command targets a DB different from the one selected by the last
     * appended command: a SELECT has to be issued first. */
    const int need_select = (dictid != -1) && (dictid != server.aof_selected_db);
    if (need_select) {
        char dbname[64];

        snprintf(dbname,sizeof(dbname),"%d",dictid);
        payload = sdscatprintf(payload,"*2\r\n$6\r\nSELECT\r\n$%lu\r\n%s\r\n",
            (unsigned long)strlen(dbname),dbname);
        server.aof_selected_db = dictid;
    }

    /* Commands are propagated to the AOF the same way as to replication;
     * no AOF-specific translation is needed. */
    payload = catAppendOnlyGenericCommand(payload,argc,argv);

    /* Append to the AOF buffer. It is flushed to disk just before
     * re-entering the event loop, so before the client gets a positive
     * reply about the operation performed. */
    const int buffer_accepts =
        server.aof_state == AOF_ON ||
        (server.aof_state == AOF_WAIT_REWRITE &&
         server.child_type == CHILD_TYPE_AOF);
    if (buffer_accepts) {
        server.aof_buf = sdscatlen(server.aof_buf, payload, sdslen(payload));
    }

    sdsfree(payload);
}
想了解何时、如何落盘,可以查看flushAppendOnlyFile
函数
/* Write the append only file buffer on disk.
*
* Since we are required to write the AOF before replying to the client,
* and the only way the client socket can get a write is entering when
* the event loop, we accumulate all the AOF writes in a memory
* buffer and write it on disk using this function just before entering
* the event loop again.
*
* About the 'force' argument:
*
* When the fsync policy is set to 'everysec' we may delay the flush if there
* is still an fsync() going on in the background thread, since for instance
* on Linux write(2) will be blocked by the background fsync anyway.
* When this happens we remember that there is some aof buffer to be
* flushed ASAP, and will try to do that in the serverCron() function.
*
* However if force is set to 1 we'll write regardless of the background
* fsync. */
#define AOF_WRITE_LOG_ERROR_RATE 30 /* Seconds between errors logging. */
// force == 0: the write may be postponed (see above); force == 1: write
// regardless of a background fsync.
void flushAppendOnlyFile(int force) {
// NOTE: the real body is omitted in this excerpt; only a summary is kept.
// The core flush-to-disk work happens in this function. Three fsync
// policies have to be told apart:
//   Always   (AOF_FSYNC_ALWAYS):   synchronous write-back; the log is synced
//                                  to disk after every executed command.
//   Everysec (AOF_FSYNC_EVERYSEC): after write commands complete, the log is
//                                  synced to disk once per second.
//   No:                            the operating system decides when the
//                                  data reaches the disk.
}
为什么AOF还要刷新落盘呢?
原因是:如上文所述,请求追加AOF时只是把命令写到了缓冲区(server.aof_buf),而不是磁盘;实际落盘的函数是flushAppendOnlyFile,
而下面展示了flushAppendOnlyFile
函数的两处调用位置(beforeSleep 和 serverCron)
// Program entry point; the whole Redis initialization happens in here
// (omitted in this excerpt). The visible step is entering the event loop
// through aeMain(server.el).
int main(int argc, char **argv) {
......../* unrelated code omitted */
aeMain(server.el);
......../* unrelated code omitted */
}
/* Run the event loop: main() calls this with server.el once it has finished
 * its initialization work.
 *
 * The stop flag is cleared on entry, then aeProcessEvents() is invoked
 * repeatedly, with all events plus the before-sleep and after-sleep hooks
 * enabled, until eventLoop->stop becomes non-zero. */
void aeMain(aeEventLoop *eventLoop) {
    const int all_with_sleep_hooks =
        AE_ALL_EVENTS | AE_CALL_BEFORE_SLEEP | AE_CALL_AFTER_SLEEP;

    for (eventLoop->stop = 0; !eventLoop->stop; )
        aeProcessEvents(eventLoop, all_with_sleep_hooks);
}
/* Process every pending time event, then every pending file event
* (that may be registered by time event callbacks just processed).
* Without special flags the function sleeps until some file event
* fires, or when the next time event occurs (if any).
*
* (Parts of the original comment that are not relevant here were removed.)
* if flags has AE_CALL_BEFORE_SLEEP set, the beforesleep callback is called.
*
* The function returns the number of events processed. */
// Handles one round of event-loop processing. In this abridged excerpt only
// the beforesleep hook invocation is shown; the time / file event processing
// described above is in the omitted code.
int aeProcessEvents(aeEventLoop *eventLoop, int flags)
{
......../* unrelated code omitted */
/* Invoke the registered beforesleep callback, if any, when the caller asked
* for it with AE_CALL_BEFORE_SLEEP. */
if (eventLoop->beforesleep != NULL && (flags & AE_CALL_BEFORE_SLEEP))
eventLoop->beforesleep(eventLoop);
......../* unrelated code omitted */
}
/* This function gets called every time Redis is entering the
* main loop of the event driven library, that is, before to sleep
* for ready file descriptors.
*
* Note: This function is (currently) called from two functions:
* 1. aeMain - The main server loop
* 2. processEventsWhileBlocked - Process clients during RDB/AOF load
*
* If it was called from processEventsWhileBlocked we don't want
* to perform all actions (For example, we don't want to expire
* keys), but we do need to perform some actions.
*
* The most important is freeClientsInAsyncFreeQueue but we also
* call some other low-risk functions. */
void beforeSleep(struct aeEventLoop *eventLoop) {
......../* unrelated code omitted */
/* First call site of flushAppendOnlyFile() shown in this article: a
* non-forced flush (force == 0) whenever the AOF state is AOF_ON or
* AOF_WAIT_REWRITE. */
if (server.aof_state == AOF_ON || server.aof_state == AOF_WAIT_REWRITE)
flushAppendOnlyFile(0);
......../* unrelated code omitted */
}
这个是由服务端定时调用serverCron
/* Abridged excerpt of serverCron(): only the AOF-related retries of
* flushAppendOnlyFile() are shown. Both call sites use force == 0. */
int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
......../* unrelated code omitted */
/* AOF postponed flush: Try at every cron cycle if the slow fsync
* completed. */
if ((server.aof_state == AOF_ON || server.aof_state == AOF_WAIT_REWRITE) &&
server.aof_flush_postponed_start)
{
flushAppendOnlyFile(0);
}
/* AOF write errors: in this case we have a buffer to flush as well and
* clear the AOF error in case of success to make the DB writable again,
* however to try every second is enough in case of 'hz' is set to
* a higher frequency. */
run_with_period(1000) {
if ((server.aof_state == AOF_ON || server.aof_state == AOF_WAIT_REWRITE) &&
server.aof_last_write_status == C_ERR)
{
flushAppendOnlyFile(0);
}
}
......../* unrelated code omitted */
}
下面同样是由服务端定时调用的serverCron,这里截取的是其中与RDB(后台保存 rdbSaveBackground)相关的部分
/* Abridged excerpt of serverCron(): only the RDB-related part is shown,
* i.e. the two places where rdbSaveBackground() is started. */
int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
......../* unrelated code omitted */
/* Check if a background saving or AOF rewrite in progress terminated. */
if (hasActiveChildProcess() || ldbPendingChildren())
{
run_with_period(1000) receiveChildInfo();
checkChildrenDone();
} else {
/* If there is not a background saving/rewrite in progress check if
* we have to save/rewrite now. */
for (j = 0; j < server.saveparamslen; j++) {
struct saveparam *sp = server.saveparams+j;
/* Save if we reached the given amount of changes,
* the given amount of seconds, and if the latest bgsave was
* successful or if, in case of an error, at least
* CONFIG_BGSAVE_RETRY_DELAY seconds already elapsed. */
// A background save is started only when ALL of the following hold
// (the conditions are joined with &&):
// 1. server.dirty has reached sp->changes;
// 2. more than sp->seconds have passed since server.lastsave;
// 3. either the last bgsave status is C_OK, or more than
//    CONFIG_BGSAVE_RETRY_DELAY seconds have passed since the last attempt.
if (server.dirty >= sp->changes &&
server.unixtime-server.lastsave > sp->seconds &&
(server.unixtime-server.lastbgsave_try >
CONFIG_BGSAVE_RETRY_DELAY ||
server.lastbgsave_status == C_OK))
{
serverLog(LL_NOTICE,"%d changes in %d seconds. Saving...",
sp->changes, (int)sp->seconds);
rdbSaveInfo rsi, *rsiptr;
rsiptr = rdbPopulateSaveInfo(&rsi);
rdbSaveBackground(SLAVE_REQ_NONE,server.rdb_filename,rsiptr,RDBFLAGS_NONE);
// Stop at the first save point that matches.
break;
}
}
......../* unrelated code omitted */
/* Start a scheduled BGSAVE if the corresponding flag is set. This is
* useful when we are forced to postpone a BGSAVE because an AOF
* rewrite is in progress.
*
* Note: this code must be after the replicationCron() call above so
* make sure when refactoring this file to keep this order. This is useful
* because we want to give priority to RDB savings for replication. */
// The scheduled flag (server.rdb_bgsave_scheduled) is cleared only when
// rdbSaveBackground() returns C_OK, so a failed start is retried later.
if (!hasActiveChildProcess() &&
server.rdb_bgsave_scheduled &&
(server.unixtime-server.lastbgsave_try > CONFIG_BGSAVE_RETRY_DELAY ||
server.lastbgsave_status == C_OK))
{
rdbSaveInfo rsi, *rsiptr;
rsiptr = rdbPopulateSaveInfo(&rsi);
if (rdbSaveBackground(SLAVE_REQ_NONE,server.rdb_filename,rsiptr,RDBFLAGS_NONE) == C_OK)
server.rdb_bgsave_scheduled = 0;
}
......../* unrelated code omitted */
}