




    /* Create the timer callback, this is our way to process many background
     * operations incrementally, like clients timeout, eviction of unaccessed
     * expired keys and so forth. */
    if (aeCreateTimeEvent(server.el, 1, serverCron, NULL, NULL) == AE_ERR) {  // register serverCron as a time event on the event loop
        serverPanic("Can't create event loop timers.");


/* This is our timer interrupt, called server.hz times per second.
 * Here is where we do a number of things that need to be done asynchronously.
 * For instance:
 * - Active expired keys collection (it is also performed in a lazy way on
 *   lookup).
 * - Software watchdog.
 * - Update some statistic.
 * - Incremental rehashing of the DBs hash tables.  
 * - Triggering BGSAVE / AOF rewrite, and handling of terminated children.
 * - Clients timeout of different kinds.
 * - Replication reconnection.
 * - Many more...
 * Everything directly called here will be called server.hz times per second,
 * so in order to throttle execution of things we want to do less frequently
 * a macro is used: run_with_period(milliseconds) { .... } */

int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
    /* NOTE(review): this listing is abridged. Several statements and closing
     * braces of the function are not shown, so it does not compile as-is. */
    int j;

    /* Software watchdog: deliver the SIGALRM that will reach the signal
     * handler if we don't return here fast enough. */
    if (server.watchdog_period) watchdogScheduleSignal(server.watchdog_period);  // arm the watchdog signal (presumably via setitimer; confirm in watchdogScheduleSignal)

    /* Update the time cache. */
    updateCachedTime();  // refresh the cached time

    server.hz = server.config_hz;  // start from the configured frequency
    /* Adapt the server.hz value to the number of configured clients. If we have
     * many clients, we want to call serverCron() with a higher frequency. */
    if (server.dynamic_hz) {  // dynamic hz: the frequency is recomputed from the client count on every call
        while (listLength(server.clients) / server.hz >
               MAX_CLIENTS_PER_CLOCK_TICK)  // more clients per tick than allowed
            server.hz *= 2;  // too many clients per tick: double the frequency
            if (server.hz > CONFIG_MAX_HZ) {  // never go above the maximum
                server.hz = CONFIG_MAX_HZ;

    run_with_period(100) {  // every 100 milliseconds
        trackInstantaneousMetric(STATS_METRIC_COMMAND,server.stat_numcommands);  // sample the executed-commands counter

    /* We have just LRU_BITS bits per object for LRU information.
     * So we use an (eventually wrapping) LRU clock.
     * Note that even if the counter wraps it's not a big problem,
     * everything will still work but some object will appear younger
     * to Redis. However for this to happen a given object should never be
     * touched for all the time needed to the counter to wrap, which is
     * not likely.
     * Note that you can change the resolution altering the
     * LRU_CLOCK_RESOLUTION define. */
    unsigned long lruclock = getLRUClock();  // read the current LRU clock value

    /* Record the max memory used since the server was started. */
    if (zmalloc_used_memory() > server.stat_peak_memory)  // current usage is a new peak: remember it
        server.stat_peak_memory = zmalloc_used_memory();

    run_with_period(100) {  // every 100 milliseconds
        /* Sample the RSS and other metrics here since this is a relatively slow call.
         * We must sample the zmalloc_used at the same time we take the rss, otherwise
         * the frag ratio calculate may be off (ratio of two samples at different times) */
        server.cron_malloc_stats.process_rss = zmalloc_get_rss();  // sample the process RSS
        server.cron_malloc_stats.zmalloc_used = zmalloc_used_memory();  // sample the allocator-used memory at the same moment
        /* Sampling the allocator info can be slow too.
         * The fragmentation ratio it'll show is potentially more accurate
         * it excludes other RSS pages such as: shared libraries, LUA and other non-zmalloc
         * allocations, and allocator reserved pages that can be purged (all not actual frag) */
        /* in case the allocator isn't providing these stats, fake them so that
         * fragmentation info still shows some (inaccurate metrics) */
        if (!server.cron_malloc_stats.allocator_resident) {
            /* LUA memory isn't part of zmalloc_used, but it is part of the process RSS,
             * so we must deduct it in order to be able to calculate correct
             * "allocator fragmentation" ratio */
            size_t lua_memory = lua_gc(server.lua,LUA_GCCOUNT,0)*1024LL;
            server.cron_malloc_stats.allocator_resident = server.cron_malloc_stats.process_rss - lua_memory;
        if (!server.cron_malloc_stats.allocator_active)
            server.cron_malloc_stats.allocator_active = server.cron_malloc_stats.allocator_resident;
        if (!server.cron_malloc_stats.allocator_allocated)
            server.cron_malloc_stats.allocator_allocated = server.cron_malloc_stats.zmalloc_used;

    /* We received a SIGTERM, shutting down here in a safe way, as it is
     * not ok doing so inside the signal handler. */
    if (server.shutdown_asap) {  // a shutdown was requested by the signal handler
        if (prepareForShutdown(SHUTDOWN_NOFLAGS) == C_OK) exit(0);  // exit only if the shutdown preparation succeeded
        serverLog(LL_WARNING,"SIGTERM received but errors trying to shut down the server, check the logs for more information");
        server.shutdown_asap = 0;

    /* Show some info about non-empty databases */
    run_with_period(5000) {  // every 5000 milliseconds
        for (j = 0; j < server.dbnum; j++) {
            long long size, used, vkeys;

            size = dictSlots(server.db[j].dict);  // per DB: hash table slots, number of keys, number of keys with an expire
            used = dictSize(server.db[j].dict);
            vkeys = dictSize(server.db[j].expires);
            if (used || vkeys) {
                serverLog(LL_VERBOSE,"DB %d: %lld keys (%lld volatile) in %lld slots HT.",j,used,vkeys,size);
                /* dictPrintStats(server.dict); */

    /* Show information about connected clients */
    if (!server.sentinel_mode) {  // only when NOT running in sentinel mode
        run_with_period(5000) {  // every 5000 milliseconds: log client count, replica count and memory in use
                "%lu clients connected (%lu replicas), %zu bytes in use",

    /* We need to do a few operations on clients asynchronously. */
    clientsCron();  // periodic per-client maintenance

    /* Handle background operations on Redis databases. */
    databasesCron();  // background DB work: key expiry, hash table resizing and rehashing

    /* Start a scheduled AOF rewrite if this was requested by the user while
     * a BGSAVE was in progress. */
    if (server.rdb_child_pid == -1 && server.aof_child_pid == -1 &&
        rewriteAppendOnlyFileBackground();  // start the background AOF rewrite

    /* Check if a background saving or AOF rewrite in progress terminated. */
    if (server.rdb_child_pid != -1 || server.aof_child_pid != -1 ||
        ldbPendingChildren())  // an RDB child, an AOF rewrite child or an ldb child may be running
        int statloc;
        pid_t pid;

        if ((pid = wait3(&statloc,WNOHANG,NULL)) != 0) {  // non-blocking check for a terminated child
            int exitcode = WEXITSTATUS(statloc);
            int bysignal = 0;

            if (WIFSIGNALED(statloc)) bysignal = WTERMSIG(statloc);

            if (pid == -1) {  // wait3() itself reported an error
                serverLog(LL_WARNING,"wait3() returned an error: %s. "
                    "rdb_child_pid = %d, aof_child_pid = %d",
                    (int) server.rdb_child_pid,
                    (int) server.aof_child_pid);
            } else if (pid == server.rdb_child_pid) {
                backgroundSaveDoneHandler(exitcode,bysignal);  // the RDB save child terminated
                if (!bysignal && exitcode == 0) receiveChildInfo();
            } else if (pid == server.aof_child_pid) {
                if (!bysignal && exitcode == 0) receiveChildInfo();
            } else {
                if (!ldbRemoveChild(pid)) {
                        "Warning, detected child with unmatched pid: %ld",
    } else {
        /* If there is not a background saving/rewrite in progress check if
         * we have to save/rewrite now. */
        for (j = 0; j < server.saveparamslen; j++) {  // walk the configured save points
            struct saveparam *sp = server.saveparams+j;

            /* Save if we reached the given amount of changes,
             * the given amount of seconds, and if the latest bgsave was
             * successful or if, in case of an error, at least
             * CONFIG_BGSAVE_RETRY_DELAY seconds already elapsed. */
            if (server.dirty >= sp->changes &&
                server.unixtime-server.lastsave > sp->seconds &&
                (server.unixtime-server.lastbgsave_try >
                 CONFIG_BGSAVE_RETRY_DELAY ||
                 server.lastbgsave_status == C_OK))
                serverLog(LL_NOTICE,"%d changes in %d seconds. Saving...",
                    sp->changes, (int)sp->seconds);
                rdbSaveInfo rsi, *rsiptr;
                rsiptr = rdbPopulateSaveInfo(&rsi);
                rdbSaveBackground(server.rdb_filename,rsiptr);  // start the background RDB save

        /* Trigger an AOF rewrite if needed. */
        if (server.aof_state == AOF_ON &&
            server.rdb_child_pid == -1 &&
            server.aof_child_pid == -1 &&
            server.aof_rewrite_perc &&
            server.aof_current_size > server.aof_rewrite_min_size)
            long long base = server.aof_rewrite_base_size ?
                server.aof_rewrite_base_size : 1;
            long long growth = (server.aof_current_size*100/base) - 100;
            if (growth >= server.aof_rewrite_perc) {
                serverLog(LL_NOTICE,"Starting automatic rewriting of AOF on %lld%% growth",growth);
                rewriteAppendOnlyFileBackground();  // growth reached the configured percentage: start a background AOF rewrite

    /* AOF postponed flush: Try at every cron cycle if the slow fsync
     * completed. */
    if (server.aof_flush_postponed_start) flushAppendOnlyFile(0);

    /* AOF write errors: in this case we have a buffer to flush as well and
     * clear the AOF error in case of success to make the DB writable again,
     * however to try every second is enough in case of 'hz' is set to
     * a higher frequency. */
    run_with_period(1000) {  // every 1000 milliseconds: act only if the last AOF write failed
        if (server.aof_last_write_status == C_ERR)

    /* Close clients that need to be closed asynchronous */
    freeClientsInAsyncFreeQueue();  // free the clients queued for asynchronous close

    /* Clear the paused clients flag if needed. */
    clientsArePaused(); /* Don't check return value, just use the side effect.*/

    /* Replication cron function -- used to reconnect to master,
     * detect transfer failures, start background RDB transfers and so forth. */
    run_with_period(1000) replicationCron();  // every 1000 milliseconds

    /* Run the Redis Cluster cron. */
    run_with_period(100) {
        if (server.cluster_enabled) clusterCron();  // only when cluster support is enabled

    /* Run the Sentinel timer if we are in sentinel mode. */
    if (server.sentinel_mode) sentinelTimer();  // sentinel mode: run the sentinel timer

    /* Cleanup expired MIGRATE cached sockets. */
    run_with_period(1000) {  // every 1000 milliseconds (the body is not shown in this listing)

    /* Start a scheduled BGSAVE if the corresponding flag is set. This is
     * useful when we are forced to postpone a BGSAVE because an AOF
     * rewrite is in progress.
     * Note: this code must be after the replicationCron() call above so
     * make sure when refactoring this file to keep this order. This is useful
     * because we want to give priority to RDB savings for replication. */
    if (server.rdb_child_pid == -1 && server.aof_child_pid == -1 &&
        server.rdb_bgsave_scheduled &&
        (server.unixtime-server.lastbgsave_try > CONFIG_BGSAVE_RETRY_DELAY ||
         server.lastbgsave_status == C_OK))
        rdbSaveInfo rsi, *rsiptr;
        rsiptr = rdbPopulateSaveInfo(&rsi);  // a BGSAVE was scheduled earlier: run it now
        if (rdbSaveBackground(server.rdb_filename,rsiptr) == C_OK)
            server.rdb_bgsave_scheduled = 0;

    server.cronloops++;  // count this cron iteration
    return 1000/server.hz;  // the period for the current hz (1000/hz), returned to the event loop


/* This function handles 'background' operations we are required to do
 * incrementally in Redis databases, such as active key expiring, resizing,
 * rehashing.
 *
 * NOTE(review): this listing is abridged. Some statements and closing
 * braces are not shown. */
void databasesCron(void) {
    /* Expire keys by random sampling. Not required for slaves
     * as master will synthesize DELs for us. */
    if (server.active_expire_enabled && server.masterhost == NULL) {
        activeExpireCycle(ACTIVE_EXPIRE_CYCLE_SLOW);  // master with active expire enabled: run a slow expire cycle
    } else if (server.masterhost != NULL) {
        expireSlaveKeys();  // replica (masterhost is set): expire the replica's keys

    /* Defrag keys gradually. */
    if (server.active_defrag_enabled)  // active defragmentation is enabled (the guarded call is not shown here)

    /* Perform hash tables rehashing if needed, but only if there are no
     * other processes saving the DB on disk. Otherwise rehashing is bad
     * as will cause a lot of copy-on-write of memory pages. */
    if (server.rdb_child_pid == -1 && server.aof_child_pid == -1) {  // no RDB / AOF child is running
        /* We use global counters so if we stop the computation at a given
         * DB we'll be able to start from the successive in the next
         * cron loop iteration. */
        static unsigned int resize_db = 0;
        static unsigned int rehash_db = 0;
        int dbs_per_call = CRON_DBS_PER_CALL;
        int j;

        /* Don't test more DBs than we have. */
        if (dbs_per_call > server.dbnum) dbs_per_call = server.dbnum;

        /* Resize */
        for (j = 0; j < dbs_per_call; j++) {
            tryResizeHashTables(resize_db % server.dbnum);  // try to resize the hash tables of this DB

        /* Rehash */
        if (server.activerehashing) {  // incremental rehashing, only when active rehashing is enabled
            for (j = 0; j < dbs_per_call; j++) {
                int work_done = incrementallyRehash(rehash_db);
                if (work_done) {
                    /* If the function did some work, stop here, we'll do
                     * more at the next cron loop. */
                } else {
                    /* If this db didn't need rehash, we'll try the next one. */
                    rehash_db %= server.dbnum;




AOF(Append Only File)机制是Redis通过保存服务器所执行的写命令来记录数据库状态的持久化方式,其目的是在机器或者Redis服务进程出现异常的情况下,尽可能地将丢失的数据恢复回来。例如在向Redis服务器执行set a b的过程中,服务端接收到了写命令,此时就会把该命令保存到server.aof_buf缓冲区中,然后再按照配置的AOF写入策略将缓冲区的内容写入AOF文件。AOF文件的写入策略分为三种:

appendfsync always   # 每次将aof_buf的内容写入文件后都立即执行fsync落盘
appendfsync everysec # 每次都将aof_buf的数据写入文件,但fsync由后台线程执行,距离上次fsync不足一秒则不执行
appendfsync no       # 只写入文件不主动fsync,由操作系统自身的落盘机制将数据落盘,写入效率高但是落盘时间不可控


/* NOTE(review): abridged listing. Only the comment marking the AOF flush
 * step is shown; the statement itself, the rest of the body and the closing
 * brace are omitted. */
void beforeSleep(struct aeEventLoop *eventLoop) {
    /* Write the AOF buffer on disk */




/* NOTE(review): abridged listing. Only the propagation part of call() is
 * shown; several statements, part of a condition and the closing braces are
 * omitted. */
void call(client *c, int flags) {
    long long dirty, start, duration;
    int client_old_flags = c->flags;
    struct redisCommand *real_cmd = c->cmd;  // remember the command currently set on the client

    /* Propagate the command into the AOF and replication link */
    if (flags & CMD_CALL_PROPAGATE &&
        int propagate_flags = PROPAGATE_NONE;

        /* Check if the command operated changes in the data set. If so
         * set for replication / AOF propagation. */
        if (dirty) propagate_flags |= (PROPAGATE_AOF|PROPAGATE_REPL);

        /* If the client forced AOF / replication of the command, set
         * the flags regardless of the command effects on the data set. */
        if (c->flags & CLIENT_FORCE_REPL) propagate_flags |= PROPAGATE_REPL;
        if (c->flags & CLIENT_FORCE_AOF) propagate_flags |= PROPAGATE_AOF;

        /* However prevent AOF / replication propagation if the command
         * implementations called preventCommandPropagation() or similar,
         * or if we don't have the call() flags to do so. */
        if (c->flags & CLIENT_PREVENT_REPL_PROP ||
            !(flags & CMD_CALL_PROPAGATE_REPL))
                propagate_flags &= ~PROPAGATE_REPL;
        if (c->flags & CLIENT_PREVENT_AOF_PROP ||
            !(flags & CMD_CALL_PROPAGATE_AOF))
                propagate_flags &= ~PROPAGATE_AOF;

        /* Call propagate() only if at least one of AOF / replication
         * propagation is needed. Note that modules commands handle replication
         * in an explicit way, so we never replicate them automatically. */
        if (propagate_flags != PROPAGATE_NONE && !(c->cmd->flags & CMD_MODULE))



/* Propagate the specified command (in the context of the specified database id)
 * to AOF and Slaves.
 * flags are an xor between:
 * + PROPAGATE_NONE (no propagation of command at all)
 * + PROPAGATE_AOF (propagate into the AOF file if is enabled)
 * + PROPAGATE_REPL (propagate into the replication link)
 * This should not be used inside commands implementation. Use instead
 * alsoPropagate(), preventCommandPropagation(), forceCommandPropagation(). */
void propagate(struct redisCommand *cmd, int dbid, robj **argv, int argc,
               int flags)
    /* NOTE(review): abridged listing. The braces and the statements guarded
     * by the two conditions below are not shown. */
    if (server.aof_state != AOF_OFF && flags & PROPAGATE_AOF)  // AOF is not off and AOF propagation was requested
    if (flags & PROPAGATE_REPL)  // propagation to the replication link was requested


/* Serialize a command into a temporary buffer and append that buffer to
 * server.aof_buf when AOF is on, and to the AOF rewrite buffer when an AOF
 * rewrite child is running.
 *
 * cmd        - the command being logged; its proc pointer selects the
 *              translation applied below.
 * dictid     - id of the DB the command targeted.
 * argv, argc - the command argument vector.
 *
 * NOTE(review): this listing is abridged. Some statements, call arguments
 * and closing braces are not shown. */
void feedAppendOnlyFile(struct redisCommand *cmd, int dictid, robj **argv, int argc) {
    sds buf = sdsempty();
    robj *tmpargv[3];

    /* The DB this command was targeting is not the same as the last command
     * we appended. To issue a SELECT command is needed. */
    if (dictid != server.aof_selected_db) {  // target DB differs from the last DB written to the AOF: emit a SELECT first
        char seldb[64];

        buf = sdscatprintf(buf,"*2\r\n$6\r\nSELECT\r\n$%lu\r\n%s\r\n",
            (unsigned long)strlen(seldb),seldb);
        server.aof_selected_db = dictid;

    if (cmd->proc == expireCommand || cmd->proc == pexpireCommand ||
        cmd->proc == expireatCommand) {  // EXPIRE / PEXPIRE / EXPIREAT
        /* Translate EXPIRE/PEXPIRE/EXPIREAT into PEXPIREAT */
        buf = catAppendOnlyExpireAtCommand(buf,cmd,argv[1],argv[2]);  // append the translated expire command
    } else if (cmd->proc == setexCommand || cmd->proc == psetexCommand) {  // SETEX / PSETEX
        /* Translate SETEX/PSETEX to SET and PEXPIREAT */
        tmpargv[0] = createStringObject("SET",3);
        tmpargv[1] = argv[1];
        tmpargv[2] = argv[3];
        buf = catAppendOnlyGenericCommand(buf,3,tmpargv);  // append the plain SET ...
        buf = catAppendOnlyExpireAtCommand(buf,cmd,argv[1],argv[2]);  // ... followed by the expire command
    } else if (cmd->proc == setCommand && argc > 3) {
        int i;
        robj *exarg = NULL, *pxarg = NULL;
        /* Translate SET [EX seconds][PX milliseconds] to SET and PEXPIREAT */
        buf = catAppendOnlyGenericCommand(buf,3,argv);  // SET with extra options: append the three-argument SET first
        for (i = 3; i < argc; i ++) {
            if (!strcasecmp(argv[i]->ptr, "ex")) exarg = argv[i+1];
            if (!strcasecmp(argv[i]->ptr, "px")) pxarg = argv[i+1];
        serverAssert(!(exarg && pxarg));
        if (exarg)
            buf = catAppendOnlyExpireAtCommand(buf,server.expireCommand,argv[1],  // EX given: append the matching expire command
        if (pxarg)
            buf = catAppendOnlyExpireAtCommand(buf,server.pexpireCommand,argv[1],
    } else {
        /* All the other commands don't need translation or need the
         * same translation already operated in the command vector
         * for the replication itself. */
        buf = catAppendOnlyGenericCommand(buf,argc,argv);

    /* Append to the AOF buffer. This will be flushed on disk just before
     * of re-entering the event loop, so before the client will get a
     * positive reply about the operation performed. */
    if (server.aof_state == AOF_ON)  // AOF is on: copy the serialized command into aof_buf
        server.aof_buf = sdscatlen(server.aof_buf,buf,sdslen(buf));

    /* If a background append only file rewriting is in progress we want to
     * accumulate the differences between the child DB and the current one
     * in a buffer, so that when the child process will do its work we
     * can append the differences to the new append only file. */
    if (server.aof_child_pid != -1)
        aofRewriteBufferAppend((unsigned char*)buf,sdslen(buf));

    sdsfree(buf);  // release the temporary buffer




/* Write the append only file buffer on disk.
 * Since we are required to write the AOF before replying to the client,
 * and the only way the client socket can get a write is when entering
 * the event loop, we accumulate all the AOF writes in a memory
 * buffer and write it on disk using this function just before entering
 * the event loop again.
 * About the 'force' argument:
 * When the fsync policy is set to 'everysec' we may delay the flush if there
 * is still an fsync() going on in the background thread, since for instance
 * on Linux write(2) will be blocked by the background fsync anyway.
 * When this happens we remember that there is some aof buffer to be
 * flushed ASAP, and will try to do that in the serverCron() function.
 * However if force is set to 1 we'll write regardless of the background
 * fsync.
 *
 * NOTE(review): this listing is abridged. Some statements, call arguments
 * and closing braces are not shown. */
#define AOF_WRITE_LOG_ERROR_RATE 30 /* Seconds between errors logging. */
void flushAppendOnlyFile(int force) {
    ssize_t nwritten;
    int sync_in_progress = 0;
    mstime_t latency;

    if (sdslen(server.aof_buf) == 0) return;  // nothing buffered: nothing to do

    if (server.aof_fsync == AOF_FSYNC_EVERYSEC)  // 'everysec' policy:
        sync_in_progress = bioPendingJobsOfType(BIO_AOF_FSYNC) != 0;  // is a background fsync job still pending?

    if (server.aof_fsync == AOF_FSYNC_EVERYSEC && !force) {  // 'everysec' policy and the flush is not forced
        /* With this append fsync policy we do background fsyncing.
         * If the fsync is still in progress we can try to delay
         * the write for a couple of seconds. */
        if (sync_in_progress) {
            if (server.aof_flush_postponed_start == 0) {  // no flush has been postponed so far
                /* No previous write postponing, remember that we are
                 * postponing the flush and return. */
                server.aof_flush_postponed_start = server.unixtime;  // remember when the postponing started
            } else if (server.unixtime - server.aof_flush_postponed_start < 2) {  // postponed for less than two seconds: keep waiting for the fsync
                /* We were already waiting for fsync to finish, but for less
                 * than two seconds this is still ok. Postpone again. */
            /* Otherwise fall through, and go write since we can't wait
             * over two seconds. */
            serverLog(LL_NOTICE,"Asynchronous AOF fsync is taking too long (disk is busy?). Writing the AOF buffer without waiting for fsync to complete, this may slow down Redis.");
    /* We want to perform a single write. This should be guaranteed atomic
     * at least if the filesystem we are writing is a real physical one.
     * While this will save us against the server being killed I don't think
     * there is much to do about the whole server stopping for power problems
     * or alike */

    nwritten = aofWrite(server.aof_fd,server.aof_buf,sdslen(server.aof_buf));  // write the whole buffer to the AOF file
    /* We want to capture different events for delayed writes:
     * when the delay happens with a pending fsync, or with a saving child
     * active, and when the above two conditions are missing.
     * We also use an additional event name to save all samples which is
     * useful for graphing / monitoring purposes. */
    if (sync_in_progress) {
    } else if (server.aof_child_pid != -1 || server.rdb_child_pid != -1) {
    } else {

    /* We performed the write so reset the postponed flush sentinel to zero. */
    server.aof_flush_postponed_start = 0;

    if (nwritten != (ssize_t)sdslen(server.aof_buf)) {  // fewer bytes written than buffered (or an error)
        static time_t last_write_error_log = 0;
        int can_log = 0;

        /* Limit logging rate to 1 line per AOF_WRITE_LOG_ERROR_RATE seconds. */
        if ((server.unixtime - last_write_error_log) > AOF_WRITE_LOG_ERROR_RATE) {
            can_log = 1;
            last_write_error_log = server.unixtime;

        /* Log the AOF write error and record the error code. */
        if (nwritten == -1) {  // the write failed outright
            if (can_log) {
                serverLog(LL_WARNING,"Error writing to the AOF file: %s",
                server.aof_last_write_errno = errno;
        } else {
            if (can_log) {
                serverLog(LL_WARNING,"Short write while writing to "
                                       "the AOF file: (nwritten=%lld, "
                                       (long long)nwritten,
                                       (long long)sdslen(server.aof_buf));

            if (ftruncate(server.aof_fd, server.aof_current_size) == -1) {  // truncate the file back to aof_current_size to drop the partial write
                if (can_log) {
                    serverLog(LL_WARNING, "Could not remove short write "
                             "from the append-only file.  Redis may refuse "
                             "to load the AOF the next time it starts.  "
                             "ftruncate: %s", strerror(errno));
            } else {
                /* If the ftruncate() succeeded we can set nwritten to
                 * -1 since there is no longer partial data into the AOF. */
                nwritten = -1;
            server.aof_last_write_errno = ENOSPC;

        /* Handle the AOF write error. */
        if (server.aof_fsync == AOF_FSYNC_ALWAYS) {  // 'always' policy: a failed write cannot be recovered from
            /* We can't recover when the fsync policy is ALWAYS since the
             * reply for the client is already in the output buffers, and we
             * have the contract with the user that on acknowledged write data
             * is synced on disk. */
            serverLog(LL_WARNING,"Can't recover from AOF write error when the AOF fsync policy is 'always'. Exiting...");
        } else {
            /* Recover from failed write leaving data into the buffer. However
             * set an error to stop accepting writes as long as the error
             * condition is not cleared. */
            server.aof_last_write_status = C_ERR;  // flag the write error

            /* Trim the sds buffer if there was a partial write, and there
             * was no way to undo it with ftruncate(2). */
            if (nwritten > 0) {
                server.aof_current_size += nwritten;
            return; /* We'll try again on the next call... */
    } else {
        /* Successful write(2). If AOF was in error state, restore the
         * OK state and log the event. */
        if (server.aof_last_write_status == C_ERR) {  // a previous write error is now cleared
                "AOF write error looks solved, Redis can write again.");
            server.aof_last_write_status = C_OK;
    server.aof_current_size += nwritten;  // account for the bytes just written

    /* Re-use AOF buffer when it is small enough. The maximum comes from the
     * arena size of 4k minus some overhead (but is otherwise arbitrary). */
    if ((sdslen(server.aof_buf)+sdsavail(server.aof_buf)) < 4000) {
    } else {
        server.aof_buf = sdsempty();

    /* Don't fsync if no-appendfsync-on-rewrite is set to yes and there are
     * children doing I/O in the background. */
    if (server.aof_no_fsync_on_rewrite &&
        (server.aof_child_pid != -1 || server.rdb_child_pid != -1))  // a child is doing background I/O

    /* Perform the fsync if needed. */
    if (server.aof_fsync == AOF_FSYNC_ALWAYS) {  // 'always' policy: fsync synchronously
        /* redis_fsync is defined as fdatasync() for Linux in order to avoid
         * flushing metadata. */
        redis_fsync(server.aof_fd); /* Let's try to get this data on the disk */
        server.aof_last_fsync = server.unixtime;  // remember when the last fsync happened
    } else if ((server.aof_fsync == AOF_FSYNC_EVERYSEC &&
                server.unixtime > server.aof_last_fsync)) {  // 'everysec' policy and the cached time is past the last fsync
        if (!sync_in_progress) aof_background_fsync(server.aof_fd);  // queue a background fsync unless one is already pending
        server.aof_last_fsync = server.unixtime;  // record the fsync time


/* Starts a background task that performs fsync() against the specified
 * file descriptor (the one of the AOF file) in another thread.
 *
 * fd is handed to the BIO_AOF_FSYNC job as its first argument, packed into
 * a pointer-sized value; the two remaining job arguments are unused. */
void aof_background_fsync(int fd) {
    bioCreateBackgroundJob(BIO_AOF_FSYNC,(void*)(long)fd,NULL,NULL);  /* queue a bio job */
}
/* Queue a new background job of the given type.
 *
 * type selects the per-type job list, mutex and condition variable.
 * arg1, arg2 and arg3 are stored in the job untouched.
 *
 * The job is appended to the tail of the list while the type's mutex is
 * held, and the type's condition variable is signalled before the mutex is
 * released. */
void bioCreateBackgroundJob(int type, void *arg1, void *arg2, void *arg3) {
    struct bio_job *job = zmalloc(sizeof(*job));

    job->time = time(NULL);                         /* creation time of the job */
    job->arg1 = arg1;                               /* keep the caller's arguments */
    job->arg2 = arg2;
    job->arg3 = arg3;
    pthread_mutex_lock(&bio_mutex[type]);           /* take the lock of this job type */
    listAddNodeTail(bio_jobs[type],job);            /* enqueue at the tail */
    pthread_cond_signal(&bio_newjob_cond[type]);    /* signal the new-job condition */
    pthread_mutex_unlock(&bio_mutex[type]);         /* release the lock */
}


/* Thread entry point that processes the queued background jobs of one type.
 * arg carries the job type as an integer value.
 *
 * NOTE(review): this listing is abridged. Several statements (logging
 * calls, lock/wait calls, the per-type actions) and closing braces are not
 * shown. */
void *bioProcessBackgroundJobs(void *arg) {
    struct bio_job *job;
    unsigned long type = (unsigned long) arg;
    sigset_t sigset;

    /* Check that the type is within the right interval. */
    if (type >= BIO_NUM_OPS) {
            "Warning: bio thread started with wrong type %lu",type);
        return NULL;

    /* Make the thread killable at any time, so that bioKillThreads()
     * can work reliably. */
    pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);
    pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL);

    /* Block SIGALRM so we are sure that only the main thread will
     * receive the watchdog signal. */
    sigaddset(&sigset, SIGALRM);
    if (pthread_sigmask(SIG_BLOCK, &sigset, NULL))
            "Warning: can't mask SIGALRM in bio.c thread: %s", strerror(errno));

    while(1) {
        listNode *ln;

        /* The loop always starts with the lock hold. */
        if (listLength(bio_jobs[type]) == 0) {  // no job queued for this type: wait for one (the wait call is not shown here)
        /* Pop the job from the queue. */
        ln = listFirst(bio_jobs[type]);  // first node of the queue
        job = ln->value;  // the job structure is stored as the node value
        /* It is now possible to unlock the background system as we know have
         * a stand alone job structure to process.*/
        pthread_mutex_unlock(&bio_mutex[type]);  // release the lock while the job is processed

        /* Process the job accordingly to its type. */
        if (type == BIO_CLOSE_FILE) {
        } else if (type == BIO_AOF_FSYNC) {  // AOF fsync job
            redis_fsync((long)job->arg1);  // arg1 carries the file descriptor: sync it
        } else if (type == BIO_LAZY_FREE) {
            /* What we free changes depending on what arguments are set:
             * arg1 -> free the object at pointer.
             * arg2 & arg3 -> free two dictionaries (a Redis DB).
             * only arg3 -> free the skiplist. */
            if (job->arg1)
            else if (job->arg2 && job->arg3)
            else if (job->arg3)
        } else {
            serverPanic("Wrong job type in bioProcessBackgroundJobs().");

        /* Lock again before reiterating the loop, if there are no longer
         * jobs to process we'll block again in pthread_cond_wait(). */

        /* Unblock threads blocked on bioWaitStepOfType() if any. */



