redis淘汰策略原理

基于redis 5.0

入口

redis每次执行命令时,都会检查当前是否需要清理内存。


// server.c
/* Excerpt: the per-command hook where eviction is triggered. When maxmemory
 * is configured and no Lua script is currently blocked in a timeout, Redis
 * tries to reclaim memory before executing the command.
 * ('……' marks lines elided by the article.) */
int processCommand(client *c) {
    ……
    /* Handle the maxmemory directive.
     *
     * Note that we do not want to reclaim memory if we are here re-entering
     * the event loop since there is a busy Lua script running in timeout
     * condition, to avoid mixing the propagation of scripts with the
     * propagation of DELs due to eviction. */
    if (server.maxmemory && !server.lua_timedout) {
        /* C_ERR here means not enough memory could be freed. */
        int out_of_memory = freeMemoryIfNeededAndSafe() == C_ERR;
        /* freeMemoryIfNeeded may flush slave output buffers. This may result
         * into a slave, that may be the active client, to be freed. */
        if (server.current_client == NULL) return C_ERR;

        /* It was impossible to free enough memory, and the command the client
         * is trying to execute is denied during OOM conditions or the client
         * is in MULTI/EXEC context? Error. */
        if (out_of_memory &&
            (c->cmd->flags & CMD_DENYOOM ||
             (c->flags & CLIENT_MULTI && c->cmd->proc != execCommand))) {
            flagTransaction(c);
            addReply(c, shared.oomerr);
            return C_OK;
        }

        /* Save out_of_memory result at script start, otherwise if we check OOM
         * untill first write within script, memory used by lua stack and
         * arguments might interfere. */
        if (c->cmd->proc == evalCommand || c->cmd->proc == evalShaCommand) {
            server.lua_oom = out_of_memory;
        }
    } 
    ……
}

如果当配置了最大内存,且当前没有超时的lua脚本(如果有lua脚本超时,redis会限制执行一部分命令),则检查是否需要释放内存。

// evict.c
/* Safety wrapper around freeMemoryIfNeeded(): eviction is skipped entirely
 * while a Lua script is stuck in a timeout condition or while the dataset
 * is still being loaded from disk, returning C_OK in both cases. */
int freeMemoryIfNeededAndSafe(void) {
    if (server.lua_timedout) return C_OK;
    if (server.loading) return C_OK;
    return freeMemoryIfNeeded();
}

重点就在freeMemoryIfNeeded方法,我们逐步分析下几个重要的步骤。

/* Excerpt: replicas ignore maxmemory by default and evict nothing themselves. */
if (server.masterhost && server.repl_slave_ignore_maxmemory) return C_OK;

如果当前节点是从节点且忽略maxmemory(默认配置是忽略)时,不需要做任何操作。

/* Excerpt: bail out early when memory usage is still under the limit. */
if (getMaxmemoryState(&mem_reported,NULL,&mem_tofree,NULL) == C_OK)
        return C_OK;

/* Report the memory status relative to the maxmemory directive.
 *
 * Returns C_OK when usage is within the limit (or no limit is set),
 * C_ERR when the limit is exceeded.
 *
 * Optional out-parameters (each may be NULL):
 *  'total'   - raw bytes currently allocated (filled for both results).
 *  'logical' - usage minus slave output buffers / AOF buffer overhead
 *              (filled only on C_ERR).
 *  'tofree'  - bytes that must be released to get back under the limit
 *              (filled only on C_ERR).
 *  'level'   - usage ratio, usually in [0,1]; above 1 means over the limit
 *              (filled for both results). */
int getMaxmemoryState(size_t *total, size_t *logical, size_t *tofree, float *level) {
    size_t reported = zmalloc_used_memory();
    if (total) *total = reported;

    /* If the raw usage is already within bounds we can return immediately,
     * unless the caller also asked for the usage ratio. */
    int under_limit = (!server.maxmemory || reported <= server.maxmemory);
    if (under_limit && !level) return C_OK;

    /* Slave output buffers and the AOF buffer do not count against the
     * limit, so subtract that overhead before comparing. */
    size_t overhead = freeMemoryGetNotCountedMemory();
    size_t used = (reported > overhead) ? reported - overhead : 0;

    /* Usage ratio: 0 when no limit is configured. */
    if (level) {
        *level = server.maxmemory ?
            (float)used / (float)server.maxmemory : 0;
    }

    if (under_limit) return C_OK;

    /* After removing the overhead we may have dropped back under the limit. */
    if (used <= server.maxmemory) return C_OK;

    /* Over the limit: report how much must be freed. */
    if (logical) *logical = used;
    if (tofree) *tofree = used - server.maxmemory;

    return C_ERR;
}

获取当前内存使用量,如果当前内存使用量小于maxmemory时,返回C_OK,也就是不执行清理,否则返回C_ERR
这个函数还通过指针传递了四个参数来获取额外的信息。

  • total:已使用的字节总数,无论是C_OK还是C_ERR都有效。
  • logical:已使用的内存减去slave或AOF缓冲区后的大小,只有返回C_ERR时有效。
  • tofree:需要释放的内存大小,只有返回C_ERR时有效。
  • level:已使用内存的比例,通常在0到1之间,当超出内存限制时,就大于1。无论是C_OK还是C_ERR都有效。

如果内存没有超过限制,就不再继续向下执行。
如果超过限制,则开始根据淘汰策略执行相应的逻辑。

清理

这里列出redis的几种淘汰策略。

策略 说明
noeviction 不淘汰任何数据,默认配置。
volatile-random 在所有设置了过期时间的数据中,随机筛选并淘汰。
allkeys-random 在所有的数据中,随机筛选并淘汰。
volatile-ttl 筛选最接近过期时间(较小的TTL)的数据并淘汰。
volatile-lru 在所有设置了过期时间的数据中,通过LRU算法筛选出数据并淘汰。
allkeys-lru 在所有的数据中,通过LRU算法筛选出数据并淘汰。
volatile-lfu 在设置了过期时间的键中,通过LFU算法筛选出数据并淘汰。
allkeys-lfu 在所有的数据中,通过LFU算法筛选出数据并淘汰。
/* Excerpt: the noeviction policy never deletes keys; jump straight to cant_free. */
if (server.maxmemory_policy == MAXMEMORY_NO_EVICTION)
    goto cant_free; /* We need to free memory, but policy forbids. */

当内存淘汰策略为noeviction时,不会淘汰数据,但并不代表不做任何操作,后面cant_free步骤会讲到。

TTL/LRU/LFU 淘汰

当淘汰策略是根据算法(而不是随机)淘汰时。

/* Excerpt: for algorithmic policies, sample candidate keys from every DB
 * into the shared eviction pool. The source dict is the whole keyspace for
 * allkeys-* policies, or only the expires dict for volatile-* policies. */
struct evictionPoolEntry *pool = EvictionPoolLRU;
……
for (i = 0; i < server.dbnum; i++) {
    db = server.db+i;
    dict = (server.maxmemory_policy & MAXMEMORY_FLAG_ALLKEYS) ?
            db->dict : db->expires;
    if ((keys = dictSize(dict)) != 0) {
        evictionPoolPopulate(i, dict, db->dict, pool);
        total_keys += keys;
    }
}

先判断是从全部数据里淘汰,还是从设置了过期时间的数据里淘汰。redis的db数据的dict存的是全部数据(包含设置了过期时间的数据),而expires里存的是设置了过期时间的数据。
EvictionPoolLRU存的是待释放的数据,初始化如下:

// evict.c
#define EVPOOL_SIZE 16
#define EVPOOL_CACHED_SDS_SIZE 255

/* One candidate in the global eviction pool. Entries are kept sorted by
 * ascending 'idle' score; a higher score means a better eviction victim. */
struct evictionPoolEntry {
    unsigned long long idle;    /* Object idle time (inverse frequency for LFU) */
    sds key;                    /* Key name. */
    sds cached;                 /* Cached SDS object for key name. */
    int dbid;                   /* Key DB number. */
};

/* Global pool of EVPOOL_SIZE candidates, shared across all databases. */
static struct evictionPoolEntry *EvictionPoolLRU;

/* Allocate and zero the global eviction pool. Every slot gets a
 * preallocated SDS buffer so that short key names can later be copied in
 * without a fresh allocation. Called once during server initialization. */
void evictionPoolAlloc(void) {
    struct evictionPoolEntry *pool = zmalloc(sizeof(*pool)*EVPOOL_SIZE);
    for (int slot = 0; slot < EVPOOL_SIZE; slot++) {
        pool[slot].idle = 0;
        pool[slot].key = NULL;
        pool[slot].cached = sdsnewlen(NULL,EVPOOL_CACHED_SDS_SIZE);
        pool[slot].dbid = 0;
    }
    EvictionPoolLRU = pool;
}

EvictionPoolLRU按idle从小到大排序,优先淘汰idle最大的值。也就是淘汰时,从EvictionPoolLRU尾部查询key不为空的数据。

进入到evictionPoolPopulate函数。

/* Excerpt: sample up to maxmemory_samples entries from the candidate dict. */
count = dictGetSomeKeys(sampledict,samples,server.maxmemory_samples);

// server.h
#define CONFIG_DEFAULT_MAXMEMORY_SAMPLES 5

// dict.c
/* Sample up to 'count' dictEntry pointers from random buckets of 'd' into
 * 'des'. Returns how many were actually collected, which may be fewer than
 * requested: probing stops after count*10 bucket visits.
 * ('……' marks lines elided by the article.) */
unsigned int dictGetSomeKeys(dict *d, dictEntry **des, unsigned int count) {
    ……
    if (dictSize(d) < count) count = dictSize(d);
    maxsteps = count*10;
    ……
    /* While rehashing, keys live in both hash tables, so both are probed. */
    tables = dictIsRehashing(d) ? 2 : 1;
    maxsizemask = d->ht[0].sizemask;
    if (tables > 1 && maxsizemask < d->ht[1].sizemask)
        maxsizemask = d->ht[1].sizemask;

    /* Pick a random point inside the larger table. */
    unsigned long i = random() & maxsizemask;
    unsigned long emptylen = 0; /* Continuous empty entries so far. */
    while(stored < count && maxsteps--) {
        for (j = 0; j < tables; j++) {
            /* Invariant of the dict.c rehashing: up to the indexes already
             * visited in ht[0] during the rehashing, there are no populated
             * buckets, so we can skip ht[0] for indexes between 0 and idx-1. */
            if (tables == 2 && j == 0 && i < (unsigned long) d->rehashidx) {
                /* Moreover, if we are currently out of range in the second
                 * table, there will be no elements in both tables up to
                 * the current rehashing index, so we jump if possible.
                 * (this happens when going from big to small table). */
                if (i >= d->ht[1].size)
                    i = d->rehashidx;
                else
                    continue;
            }
            if (i >= d->ht[j].size) continue; /* Out of range for this table. */
            dictEntry *he = d->ht[j].table[i];

            /* Count contiguous empty buckets, and jump to other
             * locations if they reach 'count' (with a minimum of 5). */
            if (he == NULL) {
                emptylen++;
                if (emptylen >= 5 && emptylen > count) {
                    i = random() & maxsizemask;
                    emptylen = 0;
                }
            } else {
                emptylen = 0;
                while (he) {
                    /* Collect all the elements of the buckets found non
                     * empty while iterating. */
                    *des = he;
                    des++;
                    he = he->next;
                    stored++;
                    if (stored == count) return stored;
                }
            }
        }
        i = (i+1) & maxsizemask;
    }
    return stored;
}

dictGetSomeKeys是根据规则获取到maxmemory_samples条记录:

  • 随机从db的某个起始位置开始,连续获取指定数量的key。
  • 如果db对应的字典正在做rehash,可能需要从两个hashtable来获取key。至于为什么rehash时要查两个hashtable,可以看redis的rehash原理,简单说就是在扩容时,会新建一个容量比较大的数组,然后从现有的数组里逐步迁移数据到新数组(由于redis是单线程,对每个指令都会限制执行时间片,所以并不一定是一次性就迁移完成,整个过程的时间可能会拉的比较长),迁移完之后再用新数据替代现有的数组。
  • 如果从当前随机到的位置起,连续遇到的空槽位数量超过了“5”和“count”两个值中较大的那个,则重新随机一个起始位置;其中count值为“maxmemory_samples”和“db内的数据量”两个值中较小的那个
  • 如果已经获取到需要的n个数据,则返回。如果没有,则循环重复获取,循环次数不超过count*10
  • 通过以上步骤,可以看出:

    • dictGetSomeKeys函数并不保证可以获取到想要的数据量(count)。
    • dictGetSomeKeys函数是随机获取部分数据,所以redis淘汰策略并不保证在所有的数据里根据规则来计算并淘汰数据。
// evict.c
/* Sample maxmemory_samples keys from 'sampledict' and try to insert each
 * into 'pool', which is kept sorted by ascending idle score. 'keydict' is
 * the main dictionary, used to look up the value object when sampling from
 * the expires dict. A higher idle score means a better eviction candidate. */
void evictionPoolPopulate(int dbid, dict *sampledict, dict *keydict, struct evictionPoolEntry *pool) {
    int j, k, count;
    dictEntry *samples[server.maxmemory_samples];

    count = dictGetSomeKeys(sampledict,samples,server.maxmemory_samples);
    for (j = 0; j < count; j++) {
        unsigned long long idle;
        sds key;
        robj *o;
        dictEntry *de;

        de = samples[j];
        key = dictGetKey(de);

        /* If the dictionary we are sampling from is not the main
         * dictionary (but the expires one) we need to lookup the key
         * again in the key dictionary to obtain the value object.
         * (For volatile-ttl the TTL itself is the score, so the value
         * object is not needed.) */
        if (server.maxmemory_policy != MAXMEMORY_VOLATILE_TTL) {
            if (sampledict != keydict) de = dictFind(keydict, key);
            o = dictGetVal(de);
        }

        /* Calculate the idle time according to the policy. This is called
         * idle just because the code initially handled LRU, but is in fact
         * just a score where an higher score means better candidate. */
        if (server.maxmemory_policy & MAXMEMORY_FLAG_LRU) {
            idle = estimateObjectIdleTime(o);
        } else if (server.maxmemory_policy & MAXMEMORY_FLAG_LFU) {
            /* When we use an LRU policy, we sort the keys by idle time
             * so that we expire keys starting from greater idle time.
             * However when the policy is an LFU one, we have a frequency
             * estimation, and we want to evict keys with lower frequency
             * first. So inside the pool we put objects using the inverted
             * frequency subtracting the actual frequency to the maximum
             * frequency of 255. */
            idle = 255-LFUDecrAndReturn(o);
        } else if (server.maxmemory_policy == MAXMEMORY_VOLATILE_TTL) {
            /* In this case the sooner the expire the better. */
            idle = ULLONG_MAX - (long)dictGetVal(de);
        } else {
            serverPanic("Unknown eviction policy in evictionPoolPopulate()");
        }

        /* Insert the element inside the pool.
         * First, find the first empty bucket or the first populated
         * bucket that has an idle time smaller than our idle time. */
        k = 0;
        while (k < EVPOOL_SIZE &&
               pool[k].key &&
               pool[k].idle < idle) k++;
        if (k == 0 && pool[EVPOOL_SIZE-1].key != NULL) {
            /* Can't insert if the element is < the worst element we have
             * and there are no empty buckets. */
            continue;
        } else if (k < EVPOOL_SIZE && pool[k].key == NULL) {
            /* Inserting into empty position. No setup needed before insert. */
        } else {
            /* Inserting in the middle. Now k points to the first element
             * greater than the element to insert.  */
            if (pool[EVPOOL_SIZE-1].key == NULL) {
                /* Free space on the right? Insert at k shifting
                 * all the elements from k to end to the right. */

                /* Save SDS before overwriting. */
                sds cached = pool[EVPOOL_SIZE-1].cached;
                memmove(pool+k+1,pool+k,
                    sizeof(pool[0])*(EVPOOL_SIZE-k-1));
                pool[k].cached = cached;
            } else {
                /* No free space on right? Insert at k-1 */
                k--;
                /* Shift all elements on the left of k (included) to the
                 * left, so we discard the element with smaller idle time. */
                sds cached = pool[0].cached; /* Save SDS before overwriting. */
                if (pool[0].key != pool[0].cached) sdsfree(pool[0].key);
                memmove(pool,pool+1,sizeof(pool[0])*k);
                pool[k].cached = cached;
            }
        }

        /* Try to reuse the cached SDS string allocated in the pool entry,
         * because allocating and deallocating this object is costly
         * (according to the profiler, not my fantasy. Remember:
         * premature optimizbla bla bla bla. */
        int klen = sdslen(key);
        if (klen > EVPOOL_CACHED_SDS_SIZE) {
            pool[k].key = sdsdup(key);
        } else {
            memcpy(pool[k].cached,key,klen+1);
            sdssetlen(pool[k].cached,klen);
            pool[k].key = pool[k].cached;
        }
        pool[k].idle = idle;
        pool[k].dbid = dbid;
    }
}

遍历随机获取的key列表,获取key对应的数据,根据TTL/LRU/LFU计算idle值,然后插入到EvictionPoolLRU数组对应的位置。

  • 如果数组是满的,且idle小于数组首个数据的idle,则不做插入。
  • 如果数组未满,从将要插入位置起,后面的数据全部向后移动。
  • 如果数组已满,释放掉数组首个数据,同时从将要插入位置起,前面的数据全部向前移动。
/* Excerpt: walk the pool from the highest idle score down and pick the
 * first key that still exists; stale ("ghost") entries are discarded. */
for (k = EVPOOL_SIZE-1; k >= 0; k--) {
    if (pool[k].key == NULL) continue;
    bestdbid = pool[k].dbid;

    if (server.maxmemory_policy & MAXMEMORY_FLAG_ALLKEYS) {
        de = dictFind(server.db[pool[k].dbid].dict,
            pool[k].key);
    } else {
        de = dictFind(server.db[pool[k].dbid].expires,
            pool[k].key);
    }

    /* Remove the entry from the pool. */
    if (pool[k].key != pool[k].cached)
        sdsfree(pool[k].key);
    pool[k].key = NULL;
    pool[k].idle = 0;

    /* If the key exists, is our pick. Otherwise it is
     * a ghost and we need to try the next element. */
    if (de) {
        bestkey = dictGetKey(de);
        break;
    } else {
        /* Ghost... Iterate again. */
    }
}

EvictionPoolLRU尾部开始遍历,获取到第一个数据存在的key,并从EvictionPoolLRU中清理掉。

随机淘汰

当淘汰策略是随机淘汰时。

/* Excerpt: random policies pick one random key per iteration, rotating
 * across DBs via the static 'next_db' counter. */
else if (server.maxmemory_policy == MAXMEMORY_ALLKEYS_RANDOM ||
         server.maxmemory_policy == MAXMEMORY_VOLATILE_RANDOM)
{
    /* When evicting a random key, we try to evict a key for
     * each DB, so we use the static 'next_db' variable to
     * incrementally visit all DBs. */
    for (i = 0; i < server.dbnum; i++) {
        j = (++next_db) % server.dbnum;
        db = server.db+j;
        dict = (server.maxmemory_policy == MAXMEMORY_ALLKEYS_RANDOM) ?
                db->dict : db->expires;
        if (dictSize(dict) != 0) {
            de = dictGetRandomKey(dict);
            bestkey = dictGetKey(de);
            bestdbid = j;
            break;
        }
    }
}

next_db记录上一次取数据的DB,根据(++next_db) % server.dbnum算法每次都从不同的DB里随机取一条记录。

/* Excerpt: delete the chosen key (sync or lazy), propagate a DEL to
 * AOF/replicas, and account the freed bytes by diffing allocator usage
 * before and after the delete. */
/* Finally remove the selected key. */
if (bestkey) {
    db = server.db+bestdbid;
    robj *keyobj = createStringObject(bestkey,sdslen(bestkey));
    propagateExpire(db,keyobj,server.lazyfree_lazy_eviction);
    /* We compute the amount of memory freed by db*Delete() alone.
     * It is possible that actually the memory needed to propagate
     * the DEL in AOF and replication link is greater than the one
     * we are freeing removing the key, but we can't account for
     * that otherwise we would never exit the loop.
     *
     * AOF and Output buffer memory will be freed eventually so
     * we only care about memory used by the key space. */
    delta = (long long) zmalloc_used_memory();
    latencyStartMonitor(eviction_latency);
    if (server.lazyfree_lazy_eviction)
        dbAsyncDelete(db,keyobj);
    else
        dbSyncDelete(db,keyobj);
    latencyEndMonitor(eviction_latency);
    latencyAddSampleIfNeeded("eviction-del",eviction_latency);
    latencyRemoveNestedEvent(latency,eviction_latency);
    delta -= (long long) zmalloc_used_memory();
    mem_freed += delta;
    server.stat_evictedkeys++;
    notifyKeyspaceEvent(NOTIFY_EVICTED, "evicted",
        keyobj, db->id);
    decrRefCount(keyobj);
    keys_freed++;

    /* When the memory to free starts to be big enough, we may
     * start spending so much time here that is impossible to
     * deliver data to the slaves fast enough, so we force the
     * transmission here inside the loop. */
    if (slaves) flushSlavesOutputBuffers();

    /* Normally our stop condition is the ability to release
     * a fixed, pre-computed amount of memory. However when we
     * are deleting objects in another thread, it's better to
     * check, from time to time, if we already reached our target
     * memory, since the "mem_freed" amount is computed only
     * across the dbAsyncDelete() call, while the thread can
     * release the memory all the time. */
    if (server.lazyfree_lazy_eviction && !(keys_freed % 16)) {
        if (getMaxmemoryState(NULL,NULL,NULL,NULL) == C_OK) {
            /* Let's satisfy our stop condition. */
            mem_freed = mem_tofree;
        }
    }
}

if (!keys_freed) {
    latencyEndMonitor(latency);
    latencyAddSampleIfNeeded("eviction-cycle",latency);
    goto cant_free; /* nothing to free... */
}
  • 根据lazyfree_lazy_eviction决定是异步还是同步清理数据,再比较删除前后当前内存使用情况(zmalloc_used_memory)的差值,作为被清理掉的内存大小。
  • 通过dbAsyncDelete/dbSyncDelete删除数据,通过propagateExpire发送删除事件写入AOF和发给从节点
  • 当这一次遍历数据,但没有清理任何数据时(keys_freed值为0),跳到cant_free环节。
// bio.h
#define BIO_CLOSE_FILE    0 /* Deferred close(2) syscall. */
#define BIO_AOF_FSYNC     1 /* Deferred AOF fsync. */
#define BIO_LAZY_FREE     2 /* Deferred objects freeing. */
#define BIO_NUM_OPS       3


/* Excerpt: last resort when nothing could be evicted — wait (1ms sleeps)
 * for the background lazy-free thread to release enough memory. */
cant_free:
    /* We are here if we are not able to reclaim memory. There is only one
     * last thing we can try: check if the lazyfree thread has jobs in queue
     * and wait... */
    while(bioPendingJobsOfType(BIO_LAZY_FREE)) {
        if (((mem_reported - zmalloc_used_memory()) + mem_freed) >= mem_tofree)
            break;
        usleep(1000);
    }
    return C_ERR;

如果进入到cant_free环节,说明当前配置的是不淘汰内存、或者没有淘汰掉任何内存。
如果当前有BIO_LAZY_FREE类型的任务,即有未执行完成的内存释放任务,睡眠等待任务完成,直到释放了足够的内存,或者任务全部执行完成。

/* Excerpt: skeleton of freeMemoryIfNeeded() showing the overall control
 * flow only — the full function appears later in the article.
 * ('……' marks lines elided by the article.) */
int freeMemoryIfNeeded(void) {
        ……
    if (getMaxmemoryState(&mem_reported,NULL,&mem_tofree,NULL) == C_OK)
        return C_OK;
        ……        
    if (server.maxmemory_policy == MAXMEMORY_NO_EVICTION)
        goto cant_free;
        ……
    mem_freed = 0;
        ……
    while (mem_freed < mem_tofree) {
        int keys_freed = 0;    
            ……
        if (bestkey) {            
                delta = (long long) zmalloc_used_memory();
                ……
                delta -= (long long) zmalloc_used_memory();
            mem_freed += delta;
            keys_freed++;
                ……
            }
        if (!keys_freed) {
            ……
            goto cant_free;
        }            
    }
    ……
}

在“不释放内存/单次循环内没有内存可释放/在释放了足够内存之后”,方法结束。

综上,freeMemoryIfNeeded大致流程:

  1. 获取当前内存使用情况,判断是否超过配置的最大内存,如果没有,则结束。
  2. 根据内存使用情况和配置的最大内存,获取到需要清理的内存大小。
  3. 配置哪种策略:

    • 不淘汰策略:不执行任何清理操作,但不代表方法结束,而是调到第6步。
    • 随机淘汰策略:轮询DB,随机获取一条记录。
    • LRU/TTL/LFU策略:

      • 根据LRU/TTL/LFU获取key的淘汰值idle,值越大越优先淘汰。
      • 将当前key的idle跟淘汰数组EvictionPoolLRU里的数据做比较并插入,EvictionPoolLRU的排序为idle从小到大。
      • 从数组尾巴遍历EvictionPoolLRU数组,取第一条记录。
  4. 如果上一步有取到数据:

    • 获取当前内存情况1。
    • 写入删除AOF、通知从库、删除数据。
    • 获取当前内存情况2。
    • 内存情况1-内存情况2的值作为当次清理掉的内存数,并累加到mem_freed
  5. 如果mem_freed已经达到目标,则结束。
  6. 如果当前配置的是不淘汰策略,或者前面没有淘汰到足够的内存:

    • 如果当前有未执行完成的内存释放任务,则循环睡眠1000微秒,每次睡眠后重新获取当前内存,判断是否有减少,并累加到mem_freed
    • 循环直到mem_freed已经达到目标或者内存释放任务都已经完成。

同时,redis的TTL/LRU/LFU并不是在所有的数据里做比较,而是随机取部分数据做比较。这是因为如果遍历所有的数据并排序,所耗的时间、内存和CPU,对redis这种高性能工具来说,是不可接受的。

完整代码:

// evict.c

/* This function is periodically called to see if there is memory to free
 * according to the current "maxmemory" settings. In case we are over the
 * memory limit, the function will try to free some memory to return back
 * under the limit.
 *
 * The function returns C_OK if we are under the memory limit or if we
 * were over the limit, but the attempt to free memory was successful.
 * Otherwise if we are over the memory limit, but not enough memory
 * was freed to return back under the limit, the function returns C_ERR. */
int freeMemoryIfNeeded(void) {
    /* By default replicas should ignore maxmemory
     * and just be masters exact copies. */
    if (server.masterhost && server.repl_slave_ignore_maxmemory) return C_OK;

    size_t mem_reported, mem_tofree, mem_freed;
    mstime_t latency, eviction_latency;
    long long delta;
    int slaves = listLength(server.slaves);

    /* When clients are paused the dataset should be static not just from the
     * POV of clients not being able to write, but also from the POV of
     * expires and evictions of keys not being performed. */
    if (clientsArePaused()) return C_OK;
    if (getMaxmemoryState(&mem_reported,NULL,&mem_tofree,NULL) == C_OK)
        return C_OK;

    mem_freed = 0;

    if (server.maxmemory_policy == MAXMEMORY_NO_EVICTION)
        goto cant_free; /* We need to free memory, but policy forbids. */

    latencyStartMonitor(latency);
    /* Evict one key per iteration until the target amount is freed. */
    while (mem_freed < mem_tofree) {
        int j, k, i, keys_freed = 0;
        static unsigned int next_db = 0;
        sds bestkey = NULL;
        int bestdbid;
        redisDb *db;
        dict *dict;
        dictEntry *de;

        /* LRU/LFU/TTL policies: select the victim via the eviction pool. */
        if (server.maxmemory_policy & (MAXMEMORY_FLAG_LRU|MAXMEMORY_FLAG_LFU) ||
            server.maxmemory_policy == MAXMEMORY_VOLATILE_TTL)
        {
            struct evictionPoolEntry *pool = EvictionPoolLRU;

            while(bestkey == NULL) {
                unsigned long total_keys = 0, keys;

                /* We don't want to make local-db choices when expiring keys,
                 * so to start populate the eviction pool sampling keys from
                 * every DB. */
                for (i = 0; i < server.dbnum; i++) {
                    db = server.db+i;
                    dict = (server.maxmemory_policy & MAXMEMORY_FLAG_ALLKEYS) ?
                            db->dict : db->expires;
                    if ((keys = dictSize(dict)) != 0) {
                        evictionPoolPopulate(i, dict, db->dict, pool);
                        total_keys += keys;
                    }
                }
                if (!total_keys) break; /* No keys to evict. */

                /* Go backward from best to worst element to evict. */
                for (k = EVPOOL_SIZE-1; k >= 0; k--) {
                    if (pool[k].key == NULL) continue;
                    bestdbid = pool[k].dbid;

                    if (server.maxmemory_policy & MAXMEMORY_FLAG_ALLKEYS) {
                        de = dictFind(server.db[pool[k].dbid].dict,
                            pool[k].key);
                    } else {
                        de = dictFind(server.db[pool[k].dbid].expires,
                            pool[k].key);
                    }

                    /* Remove the entry from the pool. */
                    if (pool[k].key != pool[k].cached)
                        sdsfree(pool[k].key);
                    pool[k].key = NULL;
                    pool[k].idle = 0;

                    /* If the key exists, is our pick. Otherwise it is
                     * a ghost and we need to try the next element. */
                    if (de) {
                        bestkey = dictGetKey(de);
                        break;
                    } else {
                        /* Ghost... Iterate again. */
                    }
                }
            }
        }

        /* volatile-random and allkeys-random policy */
        else if (server.maxmemory_policy == MAXMEMORY_ALLKEYS_RANDOM ||
                 server.maxmemory_policy == MAXMEMORY_VOLATILE_RANDOM)
        {
            /* When evicting a random key, we try to evict a key for
             * each DB, so we use the static 'next_db' variable to
             * incrementally visit all DBs. */
            for (i = 0; i < server.dbnum; i++) {
                j = (++next_db) % server.dbnum;
                db = server.db+j;
                dict = (server.maxmemory_policy == MAXMEMORY_ALLKEYS_RANDOM) ?
                        db->dict : db->expires;
                if (dictSize(dict) != 0) {
                    de = dictGetRandomKey(dict);
                    bestkey = dictGetKey(de);
                    bestdbid = j;
                    break;
                }
            }
        }

        /* Finally remove the selected key. */
        if (bestkey) {
            db = server.db+bestdbid;
            robj *keyobj = createStringObject(bestkey,sdslen(bestkey));
            propagateExpire(db,keyobj,server.lazyfree_lazy_eviction);
            /* We compute the amount of memory freed by db*Delete() alone.
             * It is possible that actually the memory needed to propagate
             * the DEL in AOF and replication link is greater than the one
             * we are freeing removing the key, but we can't account for
             * that otherwise we would never exit the loop.
             *
             * AOF and Output buffer memory will be freed eventually so
             * we only care about memory used by the key space. */
            delta = (long long) zmalloc_used_memory();
            latencyStartMonitor(eviction_latency);
            if (server.lazyfree_lazy_eviction)
                dbAsyncDelete(db,keyobj);
            else
                dbSyncDelete(db,keyobj);
            latencyEndMonitor(eviction_latency);
            latencyAddSampleIfNeeded("eviction-del",eviction_latency);
            latencyRemoveNestedEvent(latency,eviction_latency);
            delta -= (long long) zmalloc_used_memory();
            mem_freed += delta;
            server.stat_evictedkeys++;
            notifyKeyspaceEvent(NOTIFY_EVICTED, "evicted",
                keyobj, db->id);
            decrRefCount(keyobj);
            keys_freed++;

            /* When the memory to free starts to be big enough, we may
             * start spending so much time here that is impossible to
             * deliver data to the slaves fast enough, so we force the
             * transmission here inside the loop. */
            if (slaves) flushSlavesOutputBuffers();

            /* Normally our stop condition is the ability to release
             * a fixed, pre-computed amount of memory. However when we
             * are deleting objects in another thread, it's better to
             * check, from time to time, if we already reached our target
             * memory, since the "mem_freed" amount is computed only
             * across the dbAsyncDelete() call, while the thread can
             * release the memory all the time. */
            if (server.lazyfree_lazy_eviction && !(keys_freed % 16)) {
                if (getMaxmemoryState(NULL,NULL,NULL,NULL) == C_OK) {
                    /* Let's satisfy our stop condition. */
                    mem_freed = mem_tofree;
                }
            }
        }

        if (!keys_freed) {
            latencyEndMonitor(latency);
            latencyAddSampleIfNeeded("eviction-cycle",latency);
            goto cant_free; /* nothing to free... */
        }
    }
    latencyEndMonitor(latency);
    latencyAddSampleIfNeeded("eviction-cycle",latency);
    return C_OK;

cant_free:
    /* We are here if we are not able to reclaim memory. There is only one
     * last thing we can try: check if the lazyfree thread has jobs in queue
     * and wait... */
    while(bioPendingJobsOfType(BIO_LAZY_FREE)) {
        if (((mem_reported - zmalloc_used_memory()) + mem_freed) >= mem_tofree)
            break;
        usleep(1000);
    }
    return C_ERR;
}

存储

redis是怎么存储LRU/LFU数值的呢?redis对象里有个字段专门用来记录数值。


/* Excerpt: per-object eviction metadata. The 24-bit 'lru' field stores the
 * LRU access time, or — per the article — for LFU policies a 16-bit decay
 * timestamp in the high bits plus an 8-bit counter in the low bits. */
struct redisObject {
    ……
    unsigned lru:LRU_BITS;
    ……
};

LRU_BITS 占24bit:

  • 如果是LRU算法,记录LRU时间,可以用object idletime {key}命令查看。
  • 如果是LFU算法,则分为高16位和低8位,高16位记录上一次访问衰减时间,低8位记录计数器(Counter)数值。

至于这个数值是怎么维护的,就不在本篇文章展开了。

参考:
《[[redis 源码走读] maxmemory 数据淘汰策略]( https://blog.csdn.net/wenfh20...
走近源码:Redis如何清除过期key
Redis中的LFU算法
redis内存限制管理---maxmemory配置详解

你可能感兴趣的:(redis)