Based on Redis 5.0.
Entry point
Every time Redis executes a command, it checks whether memory currently needs to be reclaimed.
// server.c
int processCommand(client *c) {
……
/* Handle the maxmemory directive.
*
* Note that we do not want to reclaim memory if we are here re-entering
* the event loop since there is a busy Lua script running in timeout
* condition, to avoid mixing the propagation of scripts with the
* propagation of DELs due to eviction. */
if (server.maxmemory && !server.lua_timedout) {
int out_of_memory = freeMemoryIfNeededAndSafe() == C_ERR;
/* freeMemoryIfNeeded may flush slave output buffers. This may result
* into a slave, that may be the active client, to be freed. */
if (server.current_client == NULL) return C_ERR;
/* It was impossible to free enough memory, and the command the client
* is trying to execute is denied during OOM conditions or the client
* is in MULTI/EXEC context? Error. */
if (out_of_memory &&
(c->cmd->flags & CMD_DENYOOM ||
(c->flags & CLIENT_MULTI && c->cmd->proc != execCommand))) {
flagTransaction(c);
addReply(c, shared.oomerr);
return C_OK;
}
/* Save out_of_memory result at script start, otherwise if we check OOM
* untill first write within script, memory used by lua stack and
* arguments might interfere. */
if (c->cmd->proc == evalCommand || c->cmd->proc == evalShaCommand) {
server.lua_oom = out_of_memory;
}
}
……
}
If maxmemory is configured and there is no timed-out Lua script (while a Lua script is timed out, Redis only allows a limited set of commands to run), Redis checks whether memory needs to be freed.
// evict.c
int freeMemoryIfNeededAndSafe(void) {
if (server.lua_timedout || server.loading) return C_OK;
return freeMemoryIfNeeded();
}
The heart of it is the freeMemoryIfNeeded function; let's walk through its important steps one at a time.
if (server.masterhost && server.repl_slave_ignore_maxmemory) return C_OK;
If the current node is a replica and it ignores maxmemory (the default configuration is to ignore it), nothing needs to be done.
if (getMaxmemoryState(&mem_reported,NULL,&mem_tofree,NULL) == C_OK)
return C_OK;
/* Get the memory status from the point of view of the maxmemory directive:
* if the memory used is under the maxmemory setting then C_OK is returned.
* Otherwise, if we are over the memory limit, the function returns
* C_ERR.
*
* The function may return additional info via reference, only if the
* pointers to the respective arguments is not NULL. Certain fields are
* populated only when C_ERR is returned:
*
* 'total' total amount of bytes used.
* (Populated both for C_ERR and C_OK)
*
* 'logical' the amount of memory used minus the slaves/AOF buffers.
* (Populated when C_ERR is returned)
*
* 'tofree' the amount of memory that should be released
* in order to return back into the memory limits.
* (Populated when C_ERR is returned)
*
* 'level' this usually ranges from 0 to 1, and reports the amount of
* memory currently used. May be > 1 if we are over the memory
* limit.
* (Populated both for C_ERR and C_OK)
*/
int getMaxmemoryState(size_t *total, size_t *logical, size_t *tofree, float *level) {
size_t mem_reported, mem_used, mem_tofree;
/* Check if we are over the memory usage limit. If we are not, no need
* to subtract the slaves output buffers. We can just return ASAP. */
mem_reported = zmalloc_used_memory();
if (total) *total = mem_reported;
/* We may return ASAP if there is no need to compute the level. */
int return_ok_asap = !server.maxmemory || mem_reported <= server.maxmemory;
if (return_ok_asap && !level) return C_OK;
/* Remove the size of slaves output buffers and AOF buffer from the
* count of used memory. */
mem_used = mem_reported;
size_t overhead = freeMemoryGetNotCountedMemory();
mem_used = (mem_used > overhead) ? mem_used-overhead : 0;
/* Compute the ratio of memory usage. */
if (level) {
if (!server.maxmemory) {
*level = 0;
} else {
*level = (float)mem_used / (float)server.maxmemory;
}
}
if (return_ok_asap) return C_OK;
/* Check if we are still over the memory limit. */
if (mem_used <= server.maxmemory) return C_OK;
/* Compute how much memory we need to free. */
mem_tofree = mem_used - server.maxmemory;
if (logical) *logical = mem_used;
if (tofree) *tofree = mem_tofree;
return C_ERR;
}
Get the current memory usage: if it is below maxmemory the function returns C_OK, i.e. no eviction runs; otherwise it returns C_ERR.
The function also reports extra information through four pointer arguments (see the standalone sketch below):

- total: total number of bytes used; valid for both C_OK and C_ERR.
- logical: memory used minus the slave/AOF buffers; valid only when C_ERR is returned.
- tofree: the amount of memory that needs to be freed; valid only when C_ERR is returned.
- level: the ratio of memory used, normally between 0 and 1; greater than 1 when over the memory limit. Valid for both C_OK and C_ERR.
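As a standalone illustration of that arithmetic, here is a toy C program; the numbers and the program itself are made up for illustration, and only the formulas mirror what getMaxmemoryState computes:

```c
#include <stdio.h>
#include <stddef.h>

int main(void) {
    /* Toy numbers; the formulas mirror getMaxmemoryState(). */
    size_t maxmemory    = 100 * 1024 * 1024; /* configured limit: 100 MB  */
    size_t mem_reported = 120 * 1024 * 1024; /* zmalloc_used_memory()     */
    size_t overhead     =   8 * 1024 * 1024; /* slave/AOF buffer overhead */

    size_t mem_used = mem_reported > overhead ? mem_reported - overhead : 0;
    float  level    = (float)mem_used / (float)maxmemory;

    if (mem_used > maxmemory)                /* over the limit -> C_ERR */
        printf("tofree=%zu bytes, level=%.2f\n",
               mem_used - maxmemory, level); /* tofree=12582912, level=1.12 */
    return 0;
}
```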
If memory usage is not over the limit, execution stops here.
If it is over the limit, Redis starts executing the logic for the configured eviction policy.
Eviction
Here are the eviction policies Redis offers.

| Policy | Description |
| --- | --- |
| noeviction | Evict nothing; the default configuration. |
| volatile-random | Among keys with an expire set, pick at random and evict. |
| allkeys-random | Among all keys, pick at random and evict. |
| volatile-ttl | Evict the keys closest to their expire time (smallest TTL). |
| volatile-lru | Among keys with an expire set, pick by the LRU algorithm and evict. |
| allkeys-lru | Among all keys, pick by the LRU algorithm and evict. |
| volatile-lfu | Among keys with an expire set, pick by the LFU algorithm and evict. |
| allkeys-lfu | Among all keys, pick by the LFU algorithm and evict. |
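For reference, a minimal redis.conf sketch selecting a policy; the values here are purely illustrative, not recommendations:

```conf
# Evictions start once usage exceeds this limit.
maxmemory 100mb
# One of the eight policies listed above.
maxmemory-policy allkeys-lru
# Sample size used by the approximated LRU/LFU/TTL policies.
maxmemory-samples 5
# Redis 5.0 replicas ignore maxmemory by default.
replica-ignore-maxmemory yes
```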
if (server.maxmemory_policy == MAXMEMORY_NO_EVICTION)
goto cant_free; /* We need to free memory, but policy forbids. */
When the eviction policy is noeviction, no data is evicted, but that does not mean nothing happens at all; the cant_free step discussed later still applies.
TTL/LRU/LFU eviction
This branch runs when the eviction policy selects keys by an algorithm rather than at random.
struct evictionPoolEntry *pool = EvictionPoolLRU;
……
for (i = 0; i < server.dbnum; i++) {
db = server.db+i;
dict = (server.maxmemory_policy & MAXMEMORY_FLAG_ALLKEYS) ?
db->dict : db->expires;
if ((keys = dictSize(dict)) != 0) {
evictionPoolPopulate(i, dict, db->dict, pool);
total_keys += keys;
}
}
First decide whether to evict from all keys or only from keys that have an expire set. In a Redis db, dict stores all of the data (including keys with an expire set), while expires stores only the keys that have an expire set. EvictionPoolLRU holds the candidates to be freed, and is initialized as follows:
// evict.c
#define EVPOOL_SIZE 16
#define EVPOOL_CACHED_SDS_SIZE 255
struct evictionPoolEntry {
unsigned long long idle; /* Object idle time (inverse frequency for LFU) */
sds key; /* Key name. */
sds cached; /* Cached SDS object for key name. */
int dbid; /* Key DB number. */
};
static struct evictionPoolEntry *EvictionPoolLRU;
void evictionPoolAlloc(void) {
struct evictionPoolEntry *ep;
int j;
ep = zmalloc(sizeof(*ep)*EVPOOL_SIZE);
for (j = 0; j < EVPOOL_SIZE; j++) {
ep[j].idle = 0;
ep[j].key = NULL;
ep[j].cached = sdsnewlen(NULL,EVPOOL_CACHED_SDS_SIZE);
ep[j].dbid = 0;
}
EvictionPoolLRU = ep;
}
EvictionPoolLRU is kept sorted by idle in ascending order, and entries with the largest idle are evicted first. In other words, at eviction time Redis scans from the tail of EvictionPoolLRU for the first entry whose key is not NULL (a tiny sketch of this follows).
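A minimal standalone sketch of that tail-first pick; the toy pool and pick_victim are hypothetical, and the real pool has EVPOOL_SIZE (16) entries:

```c
#include <stdio.h>
#include <stddef.h>

#define POOL_SIZE 4 /* 16 in Redis; 4 keeps the example short */

struct entry { unsigned long long idle; const char *key; };

/* Scan from the tail (largest idle) for the first occupied slot. */
const char *pick_victim(struct entry *pool) {
    for (int k = POOL_SIZE - 1; k >= 0; k--)
        if (pool[k].key != NULL) return pool[k].key;
    return NULL;
}

int main(void) {
    /* Kept sorted by idle ascending; the tail slot happens to be empty. */
    struct entry pool[POOL_SIZE] = {
        {10, "a"}, {50, "b"}, {90, "c"}, {0, NULL}
    };
    printf("evict: %s\n", pick_victim(pool)); /* prints: evict: c */
    return 0;
}
```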
Now let's step into the evictionPoolPopulate function.
count = dictGetSomeKeys(sampledict,samples,server.maxmemory_samples);
// server.h
#define CONFIG_DEFAULT_MAXMEMORY_SAMPLES 5
// dict.c
unsigned int dictGetSomeKeys(dict *d, dictEntry **des, unsigned int count) {
……
if (dictSize(d) < count) count = dictSize(d);
maxsteps = count*10;
……
tables = dictIsRehashing(d) ? 2 : 1;
maxsizemask = d->ht[0].sizemask;
if (tables > 1 && maxsizemask < d->ht[1].sizemask)
maxsizemask = d->ht[1].sizemask;
/* Pick a random point inside the larger table. */
unsigned long i = random() & maxsizemask;
unsigned long emptylen = 0; /* Continuous empty entries so far. */
while(stored < count && maxsteps--) {
for (j = 0; j < tables; j++) {
/* Invariant of the dict.c rehashing: up to the indexes already
* visited in ht[0] during the rehashing, there are no populated
* buckets, so we can skip ht[0] for indexes between 0 and idx-1. */
if (tables == 2 && j == 0 && i < (unsigned long) d->rehashidx) {
/* Moreover, if we are currently out of range in the second
* table, there will be no elements in both tables up to
* the current rehashing index, so we jump if possible.
* (this happens when going from big to small table). */
if (i >= d->ht[1].size)
i = d->rehashidx;
else
continue;
}
if (i >= d->ht[j].size) continue; /* Out of range for this table. */
dictEntry *he = d->ht[j].table[i];
/* Count contiguous empty buckets, and jump to other
* locations if they reach 'count' (with a minimum of 5). */
if (he == NULL) {
emptylen++;
if (emptylen >= 5 && emptylen > count) {
i = random() & maxsizemask;
emptylen = 0;
}
} else {
emptylen = 0;
while (he) {
/* Collect all the elements of the buckets found non
* empty while iterating. */
*des = he;
des++;
he = he->next;
stored++;
if (stored == count) return stored;
}
}
}
i = (i+1) & maxsizemask;
}
return stored;
}
dictGetSomeKeys tries to fetch maxmemory_samples entries according to the following rules:

- Start at a random position in the db and collect keys from consecutive buckets.
- If the db's dict is being rehashed, keys may need to be fetched from both hashtables (see the sketch after this list). For why a rehash forces probing both hashtables, see how Redis rehashing works; in short, when growing, Redis allocates a new, larger array and migrates entries from the existing array step by step (since Redis is single-threaded and bounds each operation's time slice, the migration is not necessarily completed in one go and can stretch over a long period), and only once the migration finishes does the new array replace the old one.
- If, from the current random position, the run of consecutive empty buckets reaches the larger of 5 and count, pick a new random starting position; count here is the smaller of maxmemory_samples and the number of entries in the db.
- Once the required number of entries has been collected, return. Otherwise keep sampling in a loop, for at most count*10 iterations.
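To see why a rehash forces probing both tables, here is a minimal standalone sketch; toy_dict and toy_find are made-up types rather than dict.c itself, and the real tables also differ in size, which the toy ignores:

```c
#include <stdio.h>

/* Toy dict with two fixed-size bucket arrays. rehashidx == -1 means
 * no rehash is running; otherwise an entry may already have been
 * migrated to table[1], so a lookup must probe both tables. */
typedef struct {
    void *table[2][8];
    long rehashidx;
} toy_dict;

void *toy_find(toy_dict *d, unsigned long hash) {
    void *e = d->table[0][hash & 7];   /* old table first */
    if (e == NULL && d->rehashidx != -1)
        e = d->table[1][hash & 7];     /* then the new table */
    return e;
}

int main(void) {
    toy_dict d = {{{0}}, -1};
    int v = 42;
    d.rehashidx = 0;     /* pretend a rehash is in progress */
    d.table[1][5] = &v;  /* entry already migrated to the new table */
    printf("found=%d\n", *(int *)toy_find(&d, 5)); /* prints: found=42 */
    return 0;
}
```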
From these steps we can see that:

- dictGetSomeKeys does not guarantee that it returns the requested number of entries (count).
- dictGetSomeKeys samples a random subset of the data, so Redis's eviction policies do not guarantee that the eviction score is computed over all of the data.
// evict.c
void evictionPoolPopulate(int dbid, dict *sampledict, dict *keydict, struct evictionPoolEntry *pool) {
int j, k, count;
dictEntry *samples[server.maxmemory_samples];
count = dictGetSomeKeys(sampledict,samples,server.maxmemory_samples);
for (j = 0; j < count; j++) {
unsigned long long idle;
sds key;
robj *o;
dictEntry *de;
de = samples[j];
key = dictGetKey(de);
/* If the dictionary we are sampling from is not the main
* dictionary (but the expires one) we need to lookup the key
* again in the key dictionary to obtain the value object. */
if (server.maxmemory_policy != MAXMEMORY_VOLATILE_TTL) {
if (sampledict != keydict) de = dictFind(keydict, key);
o = dictGetVal(de);
}
/* Calculate the idle time according to the policy. This is called
* idle just because the code initially handled LRU, but is in fact
* just a score where an higher score means better candidate. */
if (server.maxmemory_policy & MAXMEMORY_FLAG_LRU) {
idle = estimateObjectIdleTime(o);
} else if (server.maxmemory_policy & MAXMEMORY_FLAG_LFU) {
/* When we use an LRU policy, we sort the keys by idle time
* so that we expire keys starting from greater idle time.
* However when the policy is an LFU one, we have a frequency
* estimation, and we want to evict keys with lower frequency
* first. So inside the pool we put objects using the inverted
* frequency subtracting the actual frequency to the maximum
* frequency of 255. */
idle = 255-LFUDecrAndReturn(o);
} else if (server.maxmemory_policy == MAXMEMORY_VOLATILE_TTL) {
/* In this case the sooner the expire the better. */
idle = ULLONG_MAX - (long)dictGetVal(de);
} else {
serverPanic("Unknown eviction policy in evictionPoolPopulate()");
}
/* Insert the element inside the pool.
* First, find the first empty bucket or the first populated
* bucket that has an idle time smaller than our idle time. */
k = 0;
while (k < EVPOOL_SIZE &&
pool[k].key &&
pool[k].idle < idle) k++;
if (k == 0 && pool[EVPOOL_SIZE-1].key != NULL) {
/* Can't insert if the element is < the worst element we have
* and there are no empty buckets. */
continue;
} else if (k < EVPOOL_SIZE && pool[k].key == NULL) {
/* Inserting into empty position. No setup needed before insert. */
} else {
/* Inserting in the middle. Now k points to the first element
* greater than the element to insert. */
if (pool[EVPOOL_SIZE-1].key == NULL) {
/* Free space on the right? Insert at k shifting
* all the elements from k to end to the right. */
/* Save SDS before overwriting. */
sds cached = pool[EVPOOL_SIZE-1].cached;
memmove(pool+k+1,pool+k,
sizeof(pool[0])*(EVPOOL_SIZE-k-1));
pool[k].cached = cached;
} else {
/* No free space on right? Insert at k-1 */
k--;
/* Shift all elements on the left of k (included) to the
* left, so we discard the element with smaller idle time. */
sds cached = pool[0].cached; /* Save SDS before overwriting. */
if (pool[0].key != pool[0].cached) sdsfree(pool[0].key);
memmove(pool,pool+1,sizeof(pool[0])*k);
pool[k].cached = cached;
}
}
/* Try to reuse the cached SDS string allocated in the pool entry,
* because allocating and deallocating this object is costly
* (according to the profiler, not my fantasy. Remember:
* premature optimizbla bla bla bla. */
int klen = sdslen(key);
if (klen > EVPOOL_CACHED_SDS_SIZE) {
pool[k].key = sdsdup(key);
} else {
memcpy(pool[k].cached,key,klen+1);
sdssetlen(pool[k].cached,klen);
pool[k].key = pool[k].cached;
}
pool[k].idle = idle;
pool[k].dbid = dbid;
}
}
Iterate over the randomly sampled keys, fetch each key's value, compute its idle score according to TTL/LRU/LFU, and insert it at the appropriate position in the EvictionPoolLRU array:

- If the array is full and idle is smaller than the idle of the array's first entry, do not insert.
- If the array is not full, shift every entry from the insertion position onward one slot to the right.
- If the array is full, free the array's first entry and shift every entry before the insertion position one slot to the left.
for (k = EVPOOL_SIZE-1; k >= 0; k--) {
if (pool[k].key == NULL) continue;
bestdbid = pool[k].dbid;
if (server.maxmemory_policy & MAXMEMORY_FLAG_ALLKEYS) {
de = dictFind(server.db[pool[k].dbid].dict,
pool[k].key);
} else {
de = dictFind(server.db[pool[k].dbid].expires,
pool[k].key);
}
/* Remove the entry from the pool. */
if (pool[k].key != pool[k].cached)
sdsfree(pool[k].key);
pool[k].key = NULL;
pool[k].idle = 0;
/* If the key exists, is our pick. Otherwise it is
* a ghost and we need to try the next element. */
if (de) {
bestkey = dictGetKey(de);
break;
} else {
/* Ghost... Iterate again. */
}
}
Scan from the tail of EvictionPoolLRU, take the first key whose data still exists, and remove it from EvictionPoolLRU.
Random eviction
This branch runs when the eviction policy is random.
else if (server.maxmemory_policy == MAXMEMORY_ALLKEYS_RANDOM ||
server.maxmemory_policy == MAXMEMORY_VOLATILE_RANDOM)
{
/* When evicting a random key, we try to evict a key for
* each DB, so we use the static 'next_db' variable to
* incrementally visit all DBs. */
for (i = 0; i < server.dbnum; i++) {
j = (++next_db) % server.dbnum;
db = server.db+j;
dict = (server.maxmemory_policy == MAXMEMORY_ALLKEYS_RANDOM) ?
db->dict : db->expires;
if (dictSize(dict) != 0) {
de = dictGetRandomKey(dict);
bestkey = dictGetKey(de);
bestdbid = j;
break;
}
}
}
next_db remembers which DB was sampled last time; through (++next_db) % server.dbnum, each iteration picks one random entry from a different DB in round-robin fashion.
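A minimal standalone sketch of that static round-robin cursor; next_target and dbnum = 4 are made up for illustration:

```c
#include <stdio.h>

/* The static counter survives across calls, so successive calls
 * visit the DBs in round-robin order instead of restarting at 0. */
int next_target(int dbnum) {
    static unsigned int next_db = 0;
    return (int)(++next_db % (unsigned int)dbnum);
}

int main(void) {
    for (int i = 0; i < 6; i++)
        printf("%d ", next_target(4)); /* prints: 1 2 3 0 1 2 */
    printf("\n");
    return 0;
}
```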
/* Finally remove the selected key. */
if (bestkey) {
db = server.db+bestdbid;
robj *keyobj = createStringObject(bestkey,sdslen(bestkey));
propagateExpire(db,keyobj,server.lazyfree_lazy_eviction);
/* We compute the amount of memory freed by db*Delete() alone.
* It is possible that actually the memory needed to propagate
* the DEL in AOF and replication link is greater than the one
* we are freeing removing the key, but we can't account for
* that otherwise we would never exit the loop.
*
* AOF and Output buffer memory will be freed eventually so
* we only care about memory used by the key space. */
delta = (long long) zmalloc_used_memory();
latencyStartMonitor(eviction_latency);
if (server.lazyfree_lazy_eviction)
dbAsyncDelete(db,keyobj);
else
dbSyncDelete(db,keyobj);
latencyEndMonitor(eviction_latency);
latencyAddSampleIfNeeded("eviction-del",eviction_latency);
latencyRemoveNestedEvent(latency,eviction_latency);
delta -= (long long) zmalloc_used_memory();
mem_freed += delta;
server.stat_evictedkeys++;
notifyKeyspaceEvent(NOTIFY_EVICTED, "evicted",
keyobj, db->id);
decrRefCount(keyobj);
keys_freed++;
/* When the memory to free starts to be big enough, we may
* start spending so much time here that is impossible to
* deliver data to the slaves fast enough, so we force the
* transmission here inside the loop. */
if (slaves) flushSlavesOutputBuffers();
/* Normally our stop condition is the ability to release
* a fixed, pre-computed amount of memory. However when we
* are deleting objects in another thread, it's better to
* check, from time to time, if we already reached our target
* memory, since the "mem_freed" amount is computed only
* across the dbAsyncDelete() call, while the thread can
* release the memory all the time. */
if (server.lazyfree_lazy_eviction && !(keys_freed % 16)) {
if (getMaxmemoryState(NULL,NULL,NULL,NULL) == C_OK) {
/* Let's satisfy our stop condition. */
mem_freed = mem_tofree;
}
}
}
if (!keys_freed) {
latencyEndMonitor(latency);
latencyAddSampleIfNeeded("eviction-cycle",latency);
goto cant_free; /* nothing to free... */
}
- lazyfree_lazy_eviction decides whether the data is freed asynchronously or synchronously; the difference in memory usage (zmalloc_used_memory) measured before and after the delete is counted as the amount of memory reclaimed.
- The data is deleted via dbAsyncDelete/dbSyncDelete, and propagateExpire propagates the delete event, writing it to the AOF and sending it to the replicas.
- If a full pass over the data frees nothing at all (keys_freed is 0), jump to the cant_free step.
// bio.h
#define BIO_CLOSE_FILE 0 /* Deferred close(2) syscall. */
#define BIO_AOF_FSYNC 1 /* Deferred AOF fsync. */
#define BIO_LAZY_FREE 2 /* Deferred objects freeing. */
#define BIO_NUM_OPS 3
cant_free:
/* We are here if we are not able to reclaim memory. There is only one
* last thing we can try: check if the lazyfree thread has jobs in queue
* and wait... */
while(bioPendingJobsOfType(BIO_LAZY_FREE)) {
if (((mem_reported - zmalloc_used_memory()) + mem_freed) >= mem_tofree)
break;
usleep(1000);
}
return C_ERR;
Reaching cant_free means either the configured policy forbids eviction, or the pass failed to reclaim any memory.
If there are pending BIO_LAZY_FREE jobs, i.e. unfinished memory-release tasks, the loop sleeps (1000 microseconds at a time) waiting for them, until either enough memory has been released or all jobs have completed.
int freeMemoryIfNeeded(void) {
……
if (getMaxmemoryState(&mem_reported,NULL,&mem_tofree,NULL) == C_OK)
return C_OK;
……
if (server.maxmemory_policy == MAXMEMORY_NO_EVICTION)
goto cant_free;
……
mem_freed = 0;
……
while (mem_freed < mem_tofree) {
int keys_freed = 0;
……
if (bestkey) {
delta = (long long) zmalloc_used_memory();
……
delta -= (long long) zmalloc_used_memory();
mem_freed += delta;
keys_freed++;
……
}
if (!keys_freed) {
……
goto cant_free;
}
}
……
}
The function ends when the policy forbids freeing memory, when a single loop iteration finds no memory to free, or once enough memory has been freed.
To sum up, the rough flow of freeMemoryIfNeeded is:

1. Get the current memory usage and check whether it exceeds the configured maximum; if not, stop.
2. From the memory usage and the configured maximum, compute how much memory needs to be reclaimed.
3. Depending on which policy is configured:
   - No-eviction policy: perform no cleanup, but the function is not finished; jump to step 6.
   - Random policies: round-robin over the DBs and fetch one random entry.
   - LRU/TTL/LFU policies:
     - Compute the key's eviction score idle from LRU/TTL/LFU; the larger the score, the sooner the key is evicted.
     - Compare the key's idle against the entries in the eviction pool EvictionPoolLRU and insert it; EvictionPoolLRU is kept sorted by idle in ascending order.
     - Scan the EvictionPoolLRU array from the tail and take the first entry.
4. If the previous step picked a key:
   - Take memory snapshot 1.
   - Write the delete to the AOF, notify the replicas, and delete the data.
   - Take memory snapshot 2.
   - Count snapshot 1 minus snapshot 2 as the memory reclaimed this round and add it to mem_freed.
5. If mem_freed has reached the target, stop.
6. If the configured policy is no-eviction, or not enough memory was reclaimed above:
   - While there are unfinished memory-release tasks, sleep 1000 microseconds at a time; after each sleep, re-measure the current memory to see how much has been released so far.
   - Loop until the reclaimed total reaches the target or all memory-release tasks have completed.
Note also that Redis's TTL/LRU/LFU policies do not compare across all of the data; they compare within a randomly sampled subset. Traversing and sorting the entire keyspace would cost time, memory and CPU that a performance-oriented tool like Redis cannot afford.
Full code:
// evict.c
/* This function is periodically called to see if there is memory to free
* according to the current "maxmemory" settings. In case we are over the
* memory limit, the function will try to free some memory to return back
* under the limit.
*
* The function returns C_OK if we are under the memory limit or if we
* were over the limit, but the attempt to free memory was successful.
* Otherwise if we are over the memory limit, but not enough memory
* was freed to return back under the limit, the function returns C_ERR. */
int freeMemoryIfNeeded(void) {
/* By default replicas should ignore maxmemory
* and just be masters exact copies. */
if (server.masterhost && server.repl_slave_ignore_maxmemory) return C_OK;
size_t mem_reported, mem_tofree, mem_freed;
mstime_t latency, eviction_latency;
long long delta;
int slaves = listLength(server.slaves);
/* When clients are paused the dataset should be static not just from the
* POV of clients not being able to write, but also from the POV of
* expires and evictions of keys not being performed. */
if (clientsArePaused()) return C_OK;
if (getMaxmemoryState(&mem_reported,NULL,&mem_tofree,NULL) == C_OK)
return C_OK;
mem_freed = 0;
if (server.maxmemory_policy == MAXMEMORY_NO_EVICTION)
goto cant_free; /* We need to free memory, but policy forbids. */
latencyStartMonitor(latency);
while (mem_freed < mem_tofree) {
int j, k, i, keys_freed = 0;
static unsigned int next_db = 0;
sds bestkey = NULL;
int bestdbid;
redisDb *db;
dict *dict;
dictEntry *de;
if (server.maxmemory_policy & (MAXMEMORY_FLAG_LRU|MAXMEMORY_FLAG_LFU) ||
server.maxmemory_policy == MAXMEMORY_VOLATILE_TTL)
{
struct evictionPoolEntry *pool = EvictionPoolLRU;
while(bestkey == NULL) {
unsigned long total_keys = 0, keys;
/* We don't want to make local-db choices when expiring keys,
* so to start populate the eviction pool sampling keys from
* every DB. */
for (i = 0; i < server.dbnum; i++) {
db = server.db+i;
dict = (server.maxmemory_policy & MAXMEMORY_FLAG_ALLKEYS) ?
db->dict : db->expires;
if ((keys = dictSize(dict)) != 0) {
evictionPoolPopulate(i, dict, db->dict, pool);
total_keys += keys;
}
}
if (!total_keys) break; /* No keys to evict. */
/* Go backward from best to worst element to evict. */
for (k = EVPOOL_SIZE-1; k >= 0; k--) {
if (pool[k].key == NULL) continue;
bestdbid = pool[k].dbid;
if (server.maxmemory_policy & MAXMEMORY_FLAG_ALLKEYS) {
de = dictFind(server.db[pool[k].dbid].dict,
pool[k].key);
} else {
de = dictFind(server.db[pool[k].dbid].expires,
pool[k].key);
}
/* Remove the entry from the pool. */
if (pool[k].key != pool[k].cached)
sdsfree(pool[k].key);
pool[k].key = NULL;
pool[k].idle = 0;
/* If the key exists, is our pick. Otherwise it is
* a ghost and we need to try the next element. */
if (de) {
bestkey = dictGetKey(de);
break;
} else {
/* Ghost... Iterate again. */
}
}
}
}
/* volatile-random and allkeys-random policy */
else if (server.maxmemory_policy == MAXMEMORY_ALLKEYS_RANDOM ||
server.maxmemory_policy == MAXMEMORY_VOLATILE_RANDOM)
{
/* When evicting a random key, we try to evict a key for
* each DB, so we use the static 'next_db' variable to
* incrementally visit all DBs. */
for (i = 0; i < server.dbnum; i++) {
j = (++next_db) % server.dbnum;
db = server.db+j;
dict = (server.maxmemory_policy == MAXMEMORY_ALLKEYS_RANDOM) ?
db->dict : db->expires;
if (dictSize(dict) != 0) {
de = dictGetRandomKey(dict);
bestkey = dictGetKey(de);
bestdbid = j;
break;
}
}
}
/* Finally remove the selected key. */
if (bestkey) {
db = server.db+bestdbid;
robj *keyobj = createStringObject(bestkey,sdslen(bestkey));
propagateExpire(db,keyobj,server.lazyfree_lazy_eviction);
/* We compute the amount of memory freed by db*Delete() alone.
* It is possible that actually the memory needed to propagate
* the DEL in AOF and replication link is greater than the one
* we are freeing removing the key, but we can't account for
* that otherwise we would never exit the loop.
*
* AOF and Output buffer memory will be freed eventually so
* we only care about memory used by the key space. */
delta = (long long) zmalloc_used_memory();
latencyStartMonitor(eviction_latency);
if (server.lazyfree_lazy_eviction)
dbAsyncDelete(db,keyobj);
else
dbSyncDelete(db,keyobj);
latencyEndMonitor(eviction_latency);
latencyAddSampleIfNeeded("eviction-del",eviction_latency);
latencyRemoveNestedEvent(latency,eviction_latency);
delta -= (long long) zmalloc_used_memory();
mem_freed += delta;
server.stat_evictedkeys++;
notifyKeyspaceEvent(NOTIFY_EVICTED, "evicted",
keyobj, db->id);
decrRefCount(keyobj);
keys_freed++;
/* When the memory to free starts to be big enough, we may
* start spending so much time here that is impossible to
* deliver data to the slaves fast enough, so we force the
* transmission here inside the loop. */
if (slaves) flushSlavesOutputBuffers();
/* Normally our stop condition is the ability to release
* a fixed, pre-computed amount of memory. However when we
* are deleting objects in another thread, it's better to
* check, from time to time, if we already reached our target
* memory, since the "mem_freed" amount is computed only
* across the dbAsyncDelete() call, while the thread can
* release the memory all the time. */
if (server.lazyfree_lazy_eviction && !(keys_freed % 16)) {
if (getMaxmemoryState(NULL,NULL,NULL,NULL) == C_OK) {
/* Let's satisfy our stop condition. */
mem_freed = mem_tofree;
}
}
}
if (!keys_freed) {
latencyEndMonitor(latency);
latencyAddSampleIfNeeded("eviction-cycle",latency);
goto cant_free; /* nothing to free... */
}
}
latencyEndMonitor(latency);
latencyAddSampleIfNeeded("eviction-cycle",latency);
return C_OK;
cant_free:
/* We are here if we are not able to reclaim memory. There is only one
* last thing we can try: check if the lazyfree thread has jobs in queue
* and wait... */
while(bioPendingJobsOfType(BIO_LAZY_FREE)) {
if (((mem_reported - zmalloc_used_memory()) + mem_freed) >= mem_tofree)
break;
usleep(1000);
}
return C_ERR;
}
Storage
How does Redis store the LRU/LFU value? Every Redis object carries a field dedicated to recording it.
struct redisObject {
……
unsigned lru:LRU_BITS;
……
};
LRU_BITS occupies 24 bits:

- Under an LRU policy it records the LRU time, which can be inspected with the object idletime {key} command.
- Under an LFU policy it is split into a high 16-bit part and a low 8-bit part: the high 16 bits record the last access-decay time, and the low 8 bits record the counter value (see the sketch below).
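A minimal sketch of that bit layout; redisObject_like is a stand-in rather than the real struct, and the shift and mask simply match the split described above:

```c
#include <stdio.h>

#define LRU_BITS 24

/* Stand-in for redisObject: only the 24-bit lru field matters here. */
struct redisObject_like {
    unsigned lru:LRU_BITS;
};

int main(void) {
    /* Under LFU: high 16 bits = last decay time, low 8 bits = counter. */
    struct redisObject_like o = { .lru = (12345u << 8) | 42u };
    unsigned ldt     = o.lru >> 8;   /* 12345 */
    unsigned counter = o.lru & 0xFF; /* 42    */
    printf("ldt=%u counter=%u\n", ldt, counter);
    return 0;
}
```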
How this value is maintained is beyond the scope of this article.
References:

- [redis 源码走读] maxmemory 数据淘汰策略: https://blog.csdn.net/wenfh20...
- 走近源码:Redis如何清除过期key
- Redis中的LFU算法
- redis内存限制管理---maxmemory配置详解