基于redis5.0
入口
redis周期删除过期数据的函数是activeExpireCycle
,有两个地方调用这个函数:
- beforeSleep:redis在阻塞等待文件事件之前,即进程阻塞之前,会调用
beforeSleep
函数,清理过期数据的模式是ACTIVE_EXPIRE_CYCLE_FAST
,快速清理,不影响主流程。
// server.c
/* This function gets called every time Redis is entering the
* main loop of the event driven library, that is, before to sleep
* for ready file descriptors. */
void beforeSleep(struct aeEventLoop *eventLoop) {
……
/* Run a fast expire cycle (the called function will return
* ASAP if a fast cycle is not needed). */
if (server.active_expire_enabled && server.masterhost == NULL)
activeExpireCycle(ACTIVE_EXPIRE_CYCLE_FAST);
……
}
- databasesCron:redis周期任务,清理过期数据的模式是
ACTIVE_EXPIRE_CYCLE_SLOW
,相对ACTIVE_EXPIRE_CYCLE_FAST
执行时间稍微长点。
// server.c
void databasesCron(void) {
/* Expire keys by random sampling. Not required for slaves
* as master will synthesize DELs for us. */
if (server.active_expire_enabled) {
if (server.masterhost == NULL) {
activeExpireCycle(ACTIVE_EXPIRE_CYCLE_SLOW);
} else {
expireSlaveKeys();
}
}
……
}
当前节点为master
时,server.masterhost == NULL
,才执行activeExpireCycle
函数。
清理
是否允许FAST模式
// server.h
int active_expire_effort; /* 努力力度,1-10取值范围,默认 1,也就是遍历过期字典的力度,力度越大,遍历数量越多,但是性能损耗更多*/
double stat_expired_stale_perc; /* 已过期键占比近似值:以先前值占比95%,本次调用运行值占比 5%进行加权平均 */
// expire.c
#define ACTIVE_EXPIRE_CYCLE_ACCEPTABLE_STALE 10 /* % of stale keys after which we do extra efforts. */
#define ACTIVE_EXPIRE_CYCLE_FAST_DURATION 1000 /* Microseconds. */
void activeExpireCycle(int type) {
effort = server.active_expire_effort-1, /* Rescale from 0 to 9. */
config_cycle_fast_duration = ACTIVE_EXPIRE_CYCLE_FAST_DURATION + ACTIVE_EXPIRE_CYCLE_FAST_DURATION/4*effort,
config_cycle_acceptable_stale = ACTIVE_EXPIRE_CYCLE_ACCEPTABLE_STALE - effort;
long long start = ustime() /* server.c,单位us */
……
if (type == ACTIVE_EXPIRE_CYCLE_FAST) {
/* Don't start a fast cycle if the previous cycle did not exit
* for time limit, unless the percentage of estimated stale keys is
* too high. Also never repeat a fast cycle for the same period
* as the fast cycle total duration itself. */
if (!timelimit_exit &&
server.stat_expired_stale_perc < config_cycle_acceptable_stale)
return;
if (start < last_fast_cycle + (long long)config_cycle_fast_duration*2)
return;
last_fast_cycle = start;
}
……
}
执行FAST
模式有几个前提条件:
- 上一次任务不是因为超时而退出,且已过期键占比近似值
server.stat_expired_stale_perc
小于可容忍上限config_cycle_acceptable_stale
:也就是说过期数据没有那么多,SHOW
模式够用,不需要通过FAST
来协助。 - 距离上一次
FAST
时间,未超过指定的时间间隔,默认是2000us
。
时间&数量限制
// server.h
#define CRON_DBS_PER_CALL 16 /* 每次检查多少个数据库 */
#define CONFIG_DEFAULT_HZ 10 /* Time interrupt calls/sec. */
// server.c
server.hz = server.config_hz = CONFIG_DEFAULT_HZ;
// expire.c
#define ACTIVE_EXPIRE_CYCLE_SLOW_TIME_PERC 25 /* Max % of CPU to use. */
#define ACTIVE_EXPIRE_CYCLE_FAST_DURATION 1000 /* Microseconds. */
void activeExpireCycle(int type) {
……
int dbs_per_call = CRON_DBS_PER_CALL;
config_cycle_slow_time_perc = ACTIVE_EXPIRE_CYCLE_SLOW_TIME_PERC + 2 * effort,
config_cycle_fast_duration = ACTIVE_EXPIRE_CYCLE_FAST_DURATION + ACTIVE_EXPIRE_CYCLE_FAST_DURATION/4*effort,
……
/* We usually should test CRON_DBS_PER_CALL per iteration, with
* two exceptions:
*
* 1) Don't test more DBs than we have.
* 2) If last time we hit the time limit, we want to scan all DBs
* in this iteration, as there is work to do in some DB and we don't want
* expired keys to use memory for too much time. */
if (dbs_per_call > server.dbnum || timelimit_exit)
dbs_per_call = server.dbnum;
/* We can use at max 'config_cycle_slow_time_perc' percentage of CPU
* time per iteration. Since this function gets called with a frequency of
* server.hz times per second, the following is the max amount of
* microseconds we can spend in this function. */
timelimit = config_cycle_slow_time_perc*1000000/server.hz/100;
timelimit_exit = 0;
if (timelimit <= 0) timelimit = 1;
if (type == ACTIVE_EXPIRE_CYCLE_FAST)
timelimit = config_cycle_fast_duration; /* in microseconds. */
……
}
一次扫描dbs_per_call
个DB,默认值为16
:
- 如果超过redis的DB数量,则
dbs_per_call
改为redis的DB数量。 - 如果上一次是因为timelimit_exit(超时)而结束,说明过期数据很多,则需要扫描所有DB,尽快清理掉所有过期数据,
dbs_per_call
改为redis的DB数量。
计算单次执行时间timelimit:
SLOW
模式下,默认为25*1000000/10/100
us。FAST
模式下,默认为1000
us。
循环
// dict.h
/* This is the initial size of every hash table */
#define DICT_HT_INITIAL_SIZE 4
// expire.c
void activeExpireCycle(int type) {
……
static unsigned int current_db = 0; /* Last DB tested. */
……
for (j = 0; j < dbs_per_call && timelimit_exit == 0; j++) {
/* Expired and checked in a single loop. */
unsigned long expired, sampled;
redisDb *db = server.db+(current_db % server.dbnum);
/* Increment the DB now so we are sure if we run out of time
* in the current DB we'll restart from the next. This allows to
* distribute the time evenly across DBs. */
current_db++;
/* Continue to expire if at the end of the cycle there are still
* a big percentage of keys to expire, compared to the number of keys
* we scanned. The percentage, stored in config_cycle_acceptable_stale
* is not fixed, but depends on the Redis configured "expire effort". */
do {
unsigned long num, slots;
long long now, ttl_sum;
int ttl_samples;
iteration++;
/* If there is nothing to expire try next DB ASAP. */
if ((num = dictSize(db->expires)) == 0) {
db->avg_ttl = 0;
break;
}
slots = dictSlots(db->expires);
now = mstime();
/* When there are less than 1% filled slots, sampling the key
* space is expensive, so stop here waiting for better times...
* The dictionary will be resized asap. */
if (slots > DICT_HT_INITIAL_SIZE &&
(num*100/slots < 1)) break;
……
/* We can't block forever here even if there are many keys to
* expire. So after a given amount of milliseconds return to the
* caller waiting for the other active expire cycle. */
if ((iteration & 0xf) == 0) { /* check once every 16 iterations. */
elapsed = ustime()-start;
if (elapsed > timelimit) {
timelimit_exit = 1;
server.stat_expired_time_cap_reached_count++;
break;
}
}
/* We don't repeat the cycle for the current database if there are
* an acceptable amount of stale keys (logically expired but yet
* not reclaimed). */
} while (……);
}
……
}
在未超时的前提下,循环DB清理数据:
- 静态变量
current_db
记录上一次清理的DB序号,每次都加1。 - 如果当前DB没有
设置了过期时间
的记录,则结束当前DB循环,清理下一个DB。 - 如果存在过期数据,但是存储空间使用率不足1%(过期数据量/hash桶数量 < 0.01),这样遍历数据有效命中率会很低,处理起来会浪费时间,后面的访问会很快触发字典的缩容,缩容后再进行处理效率更高, 结束当前DB循环,清理下一个DB。
- 每循环
16(0xf)
次检查下是否已经超时,如果已经超时,则设置超时标志timelimit_exit
,即结束当前任务。
清理
// expire.c
#define ACTIVE_EXPIRE_CYCLE_KEYS_PER_LOOP 20 /* Keys for each DB loop. */
void activeExpireCycle(int type) {
……
config_keys_per_loop = ACTIVE_EXPIRE_CYCLE_KEYS_PER_LOOP + ACTIVE_EXPIRE_CYCLE_KEYS_PER_LOOP/4 * effort,
……
long total_sampled = 0;
long total_expired = 0;
for (j = 0; j < dbs_per_call && timelimit_exit == 0; j++) {
……
do {
unsigned long num, slots;
long long now, ttl_sum;
int ttl_samples;
iteration++;
……
expired = 0;
sampled = 0;
ttl_sum = 0;
ttl_samples = 0;
if (num > config_keys_per_loop)
num = config_keys_per_loop;
long max_buckets = num*20;
long checked_buckets = 0;
while (sampled < num && checked_buckets < max_buckets) {
for (int table = 0; table < 2; table++) {
if (table == 1 && !dictIsRehashing(db->expires)) break;
unsigned long idx = db->expires_cursor;
idx &= db->expires->ht[table].sizemask;
dictEntry *de = db->expires->ht[table].table[idx];
long long ttl;
/* Scan the current bucket of the current table. */
checked_buckets++;
while(de) {
/* Get the next entry now since this entry may get
* deleted. */
dictEntry *e = de;
de = de->next;
ttl = dictGetSignedIntegerVal(e)-now;
if (activeExpireCycleTryExpire(db,e,now)) expired++;
if (ttl > 0) {
/* We want the average TTL of keys yet
* not expired. */
ttl_sum += ttl;
ttl_samples++;
}
sampled++;
}
}
db->expires_cursor++;
}
total_expired += expired;
total_sampled += sampled;
/* Update the average TTL stats for this database. */
if (ttl_samples) {
long long avg_ttl = ttl_sum/ttl_samples;
/* Do a simple running average with a few samples.
* We just use the current estimate with a weight of 2%
* and the previous estimate with a weight of 98%. */
if (db->avg_ttl == 0) db->avg_ttl = avg_ttl;
db->avg_ttl = (db->avg_ttl/50)*49 + (avg_ttl/50);
}
} while (sampled == 0 || (expired*100/sampled) > config_cycle_acceptable_stale);
}
……
}
int activeExpireCycleTryExpire(redisDb *db, dictEntry *de, long long now) {
long long t = dictGetSignedIntegerVal(de);
if (now > t) {
sds key = dictGetKey(de);
robj *keyobj = createStringObject(key,sdslen(key));
propagateExpire(db,keyobj,server.lazyfree_lazy_expire);
if (server.lazyfree_lazy_expire)
dbAsyncDelete(db,keyobj);
else
dbSyncDelete(db,keyobj);
notifyKeyspaceEvent(NOTIFY_EXPIRED,
"expired",keyobj,db->id);
signalModifiedKey(NULL, db, keyobj);
decrRefCount(keyobj);
server.stat_expiredkeys++;
return 1;
} else {
return 0;
}
}
- 设定最大允许抽查的过期数据数量(num),抽查的数据最大不超过
config_keys_per_loop(默认20)
。 - 设定最大允许抽查的hash桶数量(max_buckets),抽查的桶数量最大不超过
config_keys_per_loop的20倍(即默认400)
。
redis的hash数据结构是数组+链表
的方式,每遍历一格数组,抽查的bucket数量就加一;而只有在链表里有数据的情况下,每检查一个数据对象(dictEntry),才算入num计数中;
极端情况下:遍历了max_buckets个bucket都没有过期数据,或者一个bucket内就拥有了num个过期数据。
- 当hash正在扩容时,redis会复制出一个hash对象,所以是遍历两个table
for (int table = 0; table < 2; table++)
,但是在遍历第二个table是会通过dictIsRehashing
判断是否存在(即是否正在扩容)。 - 通过
activeExpireCycleTryExpire
清理过期数据。 - 根据清理结果,重新设置当前DB的平均过期时间数据(用于查看/统计redis DB情况)。
如果一轮抽样到的 key 中过期的比例是在可容忍的范围config_cycle_acceptable_stale(默认10%)
,那么这个 db 就不必再抽样了,清理下一个DB,否则继续清理当前DB。
收尾
void activeExpireCycle(int type) {
……
elapsed = ustime()-start;
server.stat_expire_cycle_time_used += elapsed;
latencyAddSampleIfNeeded("expire-cycle",elapsed/1000);
/* Update our estimate of keys existing but yet to be expired.
* Running average with this sample accounting for 5%. */
double current_perc;
if (total_sampled) {
current_perc = (double)total_expired/total_sampled;
} else
current_perc = 0;
server.stat_expired_stale_perc = (current_perc*0.05)+ (server.stat_expired_stale_perc*0.95);
}
重新计算已过期键占比近似值
:本轮占比 5%,历史值占比 95%。
参考
Redis 源码分析之 key 过期
Redis 源码阅读 --- Database
[[redis 源码走读] redis 过期策略]( https://wenfh2020.com/2020/02...