redis Key过期及删除 LazyFree 渐进式Rehash 内存淘汰

1. key操作

keys *
keys n*r
keys nave?

scan 0
scan 0 match n*r count 5

del key1 key2
unlink key1 key2
exists key1
rename a b
expire a 10
ttl a
type a
dbsize
randomkey
debug object key1

flushdb async
flushall async

2. 过期

2.1 使用

过期后根据过期删除策略进行删除。

# 通用
expire key time_in_second
pexpire key time_in_millisecond
pexpireat key timestamp_in_millisecond
expireat key timestamp_in_second

# String 且 nx(if not exists)
set key value ex 5 nx

# String 且 xx
set key value px 5000 xx
setex key 10 value

# 查看
ttl key
pttl key

2.2 过期字典(expires)


数据结构

/* Global server state (excerpt): only the database-related fields are shown. */
struct redisServer {
    redisDb *db;    /* Array of databases, indexed as server.db[dbid]. */
    int dbnum;      /* Number of databases in db. */
    /* ... remaining fields omitted ... */
};
/* One logical database (excerpt). */
typedef struct redisDb {
    dict *dict;     /* The keyspace: every key/value pair of this database. */
    dict *expires;  /* Expire dictionary: expiration time of each key that has one. */
    /* ... remaining fields omitted ... */
} redisDb;
字典
/* Hash dictionary; it holds two tables so that it can be rehashed
 * incrementally (dictExpand installs the new table as ht[1]). */
typedef struct dict {
    // Type descriptor of this dictionary
    dictType *type;
    void *privdata;
    // The two hash tables
    dictht ht[2];
    // Rehash progress; -1 when no rehash is in progress
    long rehashidx;
    unsigned long iterators;
} dict;
hashtable
/* A single hash table. */
typedef struct dictht {
    dictEntry **table;      /* Bucket array. */
    unsigned long size;     /* Number of buckets. */
    unsigned long sizemask; /* Index mask, always size-1. */
    unsigned long used;     /* Number of entries currently stored. */
} dictht;
hash表节点
/* A single hash table node; nodes falling into the same bucket are chained
 * through `next`. */
typedef struct dictEntry {
    // Key
    void *key;
    // Value (dictSetSignedIntegerVal stores the expiration time in s64)
    union {
        void *val;
        uint64_t u64;
        int64_t s64;
        double d;
    } v;
    struct dictEntry *next;  // Next node in the same bucket, forming a linked list
} dictEntry;


浏览源码


expire.c/expireGenericCommand(xxx)

/*
 * Shared implementation behind the EXPIRE-style commands.
 *
 * c        - calling client; argv[1] is the key, argv[2] the time argument.
 * basetime - value added to the parsed time after unit conversion.
 * unit     - UNIT_SECONDS makes the parsed time be multiplied by 1000.
 *
 * Replies shared.czero when the key does not exist and shared.cone when an
 * expiration was set or the key was deleted on the spot.
 */
void expireGenericCommand(client *c, long long basetime, int unit) {
    robj *key = c->argv[1];
    robj *param = c->argv[2];
    long long when; /* Unix time in milliseconds at which the key expires. */

    if (getLongLongFromObjectOrReply(c, param, &when, NULL) != C_OK) return;

    if (unit == UNIT_SECONDS) when *= 1000;
    when += basetime;

    /* Unknown key: nothing to do. */
    if (lookupKeyWrite(c->db,key) == NULL) {
        addReply(c,shared.czero);
        return;
    }

    if (!checkAlreadyExpired(when)) {
        /* Regular case: record the expiration time. */
        setExpire(c,c->db,key,when);
        addReply(c,shared.cone);
        signalModifiedKey(c,c->db,key);
        notifyKeyspaceEvent(NOTIFY_GENERIC,"expire",key,c->db->id);
        server.dirty++;
        return;
    }

    /* checkAlreadyExpired() reported the time as already expired: delete the
     * key right away, asynchronously when lazyfree_lazy_expire is set. */
    int deleted;
    if (server.lazyfree_lazy_expire)
        deleted = dbAsyncDelete(c->db,key);
    else
        deleted = dbSyncDelete(c->db,key);
    serverAssertWithInfo(c,key,deleted);
    server.dirty++;

    /* Replicate/AOF this as an explicit DEL or UNLINK. */
    robj *aux = server.lazyfree_lazy_expire ? shared.unlink : shared.del;
    rewriteClientCommandVector(c,2,aux,key);
    signalModifiedKey(c,c->db,key);
    notifyKeyspaceEvent(NOTIFY_GENERIC,"del",key,c->db->id);
    addReply(c, shared.cone);
}


db.c/setExpire(xxx)

/*
 * Record `when` as the expiration time of `key` in db->expires.
 * The key must already be present in db->dict (asserted below).
 *
 * c may be NULL; it is only consulted for the writable-replica bookkeeping
 * at the end.
 */
void setExpire(client *c, redisDb *db, robj *key, long long when) {
    dictEntry *kde, *de;

    /* Entry of the key in the main dictionary. */
    kde = dictFind(db->dict,key->ptr);
    serverAssertWithInfo(NULL,key,kde != NULL);
    /* Entry in the expire dictionary, added or found using the key taken
     * from the main dictionary entry. */
    de = dictAddOrFind(db->expires,dictGetKey(kde));
    /* Store the expiration time as the entry's signed integer value. */
    dictSetSignedIntegerVal(de,when);

    int writable_slave = server.masterhost && server.repl_slave_ro == 0;
    if (c && writable_slave && !(c->flags & CLIENT_MASTER))
        rememberSlaveKeyWithExpire(db,key);
}


dict.h/dictSetSignedIntegerVal()

/* Store _val_ as the signed 64-bit value of a dictionary entry.
 * The argument is parenthesized so that any expression can be passed. */
#define dictSetSignedIntegerVal(entry, _val_) \
    do { (entry)->v.s64 = (_val_); } while(0)

3. 过期后的删除策略

默认同时具有惰性删除和定期删除两种机制。

3.1 惰性删除

操作某key时,检查过期。

image

源码路径


server.c/redisCommandTable[]

struct redisCommand redisCommandTable[] = {
    {"module",moduleCommand,-2,
     "admin no-script",
     0,NULL,0,0,0,0,0,0},

    {"get",getCommand,2,
     "read-only fast @string",
     0,NULL,1,1,1,0,0,0},
     
     ...


t_string.c/getCommand()

/* GET command entry point: delegates to getGenericCommand() and discards
 * its return value. */
void getCommand(client *c) {
    getGenericCommand(c);
}


t_string.c/getGenericCommand()

/*
 * Look up argv[1] and reply with its value.
 *
 * Returns C_OK when the key is missing (lookupKeyReadOrReply has already
 * sent the null reply) or when the string value was sent, and C_ERR after
 * replying with a wrong-type error.
 */
int getGenericCommand(client *c) {
    robj *value = lookupKeyReadOrReply(c,c->argv[1],shared.null[c->resp]);

    if (value == NULL) return C_OK;

    if (value->type == OBJ_STRING) {
        addReplyBulk(c,value);
        return C_OK;
    }

    addReply(c,shared.wrongtypeerr);
    return C_ERR;
}


db.c/lookupKeyReadOrReply()

/* Read-path lookup of `key` in the client's database. When nothing is
 * found, `reply` is sent to the client. Returns the value or NULL. */
robj *lookupKeyReadOrReply(client *c, robj *key, robj *reply) {
    robj *found = lookupKeyRead(c->db, key);

    if (found == NULL) {
        addReply(c,reply);
    }
    return found;
}


db.c/lookupKeyRead()

/* Read-path lookup of `key` in `db` with no special flags (LOOKUP_NONE). */
robj *lookupKeyRead(redisDb *db, robj *key) {
    return lookupKeyReadWithFlags(db,key,LOOKUP_NONE);
}


db.c/lookupKeyReadWithFlags()

/*
 * Look up `key` for a read operation. This is where lazy expiration is
 * triggered: expireIfNeeded() runs before the actual lookup.
 *
 * Returns the value, or NULL on a miss. Hit/miss counters are updated and a
 * "keymiss" keyspace event is emitted on every miss.
 */
robj *lookupKeyReadWithFlags(redisDb *db, robj *key, int flags) {
    if (expireIfNeeded(db,key) == 1) {
        int treat_as_missing = 0;

        if (server.masterhost == NULL) {
            /* No master configured: the expired key is simply gone. */
            treat_as_missing = 1;
        } else if (server.current_client &&
                   server.current_client != server.master &&
                   server.current_client->cmd &&
                   server.current_client->cmd->flags & CMD_READONLY)
        {
            /* A master is configured: hide the logically expired key from
             * read-only commands of clients other than the master link. */
            treat_as_missing = 1;
        }

        if (treat_as_missing) {
            server.stat_keyspace_misses++;
            notifyKeyspaceEvent(NOTIFY_KEY_MISS, "keymiss", key, db->id);
            return NULL;
        }
    }

    robj *val = lookupKey(db,key,flags);
    if (val != NULL) {
        server.stat_keyspace_hits++;
        return val;
    }

    server.stat_keyspace_misses++;
    notifyKeyspaceEvent(NOTIFY_KEY_MISS, "keymiss", key, db->id);
    return NULL;
}


db.c/expireIfNeeded()

/*
 * Lazy expiration: called when a key is about to be accessed.
 *
 * Returns 0 when the key is not expired. When a master is configured
 * (server.masterhost != NULL) it returns 1 without deleting anything.
 * Otherwise the key is deleted and the result of the delete call is
 * returned.
 */
int expireIfNeeded(redisDb *db, robj *key) {
    /* Not expired: nothing to do. */
    if (!keyIsExpired(db,key)) return 0;

    /* With a master configured, only report the key as expired; per the
     * original author's note, the deletion itself arrives from the master. */
    if (server.masterhost != NULL) return 1;

    /* Statistics. */
    server.stat_expiredkeys++;
    /* Propagate the expiration (original note: to replicas and the AOF). */
    propagateExpire(db,key,server.lazyfree_lazy_expire);
    /* Emit the "expired" keyspace event. */
    notifyKeyspaceEvent(NOTIFY_EXPIRED,"expired",key,db->id);

    /* lazyfree_lazy_expire selects the asynchronous delete path, otherwise
     * the key is deleted synchronously. */
    int deleted;
    if (server.lazyfree_lazy_expire)
        deleted = dbAsyncDelete(db,key);
    else
        deleted = dbSyncDelete(db,key);

    if (deleted) signalModifiedKey(NULL,db,key);
    return deleted;
}


lazy free机制
  • 描述

采用后台线程(bio 线程)释放内存,而不是在主线程中同步释放。


lazyfree.c/dbAsyncDelete()

/* Values whose free effort is not above this threshold are freed inline. */
#define LAZYFREE_THRESHOLD 64

/*
 * Delete `key` from `db`, handing the value to a background job when it is
 * costly to free (effort above LAZYFREE_THRESHOLD) and not shared
 * (refcount == 1).
 *
 * Returns 1 if the key was found and removed, 0 otherwise.
 */
int dbAsyncDelete(redisDb *db, robj *key) {
    /* Drop the entry from the expire dictionary first. */
    if (dictSize(db->expires) > 0) dictDelete(db->expires,key->ptr);

    /* Unlink the entry from the keyspace without freeing it yet. */
    dictEntry *entry = dictUnlink(db->dict,key->ptr);
    if (entry == NULL) return 0;

    robj *val = dictGetVal(entry);
    size_t free_effort = lazyfreeGetFreeEffort(val);

    if (free_effort > LAZYFREE_THRESHOLD && val->refcount == 1) {
        atomicIncr(lazyfree_objects,1);
        bioCreateBackgroundJob(BIO_LAZY_FREE,val,NULL,NULL);
        /* The value now belongs to the background job; clear it from the
         * entry before the entry itself is released below. */
        dictSetVal(db->dict,entry,NULL);
    }

    dictFreeUnlinkedEntry(db->dict,entry);
    if (server.cluster_enabled) slotToKeyDel(key->ptr);
    return 1;
}


bio.c/bioCreateBackgroundJob()

/*
 * Queue a background job of the given type.
 *
 * type          - job type; selects the mutex, job list, pending counter
 *                 and condition variable that are used.
 * arg1..arg3    - opaque arguments stored in the job.
 */
void bioCreateBackgroundJob(int type, void *arg1, void *arg2, void *arg3) {
    struct bio_job *job = zmalloc(sizeof(*job));

    job->time = time(NULL);
    job->arg1 = arg1;
    job->arg2 = arg2;
    job->arg3 = arg3;
    pthread_mutex_lock(&bio_mutex[type]);
    /* Append the job to the tail of the list for this type. */
    listAddNodeTail(bio_jobs[type],job);
    bio_pending[type]++;
    /* Signal the condition variable for this job type. */
    pthread_cond_signal(&bio_newjob_cond[type]);
    pthread_mutex_unlock(&bio_mutex[type]);
}

  • 优劣

好处:redis单线程,异步线程防阻塞。
坏处:内存释放不及时。

  • 主动删除(2种)

unlink(值超过LAZYFREE_THRESHOLD 64时启用lazy free)
flushdb async 和 flushall async

  • 被动删除(4种lazy free)
配置 默认值 含义
lazyfree-lazy-eviction no 淘汰机制生效时,是否lazy free
lazyfree-lazy-expire no TTL过期且命令触发时,是否lazy free
lazyfree-lazy-server-del(见下图) no del
hdel
rename dest key1
lpop, rpop, lrem, rpoplpush,brpop
srem,smove,spop,sdiffstore,sinterstore,sunionstore
zrem,zpopmin,zpopmax
bitop or dest key1 key2
georadius ... store dest
sort key store dest
replica(slave)-lazy-flush no slave接收RDB文件时的flushall,默认no。内存变化大,建议no。内存变化小,建议yes。

3.2 定期删除

默认每秒10次(100ms一次)
规定时间内,分多次遍历各个数据库,从expires过期字典随机检查部分键的过期时间。

配置 redis.conf
hz 10
dynamic-hz yes (默认yes)

源码路径


server.c/serverCron()

旧版redis.c/serverCron
新版server.c/serverCron

更新服务器的内存占用、数据库占用情况。- zmalloc_used_memory(), zmalloc_get_allocator_info()
databasesCron()
关闭和清理连接失效的客户端。 - clientsArePaused()
尝试进行 AOF 或 RDB 持久化操作。- rewriteAppendOnlyFileBackground(), rdbSaveBackground()
如果服务器是主节点的话,对附属节点进行定期同步。
如果处于集群模式的话,对集群进行定期同步和连接测试。- clusterCron()
如果处于哨兵模式的话,运行哨兵检查机制。


server.c/databasesCron()

/*
 * Periodic database maintenance: active expiration, active defragmentation,
 * and resizing / incremental rehashing of the hash tables.
 */
void databasesCron(void) {
    /* Active expiration: a master runs the slow expire cycle, any other
     * instance handles its own tracked keys via expireSlaveKeys(). */
    if (server.active_expire_enabled) {
        if (iAmMaster()) {
            activeExpireCycle(ACTIVE_EXPIRE_CYCLE_SLOW);
        } else {
            expireSlaveKeys();
        }
    }

    activeDefragCycle();

    /* Resize and rehash only while no child process is active. */
    if (!hasActiveChildProcess()) {
        /* Static cursors so that successive calls continue with the next
         * databases. */
        static unsigned int resize_db = 0;
        static unsigned int rehash_db = 0;
        int dbs_per_call = CRON_DBS_PER_CALL;
        int j;

        /* Never test more databases than actually exist. */
        if (dbs_per_call > server.dbnum) dbs_per_call = server.dbnum;

        /* Resize */
        for (j = 0; j < dbs_per_call; j++) {
            tryResizeHashTables(resize_db % server.dbnum);
            resize_db++;
        }

        /* Incremental rehash, only with `activerehashing yes` (the
         * redis.conf default). */
        if (server.activerehashing) {
            for (j = 0; j < dbs_per_call; j++) {
                int work_done = incrementallyRehash(rehash_db);
                if (work_done) {
                    /* Some rehashing was done: stop here for this call. */
                    break;
                } else {
                    /* Nothing to rehash in this database: try the next. */
                    rehash_db++;
                    rehash_db %= server.dbnum;
                }
            }
        }
    }
}


expire.c/activeExpireCycle()

#define ACTIVE_EXPIRE_CYCLE_KEYS_PER_LOOP 20    /* Base number of keys sampled per loop. */
#define ACTIVE_EXPIRE_CYCLE_FAST_DURATION 1000  /* Base fast-cycle duration, microseconds. */
#define ACTIVE_EXPIRE_CYCLE_SLOW_TIME_PERC 25   /* Base share (%) of one hz period for the slow cycle. */
#define ACTIVE_EXPIRE_CYCLE_ACCEPTABLE_STALE 10 /* Base percentage of expired keys tolerated. */

/*
 * Periodic (active) expiration: sample keys from each database's expire
 * dictionary and delete those that are expired.
 *
 * type - ACTIVE_EXPIRE_CYCLE_FAST or ACTIVE_EXPIRE_CYCLE_SLOW. The fast
 *        cycle has a short fixed time budget and may decide not to run at
 *        all; the slow cycle gets a percentage of one 1/server.hz period.
 *
 * Progress (next database, whether the last run hit its time limit, start
 * of the last fast cycle) is kept in function-static variables so that work
 * continues incrementally across calls.
 */
void activeExpireCycle(int type) {
    unsigned long
    /* Rescale the configured effort (active_expire_effort, 1-10) to 0-9. */
    effort = server.active_expire_effort-1,
    /* Number of keys sampled per loop. */
    config_keys_per_loop = ACTIVE_EXPIRE_CYCLE_KEYS_PER_LOOP +
                           ACTIVE_EXPIRE_CYCLE_KEYS_PER_LOOP/4*effort,
    /* Duration of a fast cycle, in microseconds. */
    config_cycle_fast_duration = ACTIVE_EXPIRE_CYCLE_FAST_DURATION +
                                 ACTIVE_EXPIRE_CYCLE_FAST_DURATION/4*effort,
    /* Time budget of a slow cycle, as a percentage of one hz period. */
    config_cycle_slow_time_perc = ACTIVE_EXPIRE_CYCLE_SLOW_TIME_PERC +
                                  2*effort,
    /* Percentage of expired keys tolerated before looping again. */
    config_cycle_acceptable_stale = ACTIVE_EXPIRE_CYCLE_ACCEPTABLE_STALE-
                                    effort;

    /* State kept across calls. */
    static unsigned int current_db = 0;   /* Next database to scan. */
    static int timelimit_exit = 0;        /* Did the previous call stop on the time limit? */
    static long long last_fast_cycle = 0; /* Start time of the last fast cycle. */

    int j, iteration = 0;
    int dbs_per_call = CRON_DBS_PER_CALL;
    long long start = ustime(), timelimit, elapsed;

    if (clientsArePaused()) return;

    if (type == ACTIVE_EXPIRE_CYCLE_FAST) {

        /* Skip the fast cycle if the previous run did not stop on the time
         * limit and the estimated share of expired keys is tolerable. */
        if (!timelimit_exit &&
            server.stat_expired_stale_perc < config_cycle_acceptable_stale)
            return;

        /* Do not start another fast cycle within two fast-cycle durations
         * of the start of the previous one. */
        if (start < last_fast_cycle + (long long)config_cycle_fast_duration*2)
            return;

        last_fast_cycle = start;
    }

    /* Cap at the number of databases; scan all of them if the previous run
     * stopped on the time limit. */
    if (dbs_per_call > server.dbnum || timelimit_exit)
        dbs_per_call = server.dbnum;

    /* Time budget in microseconds: config_cycle_slow_time_perc percent of
     * one 1/server.hz-second period. */
    timelimit = config_cycle_slow_time_perc*1000000/server.hz/100;
    timelimit_exit = 0;
    if (timelimit <= 0) timelimit = 1;

    if (type == ACTIVE_EXPIRE_CYCLE_FAST)
        timelimit = config_cycle_fast_duration;

    long total_sampled = 0;
    long total_expired = 0;

    /* Process up to dbs_per_call databases, stopping on the time limit. */
    for (j = 0; j < dbs_per_call && timelimit_exit == 0; j++) {
        unsigned long expired, sampled;

        /* Pick the current database and advance the cross-call cursor. */
        redisDb *db = server.db+(current_db % server.dbnum);
        current_db++;

        do {
            unsigned long num, slots;
            long long now, ttl_sum;
            int ttl_samples;
            iteration++;

            /* No key with an expire in this database: nothing to do. */
            if ((num = dictSize(db->expires)) == 0) {
                db->avg_ttl = 0;
                break;
            }
            slots = dictSlots(db->expires);
            now = mstime();

            /* Skip tables that are less than 1% full. */
            if (num && slots > DICT_HT_INITIAL_SIZE &&
                (num*100/slots < 1)) break;

            expired = 0;
            sampled = 0;
            ttl_sum = 0;
            ttl_samples = 0;

            if (num > config_keys_per_loop)
                num = config_keys_per_loop;

            /* Upper bound on the number of buckets visited in this loop. */
            long max_buckets = num*20;
            /* Buckets visited so far. */
            long checked_buckets = 0;

            /* Continue until enough keys were sampled or enough buckets
             * were visited. */
            while (sampled < num && checked_buckets < max_buckets) {
                for (int table = 0; table < 2; table++) {
                    /* The second table only matters while rehashing. */
                    if (table == 1 && !dictIsRehashing(db->expires)) break;

                    /* Resume from this database's expire cursor. */
                    unsigned long idx = db->expires_cursor;
                    idx &= db->expires->ht[table].sizemask;
                    dictEntry *de = db->expires->ht[table].table[idx];
                    long long ttl;

                    checked_buckets++;
                    while(de) {
                        /* Read the next pointer first: the entry may be
                         * deleted by the call below. */
                        dictEntry *e = de;
                        de = de->next;
                        ttl = dictGetSignedIntegerVal(e)-now;
                        /* Count the key if it was expired and removed. */
                        if (activeExpireCycleTryExpire(db,e,now)) expired++;
                        if (ttl > 0) {
                            /* Feed the average-TTL statistic. */
                            ttl_sum += ttl;
                            ttl_samples++;
                        }
                        sampled++;
                    }
                }
                db->expires_cursor++;
            }
            total_expired += expired;
            total_sampled += sampled;

            if (ttl_samples) {
                long long avg_ttl = ttl_sum/ttl_samples;

                /* Running average giving the new sample a weight of 2%. */
                if (db->avg_ttl == 0) db->avg_ttl = avg_ttl;
                db->avg_ttl = (db->avg_ttl/50)*49 + (avg_ttl/50);
            }

            /* Check the time budget once every 16 iterations. */
            if ((iteration & 0xf) == 0) {
                elapsed = ustime()-start;
                if (elapsed > timelimit) {
                    /* Out of time: remember it for the next call and stop. */
                    timelimit_exit = 1;
                    server.stat_expired_time_cap_reached_count++;
                    break;
                }
            }
            /* Repeat on the same database while the share of expired keys
             * in the sample is above the acceptable threshold. */
        } while (sampled == 0 ||
                 (expired*100/sampled) > config_cycle_acceptable_stale);
    }

    elapsed = ustime()-start;
    server.stat_expire_cycle_time_used += elapsed;
    latencyAddSampleIfNeeded("expire-cycle",elapsed/1000);

    /* Update the estimated share of expired keys as a running average that
     * gives this cycle a weight of 5%. */
    double current_perc;
    if (total_sampled) {
        current_perc = (double)total_expired/total_sampled;
    } else
        current_perc = 0;
    server.stat_expired_stale_perc = (current_perc*0.05)+(server.stat_expired_stale_perc*0.95);
}



关于resize和rehash

  • 初始hashtable长度为4

  • 扩容
    负载因子(used/size) >= 1,且没有BGSAVE或BGREWRITEAOF
    或者负载因子超过5(key数量超过数组长度的5倍)时,强制扩容
    扩容后size为第一个大于等于 used*2 的2的幂

  • 缩容
    used < size x 0.1,且没有BGSAVE或BGREWRITEAOF
    以used作为目标大小(不小于初始长度4),实际size取第一个大于等于它的2的幂

  • 渐进式rehash
    初始化第二个dictht哈希表,并修改rehash标志位为0
    在增删查改时顺带迁移,并在serverCron的databasesCron(主线程定时任务,每次约1ms)中继续rehash
    完成后修改标志位为-1


resize

server.c/tryResizeHashTables

void tryResizeHashTables(int dbid) {
    if (htNeedsResize(server.db[dbid].dict))
        dictResize(server.db[dbid].dict);
    if (htNeedsResize(server.db[dbid].expires))
        dictResize(server.db[dbid].expires);
}


server.c/htNeedsResize()

/* Return non-zero when the table is larger than the initial size and its
 * fill percentage (used*100/size) is below HASHTABLE_MIN_FILL. */
int htNeedsResize(dict *dict) {
    long long size, used;
    size = dictSlots(dict);
    used = dictSize(dict);
    /* Resize condition. */
    return (size > DICT_HT_INITIAL_SIZE &&
            (used*100/size < HASHTABLE_MIN_FILL));
}


dict.c/_dictNextPower()

/* Return the first value obtained by doubling DICT_HT_INITIAL_SIZE that is
 * greater than or equal to `size`. Sizes of LONG_MAX or above yield
 * LONG_MAX + 1. */
static unsigned long _dictNextPower(unsigned long size)
{
    unsigned long i = DICT_HT_INITIAL_SIZE;
    if (size >= LONG_MAX) return LONG_MAX + 1LU;
    /* Keep doubling until the requested size fits. */
    while(1) {
        if (i >= size)
            return i;
        i *= 2;
    }
}


dict.c/dictExpand()

/*
 * Create a hash table able to hold `size` entries and install it.
 *
 * Returns DICT_ERR when a rehash is already in progress, when `size` is
 * smaller than the number of stored entries, or when the rounded size
 * equals the current one; DICT_OK otherwise.
 */
int dictExpand(dict *d, unsigned long size)
{
    if (dictIsRehashing(d) || d->ht[0].used > size)
        return DICT_ERR;
    /* The new hash table. */
    dictht n;
    unsigned long realsize = _dictNextPower(size);

    /* Same size as the current table: nothing to do. */
    if (realsize == d->ht[0].size) return DICT_ERR;

    n.size = realsize;
    n.sizemask = realsize-1;
    n.table = zcalloc(realsize*sizeof(dictEntry*));
    n.used = 0;

    /* First initialization: there is no old table to rehash from, so the
     * new table becomes ht[0] directly. */
    if (d->ht[0].table == NULL) {
        d->ht[0] = n;
        return DICT_OK;
    }

    /* Prepare incremental rehashing: the new table is the target (ht[1])
     * and the rehash index starts at bucket 0. */
    d->ht[1] = n;
    d->rehashidx = 0;
    return DICT_OK;
}





rehash


添加时dict.c/dictAddRaw()

/*
 * Allocate a new entry for `key` and link it at the head of its bucket.
 * The value is left for the caller to set.
 *
 * Returns the new entry, or NULL when _dictKeyIndex() returns -1
 * (presumably because the key already exists; see `existing`).
 */
dictEntry *dictAddRaw(dict *d, void *key, dictEntry **existing)
{
    long index;
    dictEntry *entry;
    dictht *ht;

    /* Take the opportunity to perform one step of incremental rehashing. */
    if (dictIsRehashing(d)) _dictRehashStep(d);

    if ((index = _dictKeyIndex(d, key, dictHashKey(d,key), existing)) == -1)
        return NULL;

    /* While rehashing, new entries go into the new table (ht[1]). */
    ht = dictIsRehashing(d) ? &d->ht[1] : &d->ht[0];
    entry = zmalloc(sizeof(*entry));
    entry->next = ht->table[index];
    ht->table[index] = entry;
    ht->used++;

    dictSetKey(d, entry, key);
    return entry;
}


databasesCron里的server.c/incrementallyRehash()

dictEntry *dictAddRaw(dict *d, void *key, dictEntry **existing)
int incrementallyRehash(int dbid) {
    # rehash 数据字典
    if (dictIsRehashing(server.db[dbid].dict)) {
        dictRehashMilliseconds(server.db[dbid].dict,1);
        return 1;
    }
    
    # rehash 过期字典
    if (dictIsRehashing(server.db[dbid].expires)) {
        dictRehashMilliseconds(server.db[dbid].expires,1);
        return 1;
    }
    return 0;
}


dict.c/dictRehashMilliseconds()

/*
 * Rehash `d` in batches of dictRehash(d,100) until dictRehash() returns 0
 * or more than `ms` milliseconds have elapsed.
 *
 * Returns the number of steps accounted for, counted in increments of 100.
 */
int dictRehashMilliseconds(dict *d, int ms) {
    long long start = timeInMilliseconds();
    int rehashes = 0;

    /* Keep rehashing until the time budget is exceeded. */
    while(dictRehash(d,100)) {
        rehashes += 100;
        if (timeInMilliseconds()-start > ms) break;
    }
    return rehashes;
}


4. 内存淘汰机制

内存不够,redis主动清理。

策略名称 解释
allkeys-lru 所有键 + 最近最少使用
allkeys-lfu 所有键 + 访问频率最小
allkeys-random 所有键 + 随机
volatile-lru 设置了过期时间的键 + 最近最少使用
volatile-lfu 设置了过期时间的键 + 访问频率最小
volatile-random 设置了过期时间的键 + 随机
volatile-ttl 设置了过期时间的键 + 剩余过期时间最短
noeviction 报错

lfu:redis-4.0+

配置 redis.conf
maxmemory-policy (策略)

(Chapter Over)

你可能感兴趣的:(redis Key过期及删除 LazyFree 渐进式Rehash 内存淘汰)