1. key操作
keys *
keys n*r
keys nave?
scan 0
scan 0 match n*r count 5
del key1 key2
unlink key1 key2
exists key1
rename a b
expire a 10
ttl a
type a
dbsize
randomkey
debug object key1
flushdb async
flushall async
2. 过期
2.1 使用
过期后根据过期删除策略进行删除。
# 通用
expire key time_in_second
pexpire key time_in_millisecond
pexpireat key timestamp_in_millisecond
expireat key timestamp_in_second
# String 且 nx(if not exists)
set key value ex 5 nx
# String 且 xx
set key value px 5000 xx
setex key 10 value
# 查看
ttl key
pttl key
2.2 过期字典(expires)
数据结构
/* Global server state (excerpt: only the database-related fields are shown). */
struct redisServer {
    redisDb *db;    /* Databases; indexed as an array elsewhere in these notes */
    int dbnum;      /* Number of databases */
    /* ... remaining fields omitted ... */
};
/* A single Redis database (excerpt: only the two dictionaries are shown). */
typedef struct redisDb {
    dict *dict;     /* Holds every key-value pair of this database */
    dict *expires;  /* Expires dictionary: holds the expiration time of keys */
    /* ... remaining fields omitted ... */
} redisDb;
字典
// Dictionary: a pair of hash tables plus the state needed to rehash
// from one table to the other.
typedef struct dict {
// Type
dictType *type;
void *privdata;
// Array of two hash tables
dictht ht[2];
// Rehash progress; -1 when no rehash is in progress
long rehashidx;
unsigned long iterators;
} dict;
hashtable
/* Hash table structure; a dict holds two of these. */
typedef struct dictht {
    dictEntry **table;       /* Hash table array */
    unsigned long size;      /* Hash table size */
    unsigned long sizemask;  /* Hash table size mask, equal to size-1 */
    unsigned long used;      /* Number of nodes currently stored */
} dictht;
hash表节点
// Hash table node: one key, one value (stored in a union), and a link
// to the next node in the same bucket.
typedef struct dictEntry {
// Key
void *key;
// Value
union {
void *val;
uint64_t u64;
int64_t s64;
double d;
} v;
struct dictEntry *next; // Points to the next node, forming a linked list
} dictEntry;
浏览源码
expire.c/expireGenericCommand(xxx)
/*
 * Generic implementation for setting an expire on the key in c->argv[1].
 *
 * c        - client issuing the command; argv[1] is the key, argv[2] the time.
 * basetime - offset in milliseconds added to the parsed time.
 * unit     - when UNIT_SECONDS, the parsed time is converted to milliseconds.
 *
 * Replies 0 when the key does not exist, 1 otherwise. If the resulting time
 * is already in the past the key is deleted instead of getting an expire.
 */
void expireGenericCommand(client *c, long long basetime, int unit) {
    robj *key = c->argv[1];
    robj *param = c->argv[2];
    long long when; /* Unix time in milliseconds at which the key expires. */

    if (getLongLongFromObjectOrReply(c, param, &when, NULL) != C_OK) {
        return;
    }
    if (unit == UNIT_SECONDS) {
        when = when * 1000;
    }
    when = when + basetime;

    /* The key does not exist: reply 0 and stop. */
    if (lookupKeyWrite(c->db,key) == NULL) {
        addReply(c,shared.czero);
        return;
    }

    /* Common case: the time is not yet reached, so record the expire. */
    if (!checkAlreadyExpired(when)) {
        setExpire(c,c->db,key,when);
        addReply(c,shared.cone);
        signalModifiedKey(c,c->db,key);
        notifyKeyspaceEvent(NOTIFY_GENERIC,"expire",key,c->db->id);
        server.dirty++;
        return;
    }

    /* The time is already reached: delete the key right away. */
    int deleted;
    if (server.lazyfree_lazy_expire) {
        deleted = dbAsyncDelete(c->db,key);
    } else {
        deleted = dbSyncDelete(c->db,key);
    }
    serverAssertWithInfo(c,key,deleted);
    server.dirty++;

    /* Replicate/AOF this as an explicit DEL or UNLINK. */
    robj *del_cmd;
    if (server.lazyfree_lazy_expire) {
        del_cmd = shared.unlink;
    } else {
        del_cmd = shared.del;
    }
    rewriteClientCommandVector(c,2,del_cmd,key);
    signalModifiedKey(c,c->db,key);
    notifyKeyspaceEvent(NOTIFY_GENERIC,"del",key,c->db->id);
    addReply(c, shared.cone);
}
db.c/setExpire(xxx)
/*
 * Record `when` as the expire time of `key` in `db`.
 *
 * The key must already exist in db->dict (asserted). The expire time is
 * stored as the signed integer value of the key's entry in db->expires.
 * `c` may be NULL; it is only consulted for the writable-replica check.
 */
void setExpire(client *c, redisDb *db, robj *key, long long when) {
    dictEntry *kde, *de;

    /* Entry of the key in the main dictionary. */
    kde = dictFind(db->dict,key->ptr);
    serverAssertWithInfo(NULL,key,kde != NULL);
    /* Entry in the expires dictionary, found or added using the key
     * taken from the main dictionary entry. */
    de = dictAddOrFind(db->expires,dictGetKey(kde));
    /* Store the expire time as the value of that entry. */
    dictSetSignedIntegerVal(de,when);

    int writable_slave = server.masterhost && server.repl_slave_ro == 0;
    if (c && writable_slave && !(c->flags & CLIENT_MASTER))
        rememberSlaveKeyWithExpire(db,key);
}
dict.h/dictSetSignedIntegerVal(xxx)
/* Store the signed 64-bit integer `_val_` as the value of dict entry `entry`. */
#define dictSetSignedIntegerVal(entry, _val_) \
    do { (entry)->v.s64 = (_val_); } while(0)
3. 过期后的删除策略
默认同时具有惰性删除和定期删除两种机制。
3.1 惰性删除
操作某key时,检查过期。
源码路径
server.c/redisCommandTable[]
struct redisCommand redisCommandTable[] = {
{"module",moduleCommand,-2,
"admin no-script",
0,NULL,0,0,0,0,0,0},
{"get",getCommand,2,
"read-only fast @string",
0,NULL,1,1,1,0,0,0},
...
t_string.c/getCommand()
/* GET command handler: delegates to getGenericCommand() and discards
 * its status code. */
void getCommand(client *c) {
    (void) getGenericCommand(c);
}
t_string.c/getGenericCommand()
/*
 * Look up the key in c->argv[1] and reply with its value.
 *
 * Returns C_OK when the key is missing (the null reply is sent by the
 * lookup helper) or when the value is a string and was sent as a bulk
 * reply. Returns C_ERR after sending a wrong-type error when the value
 * is not an OBJ_STRING.
 */
int getGenericCommand(client *c) {
    robj *val = lookupKeyReadOrReply(c,c->argv[1],shared.null[c->resp]);

    if (val == NULL) {
        return C_OK;
    }
    if (val->type == OBJ_STRING) {
        addReplyBulk(c,val);
        return C_OK;
    }
    addReply(c,shared.wrongtypeerr);
    return C_ERR;
}
db.c/lookupKeyReadOrReply()
/* Look up `key` for reading in the client's database. When nothing is
 * found, send `reply` to the client. Returns the value or NULL. */
robj *lookupKeyReadOrReply(client *c, robj *key, robj *reply) {
    robj *found = lookupKeyRead(c->db, key);

    if (found == NULL) {
        addReply(c,reply);
    }
    return found;
}
db.c/lookupKeyRead()
/* Read-lookup of `key` in `db` with no special flags (LOOKUP_NONE). */
robj *lookupKeyRead(redisDb *db, robj *key) {
    robj *val = lookupKeyReadWithFlags(db,key,LOOKUP_NONE);
    return val;
}
db.c/lookupKeyReadWithFlags()
/*
 * Look up `key` in `db` for a read operation, updating the keyspace
 * hit/miss statistics and emitting a "keymiss" event on a miss.
 *
 * The key is first passed through expireIfNeeded(). When that reports
 * the key as expired, the lookup is skipped and treated as a miss if
 * either this instance has no master configured, or the current client
 * is not the master link and is running a command flagged CMD_READONLY.
 * In every other case the key is looked up with lookupKey().
 *
 * Returns the value, or NULL on a miss.
 */
robj *lookupKeyReadWithFlags(redisDb *db, robj *key, int flags) {
    robj *found = NULL;
    int treat_as_missing = 0;

    if (expireIfNeeded(db,key) == 1) {
        if (server.masterhost == NULL) {
            treat_as_missing = 1;
        } else if (server.current_client &&
                   server.current_client != server.master &&
                   server.current_client->cmd &&
                   server.current_client->cmd->flags & CMD_READONLY)
        {
            treat_as_missing = 1;
        }
    }

    if (!treat_as_missing) {
        found = lookupKey(db,key,flags);
    }

    if (found != NULL) {
        server.stat_keyspace_hits++;
    } else {
        server.stat_keyspace_misses++;
        notifyKeyspaceEvent(NOTIFY_KEY_MISS, "keymiss", key, db->id);
    }
    return found;
}
db.c/expireIfNeeded()
/*
 * Lazy expiration check performed when a key is accessed.
 *
 * Returns 0 when the key is not expired. When a master is configured
 * (this instance is a replica) an expired key is only reported (returns 1)
 * and not deleted; the deletion is left to the master's notification.
 * Otherwise the key is deleted and the result of the delete is returned.
 */
int expireIfNeeded(redisDb *db, robj *key) {
    /* Not expired: nothing to do. */
    if (keyIsExpired(db,key) == 0) {
        return 0;
    }

    /* Replica: report the key as expired and wait for the master. */
    if (server.masterhost != NULL) {
        return 1;
    }

    /* Statistics. */
    server.stat_expiredkeys++;

    /* Propagate the expiration to replicas and the AOF file. */
    propagateExpire(db,key,server.lazyfree_lazy_expire);

    /* Keyspace "expired" event. */
    notifyKeyspaceEvent(NOTIFY_EXPIRED,
        "expired",key,db->id);

    /* The lazyfree_lazy_expire setting selects between the asynchronous
     * and the synchronous delete (redis-4.0+). */
    int deleted;
    if (server.lazyfree_lazy_expire) {
        deleted = dbAsyncDelete(db,key);
    } else {
        deleted = dbSyncDelete(db,key);
    }

    if (deleted) {
        signalModifiedKey(NULL,db,key);
    }
    return deleted;
}
lazy free机制
- 描述
采用后台线程(bio线程)释放。
lazyfree.c/dbAsyncDelete()
/* Free-effort threshold above which a value is handed to the background job. */
#define LAZYFREE_THRESHOLD 64

/*
 * Delete `key` from `db`, releasing the value through a BIO_LAZY_FREE
 * background job when it is costly enough to free.
 *
 * Returns 1 if the key was found and removed, 0 otherwise.
 */
int dbAsyncDelete(redisDb *db, robj *key) {
    /* Drop the entry from the expires dictionary first, if there is one. */
    if (dictSize(db->expires) > 0) dictDelete(db->expires,key->ptr);

    /* Unlink (without freeing) the entry from the main dictionary. */
    dictEntry *entry = dictUnlink(db->dict,key->ptr);
    if (entry == NULL) {
        return 0;
    }

    robj *val = dictGetVal(entry);
    size_t free_effort = lazyfreeGetFreeEffort(val);

    /* Only a value that exceeds the threshold and has a single reference
     * is queued for the background job. */
    if (free_effort > LAZYFREE_THRESHOLD && val->refcount == 1) {
        atomicIncr(lazyfree_objects,1);
        bioCreateBackgroundJob(BIO_LAZY_FREE,val,NULL,NULL);
        /* Presumably so that freeing the entry below leaves the queued
         * value alone -- confirm against dictFreeUnlinkedEntry(). */
        dictSetVal(db->dict,entry,NULL);
    }

    dictFreeUnlinkedEntry(db->dict,entry);
    if (server.cluster_enabled) slotToKeyDel(key->ptr);
    return 1;
}
bio.c/bioCreateBackgroundJob()
/*
 * Queue a background job of the given `type`.
 *
 * A bio_job is allocated, stamped with the current time, filled with the
 * three opaque arguments, and appended to the job list of `type` while
 * holding that type's mutex. The type's condition variable is signalled
 * before the mutex is released.
 */
void bioCreateBackgroundJob(int type, void *arg1, void *arg2, void *arg3) {
    struct bio_job *job = zmalloc(sizeof(*job));

    job->time = time(NULL);
    job->arg1 = arg1;
    job->arg2 = arg2;
    job->arg3 = arg3;
    pthread_mutex_lock(&bio_mutex[type]);
    /* Append the job to the tail of the list. */
    listAddNodeTail(bio_jobs[type],job);
    bio_pending[type]++;
    pthread_cond_signal(&bio_newjob_cond[type]);
    pthread_mutex_unlock(&bio_mutex[type]);
}
- 优劣
好处:redis单线程,异步线程防阻塞。
坏处:内存释放不及时。
- 主动删除(2种)
unlink(值超过LAZYFREE_THRESHOLD 64时启用lazy free)
flushdb async 和 flushall async
- 被动删除(4种lazy free)
配置 | 默认值 | 含义 |
---|---|---|
lazyfree-lazy-eviction | no | 淘汰机制生效时,是否lazy free |
lazyfree-lazy-expire | no | TTL过期且命令触发时,是否lazy free |
lazyfree-lazy-server-del(见下图) | no | del hdel rename dest key1 lpop, rpop, lrem, rpoplpush,brpop srem,smove,spop,sdiffstore,sinterstore,sunionstore zrem,zpopmin,zpopmax bitop or dest key1 key2 georadius ... store dest sort key store dest |
replica(slave)-lazy-flush | no | slave接收RDB文件时的flushall,默认no。内存变化大,建议no。内存变化小,建议yes。 |
3.2 定期删除
默认每秒10次(100ms一次)
规定时间内,分多次遍历各个数据库,从expires过期字典随机检查部分键的过期时间。
配置 redis.conf
hz 10
dynamic-hz yes (默认yes)
源码路径
server.c/serverCron()
旧版redis.c/serverCron
新版server.c/serverCron
更新服务器的内存占用、数据库占用情况。- zmalloc_used_memory(), zmalloc_get_allocator_info()
databasesCron()
关闭和清理连接失效的客户端。 - clientsArePaused()
尝试进行 AOF 或 RDB 持久化操作。- rewriteAppendOnlyFileBackground(), rdbSaveBackground()
如果服务器是主节点的话,对附属节点进行定期同步。
如果处于集群模式的话,对集群进行定期同步和连接测试。- clusterCron()
如果处于哨兵模式的话,运行哨兵检查机制。
server.c/databasesCron()
/*
 * Periodic database maintenance: active expiration, defragmentation,
 * and hash table resize / incremental rehash.
 */
void databasesCron(void) {
    /* Active expiration: a master runs the slow expire cycle, any other
     * instance runs expireSlaveKeys(). */
    if (server.active_expire_enabled) {
        if (iAmMaster()) {
            activeExpireCycle(ACTIVE_EXPIRE_CYCLE_SLOW);
        } else {
            expireSlaveKeys();
        }
    }

    activeDefragCycle();

    /* Resize and rehash only when there is no active child process. */
    if (!hasActiveChildProcess()) {
        /* Static cursors so that successive calls continue with the next
         * databases instead of starting over. */
        static unsigned int resize_db = 0;
        static unsigned int rehash_db = 0;
        int dbs_per_call = CRON_DBS_PER_CALL;
        int j;

        if (dbs_per_call > server.dbnum) dbs_per_call = server.dbnum;

        /* Resize */
        for (j = 0; j < dbs_per_call; j++) {
            tryResizeHashTables(resize_db % server.dbnum);
            resize_db++;
        }

        /* Rehash, only when "activerehashing yes" is configured
         * (the default is yes). */
        if (server.activerehashing) {
            for (j = 0; j < dbs_per_call; j++) {
                int work_done = incrementallyRehash(rehash_db);
                if (work_done) {
                    /* Some rehashing was done: stop for this call. */
                    break;
                } else {
                    rehash_db++;
                    rehash_db %= server.dbnum;
                }
            }
        }
    }
}
expire.c/activeExpireCycle()
#define ACTIVE_EXPIRE_CYCLE_KEYS_PER_LOOP 20    /* Base number of keys sampled per loop. */
#define ACTIVE_EXPIRE_CYCLE_FAST_DURATION 1000  /* Base fast cycle duration, microseconds. */
#define ACTIVE_EXPIRE_CYCLE_SLOW_TIME_PERC 25   /* Base slow cycle time percentage. */
#define ACTIVE_EXPIRE_CYCLE_ACCEPTABLE_STALE 10 /* Base acceptable percentage of stale keys. */

/*
 * Periodic (active) expiration: sample keys from the expires dictionary
 * of a few databases and delete the ones that are expired, within a
 * time limit.
 *
 * type - ACTIVE_EXPIRE_CYCLE_FAST or ACTIVE_EXPIRE_CYCLE_SLOW. A fast
 *        cycle may return immediately (see below) and uses the fast
 *        duration as its time limit; a slow cycle uses a percentage of
 *        the 1/hz period.
 *
 * Progress (the next database to test, whether the previous call hit the
 * time limit, and when the last fast cycle started) is kept in static
 * variables across calls.
 */
void activeExpireCycle(int type) {
    unsigned long
    /* Effort rescaled from the configured active_expire_effort (1 to 10). */
    effort = server.active_expire_effort-1,
    /* Number of keys sampled per loop. */
    config_keys_per_loop = ACTIVE_EXPIRE_CYCLE_KEYS_PER_LOOP +
                           ACTIVE_EXPIRE_CYCLE_KEYS_PER_LOOP/4*effort,
    /* Fast cycle duration, in microseconds. */
    config_cycle_fast_duration = ACTIVE_EXPIRE_CYCLE_FAST_DURATION +
                                 ACTIVE_EXPIRE_CYCLE_FAST_DURATION/4*effort,
    /* Slow cycle time budget, as a percentage (CPU usage threshold). */
    config_cycle_slow_time_perc = ACTIVE_EXPIRE_CYCLE_SLOW_TIME_PERC +
                                  2*effort,
    /* Percentage of expired keys among the sampled ones above which the
     * same database is sampled again. */
    config_cycle_acceptable_stale = ACTIVE_EXPIRE_CYCLE_ACCEPTABLE_STALE-
                                    effort;

    /* State kept across calls so the work continues incrementally. */
    static unsigned int current_db = 0;   /* Next database to test. */
    static int timelimit_exit = 0;        /* Time limit hit in the previous call? */
    static long long last_fast_cycle = 0; /* Start time of the last fast cycle. */

    int j, iteration = 0;
    int dbs_per_call = CRON_DBS_PER_CALL;
    long long start = ustime(), timelimit, elapsed;

    if (clientsArePaused()) return;

    if (type == ACTIVE_EXPIRE_CYCLE_FAST) {
        /* Skip the fast cycle if the previous cycle did not end because of
         * the time limit and the estimated percentage of stale keys is
         * below the acceptable one. */
        if (!timelimit_exit &&
            server.stat_expired_stale_perc < config_cycle_acceptable_stale)
            return;

        /* Also skip it if less than twice the fast cycle duration has
         * passed since the last fast cycle started. */
        if (start < last_fast_cycle + (long long)config_cycle_fast_duration*2)
            return;

        last_fast_cycle = start;
    }

    /* Never test more databases than exist; test all of them when the
     * previous call hit the time limit. */
    if (dbs_per_call > server.dbnum || timelimit_exit)
        dbs_per_call = server.dbnum;

    /* Time limit in microseconds: a percentage of the 1/hz period. */
    timelimit = config_cycle_slow_time_perc*1000000/server.hz/100;
    timelimit_exit = 0;
    if (timelimit <= 0) timelimit = 1;

    if (type == ACTIVE_EXPIRE_CYCLE_FAST)
        timelimit = config_cycle_fast_duration;

    long total_sampled = 0;
    long total_expired = 0;

    /* Process dbs_per_call databases in turn. */
    for (j = 0; j < dbs_per_call && timelimit_exit == 0; j++) {
        unsigned long expired, sampled;

        /* Pick the current db and record the progress in current_db. */
        redisDb *db = server.db+(current_db % server.dbnum);
        current_db++;

        do {
            unsigned long num, slots;
            long long now, ttl_sum;
            int ttl_samples;
            iteration++;

            /* No keys with an expire in this db: nothing to do. */
            if ((num = dictSize(db->expires)) == 0) {
                db->avg_ttl = 0;
                break;
            }
            slots = dictSlots(db->expires);
            now = mstime();

            /* Skip a table that is filled below 1%. */
            if (num && slots > DICT_HT_INITIAL_SIZE &&
                (num*100/slots < 1)) break;

            expired = 0;
            sampled = 0;
            ttl_sum = 0;
            ttl_samples = 0;

            if (num > config_keys_per_loop)
                num = config_keys_per_loop;

            /* Maximum number of hash buckets to check. */
            long max_buckets = num*20;
            /* Number of hash buckets checked so far. */
            long checked_buckets = 0;

            /* Continue while fewer than `num` keys were sampled and fewer
             * than `max_buckets` buckets were checked. */
            while (sampled < num && checked_buckets < max_buckets) {
                for (int table = 0; table < 2; table++) {
                    if (table == 1 && !dictIsRehashing(db->expires)) break;

                    /* Start checking from this db's expires cursor. */
                    unsigned long idx = db->expires_cursor;
                    idx &= db->expires->ht[table].sizemask;
                    dictEntry *de = db->expires->ht[table].table[idx];
                    long long ttl;

                    checked_buckets++;
                    while(de) {
                        /* Read the next pointer first: `e` may be deleted. */
                        dictEntry *e = de;
                        de = de->next;

                        ttl = dictGetSignedIntegerVal(e)-now;
                        /* If the key is expired, delete it and count it. */
                        if (activeExpireCycleTryExpire(db,e,now)) expired++;
                        if (ttl > 0) {
                            /* For the average TTL statistics. */
                            ttl_sum += ttl;
                            ttl_samples++;
                        }
                        sampled++;
                    }
                }
                db->expires_cursor++;
            }
            total_expired += expired;
            total_sampled += sampled;

            if (ttl_samples) {
                long long avg_ttl = ttl_sum/ttl_samples;

                if (db->avg_ttl == 0) db->avg_ttl = avg_ttl;
                db->avg_ttl = (db->avg_ttl/50)*49 + (avg_ttl/50);
            }

            /* Check the time limit once every 16 iterations. */
            if ((iteration & 0xf) == 0) {
                /* On timeout, set the flag and exit. */
                elapsed = ustime()-start;
                if (elapsed > timelimit) {
                    timelimit_exit = 1;
                    server.stat_expired_time_cap_reached_count++;
                    break;
                }
            }
        } while (sampled == 0 ||
                 (expired*100/sampled) > config_cycle_acceptable_stale);
    }

    elapsed = ustime()-start;
    server.stat_expire_cycle_time_used += elapsed;
    latencyAddSampleIfNeeded("expire-cycle",elapsed/1000);

    /* Update the running estimate of the stale keys ratio. */
    double current_perc;
    if (total_sampled) {
        current_perc = (double)total_expired/total_sampled;
    } else
        current_perc = 0;
    server.stat_expired_stale_perc = (current_perc*0.05)+(server.stat_expired_stale_perc*0.95);
}
关于resize和rehash
初始hashtable长度为4
扩容
负载因子为1,且没有BGSAVE或BGREWRITEAOF
或者key数量是数组长度的5倍时,强制扩容
扩容后size为大于以前size的第一个2的幂
缩容
used < size x 0.1,且没有BGSAVE或BGREWRITEAOF
used数量作为缩容结果(向上取整为2的幂,且不小于初始长度4)
渐进式rehash
初始化第二个dictht哈希表(ht[1]),并将rehashidx置为0
在增删查改时顺带执行一步rehash,以及在databasesCron定时任务中(每次约1ms)进行rehash
完成后将rehashidx置回-1
resize
server.c/tryResizeHashTables
/* Resize the main dictionary and the expires dictionary of database
 * `dbid`, each one only when htNeedsResize() says so. */
void tryResizeHashTables(int dbid) {
    /* Main key space. */
    if (htNeedsResize(server.db[dbid].dict)) {
        dictResize(server.db[dbid].dict);
    }

    /* Expires dictionary. */
    if (htNeedsResize(server.db[dbid].expires)) {
        dictResize(server.db[dbid].expires);
    }
}
server.c/htNeedsResize()
/*
 * Tell whether a dictionary should be resized.
 *
 * Returns non-zero when the table has more slots than the initial size
 * and its fill percentage (used*100/size) is below HASHTABLE_MIN_FILL.
 */
int htNeedsResize(dict *dict) {
    long long size, used;

    size = dictSlots(dict);
    used = dictSize(dict);
    /* Resize condition */
    return (size > DICT_HT_INITIAL_SIZE &&
            (used*100/size < HASHTABLE_MIN_FILL));
}
dict.c/_dictNextPower()
/*
 * Return the hash table size to use for a requested `size`: the first
 * value reached by doubling from DICT_HT_INITIAL_SIZE that is greater
 * than or equal to `size`. Requests of LONG_MAX or more return
 * LONG_MAX + 1.
 */
static unsigned long _dictNextPower(unsigned long size)
{
    unsigned long i = DICT_HT_INITIAL_SIZE;

    if (size >= LONG_MAX) return LONG_MAX + 1LU;
    /* Keep doubling until the requested size is covered. */
    while(1) {
        if (i >= size)
            return i;
        i *= 2;
    }
}
dict.c/dictExpand()
/*
 * Expand (or create) the hash table of `d` so that it can hold `size`
 * elements.
 *
 * Returns DICT_ERR when a rehash is already in progress, when `size` is
 * smaller than the number of elements already stored, or when the
 * computed size equals the current one. Returns DICT_OK otherwise.
 */
int dictExpand(dict *d, unsigned long size)
{
    if (dictIsRehashing(d) || d->ht[0].used > size)
        return DICT_ERR;

    /* The new hash table */
    dictht n;
    unsigned long realsize = _dictNextPower(size);

    /* Same size as the current table: nothing to do. */
    if (realsize == d->ht[0].size) return DICT_ERR;

    n.size = realsize;
    n.sizemask = realsize-1;
    n.table = zcalloc(realsize*sizeof(dictEntry*));
    n.used = 0;

    /* First table of this dict: install it directly, no rehash needed. */
    if (d->ht[0].table == NULL) {
        d->ht[0] = n;
        return DICT_OK;
    }

    /* Prepare the second table for incremental rehashing. */
    d->ht[1] = n;
    d->rehashidx = 0;
    return DICT_OK;
}
rehash
添加时dict.c/dictAddRaw()
/*
 * Add an entry for `key` to `d` without setting its value.
 *
 * Returns the new entry, or NULL when _dictKeyIndex() returns -1 for the
 * key; `existing` is passed through to _dictKeyIndex(). While a rehash is
 * in progress, one rehash step is performed first and the new entry is
 * placed in the second table.
 */
dictEntry *dictAddRaw(dict *d, void *key, dictEntry **existing)
{
    long index;
    dictEntry *entry;
    dictht *ht;

    if (dictIsRehashing(d)) _dictRehashStep(d);

    if ((index = _dictKeyIndex(d, key, dictHashKey(d,key), existing)) == -1)
        return NULL;

    /* During a rehash, new elements go into the new table. */
    ht = dictIsRehashing(d) ? &d->ht[1] : &d->ht[0];
    entry = zmalloc(sizeof(*entry));
    /* Insert at the head of the bucket's list. */
    entry->next = ht->table[index];
    ht->table[index] = entry;
    ht->used++;

    dictSetKey(d, entry, key);
    return entry;
}
databasesCron里的server.c/incrementallyRehash()
dictEntry *dictAddRaw(dict *d, void *key, dictEntry **existing)
int incrementallyRehash(int dbid) {
# rehash 数据字典
if (dictIsRehashing(server.db[dbid].dict)) {
dictRehashMilliseconds(server.db[dbid].dict,1);
return 1;
}
# rehash 过期字典
if (dictIsRehashing(server.db[dbid].expires)) {
dictRehashMilliseconds(server.db[dbid].expires,1);
return 1;
}
return 0;
}
dict.c/dictRehashMilliseconds()
/*
 * Rehash `d` in batches of 100 steps until dictRehash() returns 0 or
 * more than `ms` milliseconds have elapsed.
 *
 * Returns the number of steps accounted for, in multiples of 100.
 */
int dictRehashMilliseconds(dict *d, int ms) {
    long long start = timeInMilliseconds();
    int rehashes = 0;

    /* Callers in these notes pass ms = 1, i.e. about 1 ms of rehashing. */
    while(dictRehash(d,100)) {
        rehashes += 100;
        if (timeInMilliseconds()-start > ms) break;
    }
    return rehashes;
}
4. 内存淘汰机制
内存不够,redis主动清理。
策略名称 | 解释 |
---|---|
allkeys-lru | 所有键 + 最近最少使用 |
allkeys-lfu | 所有键 + 访问频率最小 |
allkeys-random | 所有键 + 随机 |
volatile-lru | 设置了过期时间的键 + 最近最少使用 |
volatile-lfu | 设置了过期时间的键 + 访问频率最小 |
volatile-random | 设置了过期时间的键 + 随机 |
volatile-ttl | 设置了过期时间的键 + 剩余过期时间最小 |
noeviction | 报错 |
lfu:redis-4.0+
配置 redis.conf
maxmemory-policy (策略)
(Chapter Over)